genericopenlibs/liboil/src/multsum_sse.c
changeset 18 47c74d1534e1
equal deleted inserted replaced
0:e4d67989cc36 18:47c74d1534e1
       
     1 /*
       
     2 * Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 #ifdef HAVE_CONFIG_H
       
    19 #include "config.h"
       
    20 #endif
       
    21 #include <liboil/liboilclasses.h>
       
    22 #include <liboil/liboilfunction.h>
       
    23 #include <emmintrin.h>
       
    24 
       
    25 #define SSE_FUNCTION __attribute__((force_align_arg_pointer))
       
    26 
       
    27 #define MULTSUM_SSE2_NSTRIDED(i) { \
       
    28   t1 = _mm_load_pd(&OIL_GET(src1, i, double)); \
       
    29   t2 = _mm_load_pd(&OIL_GET(src2, i, double)); \
       
    30   t1 = _mm_mul_pd(t1,t2); \
       
    31   sum.reg = _mm_add_pd(sum.reg,t1); \
       
    32 }
       
    33 #define MULTSUM_SSE2_NSTRIDEDP(i) { \
       
    34   t1 = _mm_load_pd(&OIL_GET(src1, i*sstr1, double)); \
       
    35   t2 = _mm_loadl_pd(t2, &OIL_GET(src2, i*sstr2, double)); \
       
    36   t2 = _mm_loadh_pd(t2, &OIL_GET(src2, (i+1)*sstr2, double)); \
       
    37   t1 = _mm_mul_pd(t1,t2); \
       
    38   sum.reg = _mm_add_pd(sum.reg,t1); \
       
    39 }
       
    40 #define MULTSUM_SSE2_STRIDED(i) { \
       
    41   t1 = _mm_loadl_pd(t1, &OIL_GET(src1, i*sstr1, double)); \
       
    42   t1 = _mm_loadh_pd(t1, &OIL_GET(src1, (i+1)*sstr1, double)); \
       
    43   t2 = _mm_loadl_pd(t2, &OIL_GET(src2, i*sstr2, double)); \
       
    44   t2 = _mm_loadh_pd(t2, &OIL_GET(src2, (i+1)*sstr2, double)); \
       
    45   t1 = _mm_mul_pd(t1,t2); \
       
    46   sum.reg = _mm_add_pd(sum.reg,t1); \
       
    47 }
       
    48 
       
    49 
       
    50 #ifdef ENABLE_BROKEN_IMPLS
       
    51 SSE_FUNCTION static void
       
    52 multsum_f64_sse2_unroll4(double *dest,
       
    53      const double *src1, int sstr1,
       
    54      const double *src2, int sstr2,
       
    55      int n)
       
    56 {
       
    57   __m128d t1, t2;
       
    58   union {
       
    59     __m128d reg;
       
    60     double vals[2];
       
    61   } sum;
       
    62   int i = 0;
       
    63 
       
    64   sum.reg = _mm_setzero_pd();
       
    65   while (i < n-3) {
       
    66     MULTSUM_SSE2_STRIDED(0);
       
    67     MULTSUM_SSE2_STRIDED(2);
       
    68 
       
    69     OIL_INCREMENT(src1, 4*sstr1);
       
    70     OIL_INCREMENT(src2, 4*sstr2);
       
    71     i += 4;
       
    72   }
       
    73   while (i < n-1) {
       
    74     MULTSUM_SSE2_STRIDED(0);
       
    75 
       
    76     OIL_INCREMENT(src1, 2*sstr1);
       
    77     OIL_INCREMENT(src2, 2*sstr2);
       
    78     i+=2;
       
    79   }
       
    80   *dest = sum.vals[0] + sum.vals[1];
       
    81   if (i < n) {
       
    82     *dest += (OIL_GET(src1,0,double)*OIL_GET(src2,0,double));
       
    83   }
       
    84 }
       
    85 OIL_DEFINE_IMPL_FULL (multsum_f64_sse2_unroll4, multsum_f64, OIL_IMPL_FLAG_SSE2);
       
    86 #endif
       
    87 
       
    88 
       
    89 
       
    90 #ifdef	__SYMBIAN32__
       
    91  
       
    92 OilFunctionImpl* __oil_function_impl_multsum_f64_sse2_unroll4, multsum_f64() {
       
    93 		return &_oil_function_impl_multsum_f64_sse2_unroll4, multsum_f64;
       
    94 }
       
    95 #endif
       
    96