genericopenlibs/liboil/src/multsum_sse.c
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Tue, 31 Aug 2010 16:54:36 +0300
branchRCL_3
changeset 56 acd3cd4aaceb
permissions -rw-r--r--
Revision: 201021 Kit: 201035
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
56
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     1
/*
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     2
* Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     3
* All rights reserved.
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     4
* This component and the accompanying materials are made available
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     5
* under the terms of "Eclipse Public License v1.0"
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     6
* which accompanies this distribution, and is available
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     8
*
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     9
* Initial Contributors:
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    10
* Nokia Corporation - initial contribution.
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    11
*
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    12
* Contributors:
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    13
*
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    14
* Description: 
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    15
*
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    16
*/
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    17
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    18
#ifdef HAVE_CONFIG_H
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    19
#include "config.h"
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    20
#endif
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    21
#include <liboil/liboilclasses.h>
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    22
#include <liboil/liboilfunction.h>
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    23
#include <emmintrin.h>
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    24
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    25
#define SSE_FUNCTION __attribute__((force_align_arg_pointer))
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    26
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    27
#define MULTSUM_SSE2_NSTRIDED(i) { \
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    28
  t1 = _mm_load_pd(&OIL_GET(src1, i, double)); \
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    29
  t2 = _mm_load_pd(&OIL_GET(src2, i, double)); \
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    30
  t1 = _mm_mul_pd(t1,t2); \
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    31
  sum.reg = _mm_add_pd(sum.reg,t1); \
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    32
}
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    33
#define MULTSUM_SSE2_NSTRIDEDP(i) { \
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    34
  t1 = _mm_load_pd(&OIL_GET(src1, i*sstr1, double)); \
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    35
  t2 = _mm_loadl_pd(t2, &OIL_GET(src2, i*sstr2, double)); \
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    36
  t2 = _mm_loadh_pd(t2, &OIL_GET(src2, (i+1)*sstr2, double)); \
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    37
  t1 = _mm_mul_pd(t1,t2); \
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    38
  sum.reg = _mm_add_pd(sum.reg,t1); \
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    39
}
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    40
#define MULTSUM_SSE2_STRIDED(i) { \
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    41
  t1 = _mm_loadl_pd(t1, &OIL_GET(src1, i*sstr1, double)); \
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    42
  t1 = _mm_loadh_pd(t1, &OIL_GET(src1, (i+1)*sstr1, double)); \
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    43
  t2 = _mm_loadl_pd(t2, &OIL_GET(src2, i*sstr2, double)); \
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    44
  t2 = _mm_loadh_pd(t2, &OIL_GET(src2, (i+1)*sstr2, double)); \
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    45
  t1 = _mm_mul_pd(t1,t2); \
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    46
  sum.reg = _mm_add_pd(sum.reg,t1); \
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    47
}
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    48
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    49
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    50
#ifdef ENABLE_BROKEN_IMPLS
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    51
SSE_FUNCTION static void
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    52
multsum_f64_sse2_unroll4(double *dest,
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    53
     const double *src1, int sstr1,
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    54
     const double *src2, int sstr2,
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    55
     int n)
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    56
{
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    57
  __m128d t1, t2;
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    58
  union {
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    59
    __m128d reg;
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    60
    double vals[2];
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    61
  } sum;
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    62
  int i = 0;
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    63
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    64
  sum.reg = _mm_setzero_pd();
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    65
  while (i < n-3) {
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    66
    MULTSUM_SSE2_STRIDED(0);
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    67
    MULTSUM_SSE2_STRIDED(2);
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    68
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    69
    OIL_INCREMENT(src1, 4*sstr1);
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    70
    OIL_INCREMENT(src2, 4*sstr2);
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    71
    i += 4;
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    72
  }
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    73
  while (i < n-1) {
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    74
    MULTSUM_SSE2_STRIDED(0);
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    75
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    76
    OIL_INCREMENT(src1, 2*sstr1);
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    77
    OIL_INCREMENT(src2, 2*sstr2);
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    78
    i+=2;
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    79
  }
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    80
  *dest = sum.vals[0] + sum.vals[1];
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    81
  if (i < n) {
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    82
    *dest += (OIL_GET(src1,0,double)*OIL_GET(src2,0,double));
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    83
  }
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    84
}
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    85
OIL_DEFINE_IMPL_FULL (multsum_f64_sse2_unroll4, multsum_f64, OIL_IMPL_FLAG_SSE2);
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    86
#endif
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    87
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    88
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    89
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    90
#ifdef	__SYMBIAN32__
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    91
 
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    92
OilFunctionImpl* __oil_function_impl_multsum_f64_sse2_unroll4, multsum_f64() {
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    93
		return &_oil_function_impl_multsum_f64_sse2_unroll4, multsum_f64;
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    94
}
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    95
#endif
acd3cd4aaceb Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    96