|
/*
* Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description:
*
*/
|
17 |
|
18 #ifdef HAVE_CONFIG_H |
|
19 #include "config.h" |
|
20 #endif |
|
21 #include <liboil/liboilclasses.h> |
|
22 #include <liboil/liboilfunction.h> |
|
23 #include <emmintrin.h> |
|
24 |
|
/*
 * SSE_FUNCTION: function attribute applied to the SSE2 implementations in
 * this file.  With GNU-compatible compilers it expands to
 * force_align_arg_pointer, which asks the compiler to realign the stack on
 * function entry.  Other compilers do not understand __attribute__, so the
 * macro expands to nothing there.
 */
#if defined(__GNUC__)
#define SSE_FUNCTION __attribute__((force_align_arg_pointer))
#else
#define SSE_FUNCTION
#endif
|
26 |
|
/*
 * MULTSUM_SSE2_NSTRIDED(i): multiply one pair of doubles from src1 by one
 * pair from src2 and add the two products lane-wise into sum.reg.
 *
 * The expansion uses these names from the enclosing scope: src1, src2,
 * t1 and t2 (__m128d scratch registers) and sum (an object with a __m128d
 * member named reg).
 *
 * i is handed to OIL_GET unchanged as the offset of the pair; its unit is
 * whatever OIL_GET defines (not visible in this file).  Both loads use
 * _mm_load_pd, which requires a 16-byte aligned address.
 *
 * Wrapped in do { } while (0) so that "MULTSUM_SSE2_NSTRIDED(x);" behaves as
 * a single statement, including inside an unbraced if/else.
 */
#define MULTSUM_SSE2_NSTRIDED(i) do { \
    t1 = _mm_load_pd(&OIL_GET(src1, (i), double)); \
    t2 = _mm_load_pd(&OIL_GET(src2, (i), double)); \
    t1 = _mm_mul_pd(t1, t2); \
    sum.reg = _mm_add_pd(sum.reg, t1); \
  } while (0)
|
/*
 * MULTSUM_SSE2_NSTRIDEDP(i): like MULTSUM_SSE2_STRIDED, but the src1 pair is
 * fetched with a single _mm_load_pd (so the two src1 values must be adjacent
 * in memory and 16-byte aligned) while the src2 pair is gathered from two
 * separately addressed elements, i and i+1, scaled by sstr2.
 *
 * The expansion uses these names from the enclosing scope: src1, src2,
 * sstr1, sstr2, t1 and t2 (__m128d scratch registers) and sum (an object
 * with a __m128d member named reg).
 *
 * The low lane of t2 is loaded with _mm_load_sd rather than
 * _mm_loadl_pd(t2, ...) so the macro never reads t2 before writing it; the
 * high lane is overwritten by _mm_loadh_pd immediately afterwards, so the
 * resulting register is the same.
 *
 * The argument is parenthesized so expressions such as "k+2" scale
 * correctly, and the body is wrapped in do { } while (0) so the macro
 * behaves as a single statement.
 */
#define MULTSUM_SSE2_NSTRIDEDP(i) do { \
    t1 = _mm_load_pd(&OIL_GET(src1, (i)*sstr1, double)); \
    t2 = _mm_load_sd(&OIL_GET(src2, (i)*sstr2, double)); \
    t2 = _mm_loadh_pd(t2, &OIL_GET(src2, ((i)+1)*sstr2, double)); \
    t1 = _mm_mul_pd(t1, t2); \
    sum.reg = _mm_add_pd(sum.reg, t1); \
  } while (0)
|
/*
 * MULTSUM_SSE2_STRIDED(i): gather elements i and i+1 of src1 (offsets scaled
 * by sstr1) and of src2 (offsets scaled by sstr2), multiply them pair-wise
 * and add the two products lane-wise into sum.reg.  Element i lands in the
 * low lane and element i+1 in the high lane.
 *
 * The expansion uses these names from the enclosing scope: src1, src2,
 * sstr1, sstr2, t1 and t2 (__m128d scratch registers) and sum (an object
 * with a __m128d member named reg).
 *
 * The low lanes are loaded with _mm_load_sd rather than
 * _mm_loadl_pd(tN, ...) so the macro never reads t1/t2 before writing them;
 * the high lanes are overwritten by _mm_loadh_pd immediately afterwards, so
 * the resulting registers are the same.
 *
 * The argument is parenthesized so expressions such as "k+2" scale
 * correctly, and the body is wrapped in do { } while (0) so the macro
 * behaves as a single statement.
 */
#define MULTSUM_SSE2_STRIDED(i) do { \
    t1 = _mm_load_sd(&OIL_GET(src1, (i)*sstr1, double)); \
    t1 = _mm_loadh_pd(t1, &OIL_GET(src1, ((i)+1)*sstr1, double)); \
    t2 = _mm_load_sd(&OIL_GET(src2, (i)*sstr2, double)); \
    t2 = _mm_loadh_pd(t2, &OIL_GET(src2, ((i)+1)*sstr2, double)); \
    t1 = _mm_mul_pd(t1, t2); \
    sum.reg = _mm_add_pd(sum.reg, t1); \
  } while (0)
|
48 |
|
49 |
|
#ifdef ENABLE_BROKEN_IMPLS
/*
 * multsum_f64_sse2_unroll4:
 * @dest:  receives the single double result
 * @src1:  first input array
 * @sstr1: stride between consecutive @src1 elements, in the unit used by
 *         OIL_GET/OIL_INCREMENT (presumably bytes -- confirm against liboil)
 * @src2:  second input array
 * @sstr2: stride between consecutive @src2 elements, same unit as @sstr1
 * @n:     number of element pairs to process
 *
 * Stores in *dest the sum of src1[k] * src2[k] for k = 0 .. n-1.
 *
 * Elements are consumed two at a time into a two-lane accumulator: the main
 * loop handles four per iteration, a second loop picks up a remaining pair,
 * and a final scalar step handles an odd last element.  The two lanes are
 * added together only at the end, so the order of floating-point additions
 * differs from a straight left-to-right sum.
 *
 * Only compiled when ENABLE_BROKEN_IMPLS is defined.
 * NOTE(review): the reason this implementation is classed as "broken" is not
 * recorded in this file.
 */
SSE_FUNCTION static void
multsum_f64_sse2_unroll4(double *dest,
    const double *src1, int sstr1,
    const double *src2, int sstr2,
    int n)
{
  /* Start the scratch registers from a defined value so that no code path
   * can feed an indeterminate __m128d into an intrinsic. */
  __m128d t1 = _mm_setzero_pd();
  __m128d t2 = _mm_setzero_pd();
  /* The union gives scalar access to the two accumulator lanes. */
  union {
    __m128d reg;
    double vals[2];
  } sum;
  int i = 0;

  sum.reg = _mm_setzero_pd();

  /* Main loop: four element pairs per iteration. */
  while (i < n-3) {
    MULTSUM_SSE2_STRIDED(0);
    MULTSUM_SSE2_STRIDED(2);

    OIL_INCREMENT(src1, 4*sstr1);
    OIL_INCREMENT(src2, 4*sstr2);
    i += 4;
  }

  /* At most one leftover group of two element pairs. */
  while (i < n-1) {
    MULTSUM_SSE2_STRIDED(0);

    OIL_INCREMENT(src1, 2*sstr1);
    OIL_INCREMENT(src2, 2*sstr2);
    i += 2;
  }

  /* Horizontal add of the two lanes. */
  *dest = sum.vals[0] + sum.vals[1];

  /* Odd n: one element pair remains at the current src positions. */
  if (i < n) {
    *dest += (OIL_GET(src1,0,double)*OIL_GET(src2,0,double));
  }
}
OIL_DEFINE_IMPL_FULL (multsum_f64_sse2_unroll4, multsum_f64, OIL_IMPL_FLAG_SSE2);
#endif
|
87 |
|
88 |
|
89 |
|
#ifdef __SYMBIAN32__
#ifdef ENABLE_BROKEN_IMPLS
/*
 * Symbian accessor: returns the address of the implementation record for
 * multsum_f64_sse2_unroll4.
 *
 * The previous text carried a stray ", multsum_f64" in both the function
 * name and the return expression, which is not valid C.
 *
 * Guarded by ENABLE_BROKEN_IMPLS as well, because the record it returns is
 * only defined when that macro is set.
 */
OilFunctionImpl* __oil_function_impl_multsum_f64_sse2_unroll4(void) {
    return &_oil_function_impl_multsum_f64_sse2_unroll4;
}
#endif
#endif
|
96 |