|
1 /* |
|
2 * LIBOIL - Library of Optimized Inner Loops |
|
3 * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org> |
|
4 * All rights reserved. |
|
5 * |
|
6 * Redistribution and use in source and binary forms, with or without |
|
7 * modification, are permitted provided that the following conditions |
|
8 * are met: |
|
9 * 1. Redistributions of source code must retain the above copyright |
|
10 * notice, this list of conditions and the following disclaimer. |
|
11 * 2. Redistributions in binary form must reproduce the above copyright |
|
12 * notice, this list of conditions and the following disclaimer in the |
|
13 * documentation and/or other materials provided with the distribution. |
|
14 * |
|
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
|
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, |
|
19 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
|
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
|
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
|
23 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING |
|
24 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
|
25 * POSSIBILITY OF SUCH DAMAGE. |
|
26 */ |
|
27 //Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. |
|
28 |
|
29 #ifdef HAVE_CONFIG_H |
|
30 #include "config.h" |
|
31 #endif |
|
32 |
|
33 #include <liboil/liboilfunction.h> |
|
34 #include "liboil/simdpack/simdpack.h" |
|
35 |
|
/*
 * Switch off the "empty declaration" warning, but only when building for
 * Symbian with the WINSCW compiler (both macros must be defined).
 * NOTE(review): presumably needed because the SCALARMULT_DEFINE_* macros in
 * this file already end in ';' and are then invoked with another ';' --
 * confirm before removing.
 */
#if defined(__SYMBIAN32__) && defined(__WINSCW__)
#pragma warn_emptydecl off
#endif /* __SYMBIAN32__ && __WINSCW__ */
|
41 |
|
/*
 * SCALARMULT_DEFINE_UNROLL2(type)
 *
 * Expands to a static function scalarmult_<type>_unroll2() and passes it to
 * OIL_DEFINE_IMPL as an implementation of scalarmult_<type>.
 *
 * For n >= 0 the generated function writes (*src) * (*val) through dest for
 * n consecutive elements, advancing dest by dstr and src by sstr (via
 * OIL_INCREMENT) after each one.  An odd n is handled by processing one
 * element up front; the remaining n / 2 pairs are processed two elements per
 * loop pass.  *val is re-read for every element.
 *
 * NOTE(review): dstr/sstr are presumably byte strides, as consumed by
 * OIL_INCREMENT -- confirm against liboilfunction.h.
 */
#define SCALARMULT_DEFINE_UNROLL2(type) \
static void scalarmult_ ## type ## _unroll2( \
    oil_type_ ## type *dest, int dstr, \
    oil_type_ ## type *src, int sstr, \
    oil_type_ ## type *val, int n) \
{ \
  int pairs; \
  /* Peel the odd element so that what is left splits into whole pairs. */ \
  if ((n & 1) != 0) { \
    *dest = (*src) * (*val); \
    OIL_INCREMENT(dest, dstr); \
    OIL_INCREMENT(src, sstr); \
  } \
  for (pairs = n / 2; pairs > 0; pairs--) { \
    /* first element of the pair */ \
    *dest = (*src) * (*val); \
    OIL_INCREMENT(dest, dstr); \
    OIL_INCREMENT(src, sstr); \
    /* second element of the pair */ \
    *dest = (*src) * (*val); \
    OIL_INCREMENT(dest, dstr); \
    OIL_INCREMENT(src, sstr); \
  } \
} \
OIL_DEFINE_IMPL (scalarmult_ ## type ## _unroll2, scalarmult_ ## type);
|
65 |
|
/* Emit scalarmult_<type>_unroll2 for each element-type suffix: integers first, then floats. */
SCALARMULT_DEFINE_UNROLL2(s8);
SCALARMULT_DEFINE_UNROLL2(u8);
SCALARMULT_DEFINE_UNROLL2(s16);
SCALARMULT_DEFINE_UNROLL2(u16);
SCALARMULT_DEFINE_UNROLL2(s32);
SCALARMULT_DEFINE_UNROLL2(u32);
SCALARMULT_DEFINE_UNROLL2(f32);
SCALARMULT_DEFINE_UNROLL2(f64);
|
74 |
|
/*
 * SCALARMULT_DEFINE_UNROLL2x(type)
 *
 * Expands to a static function scalarmult_<type>_unroll2x() and passes it to
 * OIL_DEFINE_IMPL as an implementation of scalarmult_<type>.
 *
 * Same element-wise product as the other variants (dest element =
 * src element * *val), but the paired part is addressed by offset instead of
 * by advancing the pointers: after an optional leading element (odd n), two
 * base pointers one stride apart are indexed with a doubled stride, so each
 * loop pass writes the two members of one pair.  *val is re-read for every
 * element.
 *
 * NOTE(review): dstr/sstr are presumably byte strides, as consumed by
 * OIL_INCREMENT / OIL_OFFSET / OIL_GET -- confirm against liboilfunction.h.
 */
#define SCALARMULT_DEFINE_UNROLL2x(type) \
static void scalarmult_ ## type ## _unroll2x( \
    oil_type_ ## type *dest, int dstr, \
    oil_type_ ## type *src, int sstr, \
    oil_type_ ## type *val, int n) \
{ \
  oil_type_ ## type *dest_second; \
  oil_type_ ## type *src_second; \
  int pair; \
  int pairs; \
  int dst_pair_step; \
  int src_pair_step; \
  /* Peel the odd element so that what is left splits into whole pairs. */ \
  if ((n & 1) != 0) { \
    *dest = (*src) * (*val); \
    OIL_INCREMENT(dest, dstr); \
    OIL_INCREMENT(src, sstr); \
  } \
  /* Second member of every pair sits one single stride past the first. */ \
  src_second = OIL_OFFSET(src, sstr); \
  dest_second = OIL_OFFSET(dest, dstr); \
  pairs = n / 2; \
  src_pair_step = sstr * 2; \
  dst_pair_step = dstr * 2; \
  for (pair = 0; pair < pairs; pair++) { \
    OIL_GET(dest, dst_pair_step * pair, oil_type_ ## type) = \
        OIL_GET(src, src_pair_step * pair, oil_type_ ## type) * *val; \
    OIL_GET(dest_second, dst_pair_step * pair, oil_type_ ## type) = \
        OIL_GET(src_second, src_pair_step * pair, oil_type_ ## type) * *val; \
  } \
} \
OIL_DEFINE_IMPL (scalarmult_ ## type ## _unroll2x, scalarmult_ ## type);
|
100 |
|
/* Emit scalarmult_<type>_unroll2x for each element-type suffix: integers first, then floats. */
SCALARMULT_DEFINE_UNROLL2x(s8);
SCALARMULT_DEFINE_UNROLL2x(u8);
SCALARMULT_DEFINE_UNROLL2x(s16);
SCALARMULT_DEFINE_UNROLL2x(u16);
SCALARMULT_DEFINE_UNROLL2x(s32);
SCALARMULT_DEFINE_UNROLL2x(u32);
SCALARMULT_DEFINE_UNROLL2x(f32);
SCALARMULT_DEFINE_UNROLL2x(f64);
|
109 |
|
/*
 * SCALARMULT_DEFINE_UNROLL4(type)
 *
 * Expands to a static function scalarmult_<type>_unroll4() and passes it to
 * OIL_DEFINE_IMPL as an implementation of scalarmult_<type>.
 *
 * For n >= 0 the generated function writes (*src) * (*val) through dest for
 * n consecutive elements, advancing dest by dstr and src by sstr (via
 * OIL_INCREMENT) after each one.  The low two bits of n select a prologue of
 * one element (bit 0) and/or two elements (bit 1); the remaining n / 4
 * groups are processed four elements per loop pass.  *val is re-read for
 * every element.
 *
 * NOTE(review): dstr/sstr are presumably byte strides, as consumed by
 * OIL_INCREMENT -- confirm against liboilfunction.h.
 */
#define SCALARMULT_DEFINE_UNROLL4(type) \
static void scalarmult_ ## type ## _unroll4( \
    oil_type_ ## type *dest, int dstr, \
    oil_type_ ## type *src, int sstr, \
    oil_type_ ## type *val, int n) \
{ \
  int quads; \
  /* Prologue, part 1: one element when bit 0 of n is set. */ \
  if ((n & 1) != 0) { \
    *dest = (*src) * (*val); \
    OIL_INCREMENT(dest, dstr); \
    OIL_INCREMENT(src, sstr); \
  } \
  /* Prologue, part 2: two elements when bit 1 of n is set. */ \
  if ((n & 2) != 0) { \
    *dest = (*src) * (*val); \
    OIL_INCREMENT(dest, dstr); \
    OIL_INCREMENT(src, sstr); \
    *dest = (*src) * (*val); \
    OIL_INCREMENT(dest, dstr); \
    OIL_INCREMENT(src, sstr); \
  } \
  /* Main loop: four elements per pass. */ \
  for (quads = n / 4; quads > 0; quads--) { \
    *dest = (*src) * (*val); \
    OIL_INCREMENT(dest, dstr); \
    OIL_INCREMENT(src, sstr); \
    *dest = (*src) * (*val); \
    OIL_INCREMENT(dest, dstr); \
    OIL_INCREMENT(src, sstr); \
    *dest = (*src) * (*val); \
    OIL_INCREMENT(dest, dstr); \
    OIL_INCREMENT(src, sstr); \
    *dest = (*src) * (*val); \
    OIL_INCREMENT(dest, dstr); \
    OIL_INCREMENT(src, sstr); \
  } \
} \
OIL_DEFINE_IMPL (scalarmult_ ## type ## _unroll4, scalarmult_ ## type);
|
147 |
|
/* Emit scalarmult_<type>_unroll4 for each element-type suffix: integers first, then floats. */
SCALARMULT_DEFINE_UNROLL4(s8);
SCALARMULT_DEFINE_UNROLL4(u8);
SCALARMULT_DEFINE_UNROLL4(s16);
SCALARMULT_DEFINE_UNROLL4(u16);
SCALARMULT_DEFINE_UNROLL4(s32);
SCALARMULT_DEFINE_UNROLL4(u32);
SCALARMULT_DEFINE_UNROLL4(f32);
SCALARMULT_DEFINE_UNROLL4(f64);
|
156 |
|
157 |
|
158 |
|
159 |
|
160 |
|
/*
 * SCALARMULT_DEFINE_X(type)
 *
 * Expands to a static function scalarmult_<type>_x() and passes it to
 * OIL_DEFINE_IMPL as an implementation of scalarmult_<type>.
 *
 * Index-based variant: dest and src are never advanced.  Elements are
 * addressed through OIL_GET at offsets index * dstr / index * sstr and are
 * processed two per loop pass while a full pair remains; when n is odd the
 * element at the index where the loop stopped is processed afterwards.
 * *val is re-read for every element.
 *
 * NOTE(review): dstr/sstr are presumably byte strides, as consumed by
 * OIL_GET -- confirm against liboilfunction.h.
 */
#define SCALARMULT_DEFINE_X(type) \
static void scalarmult_ ## type ## _x( \
    oil_type_ ## type *dest, int dstr, \
    oil_type_ ## type *src, int sstr, \
    oil_type_ ## type *val, int n) \
{ \
  int idx = 0; \
  while (idx + 1 < n) { \
    int next = idx + 1; \
    OIL_GET(dest, idx * dstr, oil_type_ ## type) = \
        OIL_GET(src, idx * sstr, oil_type_ ## type) * *val; \
    OIL_GET(dest, next * dstr, oil_type_ ## type) = \
        OIL_GET(src, next * sstr, oil_type_ ## type) * *val; \
    idx += 2; \
  } \
  /* Odd n: one element is left at idx, where the pair loop stopped. */ \
  if ((n & 1) != 0) { \
    OIL_GET(dest, idx * dstr, oil_type_ ## type) = \
        OIL_GET(src, idx * sstr, oil_type_ ## type) * *val; \
  } \
} \
OIL_DEFINE_IMPL (scalarmult_ ## type ## _x, scalarmult_ ## type);
|
180 |
|
181 |
|
/* Emit scalarmult_<type>_x for each element-type suffix: integers first, then floats. */
SCALARMULT_DEFINE_X(s8);
SCALARMULT_DEFINE_X(u8);
SCALARMULT_DEFINE_X(s16);
SCALARMULT_DEFINE_X(u16);
SCALARMULT_DEFINE_X(s32);
SCALARMULT_DEFINE_X(u32);
SCALARMULT_DEFINE_X(f32);
SCALARMULT_DEFINE_X(f64);
|
190 |
|
#ifdef __SYMBIAN32__

/*
 * Symbian-only accessors for the "_unroll2" implementation records.
 *
 * Each function takes no arguments and returns the address of the matching
 * _oil_function_impl_scalarmult_<type>_unroll2 object.
 * NOTE(review): those objects are presumably the ones emitted by
 * OIL_DEFINE_IMPL for the unroll2 instantiations in this file, and the
 * accessors presumably exist so other modules can reach them through a
 * function rather than a data symbol -- confirm against the Symbian port.
 *
 * Declared with (void) so the definitions are real prototypes rather than
 * old-style empty parameter lists.
 */

OilFunctionImpl *__oil_function_impl_scalarmult_s8_unroll2(void)
{
  return &_oil_function_impl_scalarmult_s8_unroll2;
}

OilFunctionImpl *__oil_function_impl_scalarmult_u8_unroll2(void)
{
  return &_oil_function_impl_scalarmult_u8_unroll2;
}

OilFunctionImpl *__oil_function_impl_scalarmult_s16_unroll2(void)
{
  return &_oil_function_impl_scalarmult_s16_unroll2;
}

OilFunctionImpl *__oil_function_impl_scalarmult_u16_unroll2(void)
{
  return &_oil_function_impl_scalarmult_u16_unroll2;
}

OilFunctionImpl *__oil_function_impl_scalarmult_s32_unroll2(void)
{
  return &_oil_function_impl_scalarmult_s32_unroll2;
}

OilFunctionImpl *__oil_function_impl_scalarmult_u32_unroll2(void)
{
  return &_oil_function_impl_scalarmult_u32_unroll2;
}

OilFunctionImpl *__oil_function_impl_scalarmult_f32_unroll2(void)
{
  return &_oil_function_impl_scalarmult_f32_unroll2;
}

OilFunctionImpl *__oil_function_impl_scalarmult_f64_unroll2(void)
{
  return &_oil_function_impl_scalarmult_f64_unroll2;
}

#endif /* __SYMBIAN32__ */
|
246 |
|
#ifdef __SYMBIAN32__

/*
 * Symbian-only accessors for the "_unroll4" implementation records.
 *
 * Each function takes no arguments and returns the address of the matching
 * _oil_function_impl_scalarmult_<type>_unroll4 object.
 * NOTE(review): those objects are presumably the ones emitted by
 * OIL_DEFINE_IMPL for the unroll4 instantiations in this file, and the
 * accessors presumably exist so other modules can reach them through a
 * function rather than a data symbol -- confirm against the Symbian port.
 *
 * Declared with (void) so the definitions are real prototypes rather than
 * old-style empty parameter lists.
 */

OilFunctionImpl *__oil_function_impl_scalarmult_s8_unroll4(void)
{
  return &_oil_function_impl_scalarmult_s8_unroll4;
}

OilFunctionImpl *__oil_function_impl_scalarmult_u8_unroll4(void)
{
  return &_oil_function_impl_scalarmult_u8_unroll4;
}

OilFunctionImpl *__oil_function_impl_scalarmult_s16_unroll4(void)
{
  return &_oil_function_impl_scalarmult_s16_unroll4;
}

OilFunctionImpl *__oil_function_impl_scalarmult_u16_unroll4(void)
{
  return &_oil_function_impl_scalarmult_u16_unroll4;
}

OilFunctionImpl *__oil_function_impl_scalarmult_s32_unroll4(void)
{
  return &_oil_function_impl_scalarmult_s32_unroll4;
}

OilFunctionImpl *__oil_function_impl_scalarmult_u32_unroll4(void)
{
  return &_oil_function_impl_scalarmult_u32_unroll4;
}

OilFunctionImpl *__oil_function_impl_scalarmult_f32_unroll4(void)
{
  return &_oil_function_impl_scalarmult_f32_unroll4;
}

OilFunctionImpl *__oil_function_impl_scalarmult_f64_unroll4(void)
{
  return &_oil_function_impl_scalarmult_f64_unroll4;
}

#endif /* __SYMBIAN32__ */
|
302 |
|
#ifdef __SYMBIAN32__

/*
 * Symbian-only accessors for the "_unroll2x" implementation records.
 *
 * Each function takes no arguments and returns the address of the matching
 * _oil_function_impl_scalarmult_<type>_unroll2x object.
 * NOTE(review): those objects are presumably the ones emitted by
 * OIL_DEFINE_IMPL for the unroll2x instantiations in this file, and the
 * accessors presumably exist so other modules can reach them through a
 * function rather than a data symbol -- confirm against the Symbian port.
 *
 * Declared with (void) so the definitions are real prototypes rather than
 * old-style empty parameter lists.
 */

OilFunctionImpl *__oil_function_impl_scalarmult_s8_unroll2x(void)
{
  return &_oil_function_impl_scalarmult_s8_unroll2x;
}

OilFunctionImpl *__oil_function_impl_scalarmult_u8_unroll2x(void)
{
  return &_oil_function_impl_scalarmult_u8_unroll2x;
}

OilFunctionImpl *__oil_function_impl_scalarmult_s16_unroll2x(void)
{
  return &_oil_function_impl_scalarmult_s16_unroll2x;
}

OilFunctionImpl *__oil_function_impl_scalarmult_u16_unroll2x(void)
{
  return &_oil_function_impl_scalarmult_u16_unroll2x;
}

OilFunctionImpl *__oil_function_impl_scalarmult_s32_unroll2x(void)
{
  return &_oil_function_impl_scalarmult_s32_unroll2x;
}

OilFunctionImpl *__oil_function_impl_scalarmult_u32_unroll2x(void)
{
  return &_oil_function_impl_scalarmult_u32_unroll2x;
}

OilFunctionImpl *__oil_function_impl_scalarmult_f32_unroll2x(void)
{
  return &_oil_function_impl_scalarmult_f32_unroll2x;
}

OilFunctionImpl *__oil_function_impl_scalarmult_f64_unroll2x(void)
{
  return &_oil_function_impl_scalarmult_f64_unroll2x;
}

#endif /* __SYMBIAN32__ */
|
358 |
|
#ifdef __SYMBIAN32__

/*
 * Symbian-only accessors for the "_x" implementation records.
 *
 * Each function takes no arguments and returns the address of the matching
 * _oil_function_impl_scalarmult_<type>_x object.
 * NOTE(review): those objects are presumably the ones emitted by
 * OIL_DEFINE_IMPL for the _x instantiations in this file, and the accessors
 * presumably exist so other modules can reach them through a function rather
 * than a data symbol -- confirm against the Symbian port.
 *
 * Declared with (void) so the definitions are real prototypes rather than
 * old-style empty parameter lists.
 */

OilFunctionImpl *__oil_function_impl_scalarmult_s8_x(void)
{
  return &_oil_function_impl_scalarmult_s8_x;
}

OilFunctionImpl *__oil_function_impl_scalarmult_u8_x(void)
{
  return &_oil_function_impl_scalarmult_u8_x;
}

OilFunctionImpl *__oil_function_impl_scalarmult_s16_x(void)
{
  return &_oil_function_impl_scalarmult_s16_x;
}

OilFunctionImpl *__oil_function_impl_scalarmult_u16_x(void)
{
  return &_oil_function_impl_scalarmult_u16_x;
}

OilFunctionImpl *__oil_function_impl_scalarmult_s32_x(void)
{
  return &_oil_function_impl_scalarmult_s32_x;
}

OilFunctionImpl *__oil_function_impl_scalarmult_u32_x(void)
{
  return &_oil_function_impl_scalarmult_u32_x;
}

OilFunctionImpl *__oil_function_impl_scalarmult_f32_x(void)
{
  return &_oil_function_impl_scalarmult_f32_x;
}

OilFunctionImpl *__oil_function_impl_scalarmult_f64_x(void)
{
  return &_oil_function_impl_scalarmult_f64_x;
}

#endif /* __SYMBIAN32__ */