|
1 /* This file is autogenerated. Do not edit. */ |
|
2 /* |
|
3 * LIBOIL - Library of Optimized Inner Loops |
|
4 * Copyright (c) 2005 David A. Schleef <ds.org> |
|
5 * All rights reserved. |
|
6 * |
|
7 * Redistribution and use in source and binary forms, with or without |
|
8 * modification, are permitted provided that the following conditions |
|
9 * are met: |
|
10 * 1. Redistributions of source code must retain the above copyright |
|
11 * notice, this list of conditions and the following disclaimer. |
|
12 * 2. Redistributions in binary form must reproduce the above copyright |
|
13 * notice, this list of conditions and the following disclaimer in the |
|
14 * documentation and/or other materials provided with the distribution. |
|
15 * |
|
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
|
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, |
|
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
|
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
|
22 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
|
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
|
24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING |
|
25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
|
26 * POSSIBILITY OF SUCH DAMAGE. |
|
27 */ |
|
28 //Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. |
|
29 |
|
30 #ifdef HAVE_CONFIG_H |
|
31 #include "config.h" |
|
32 #endif |
|
33 |
|
34 #include <math.h> |
|
35 |
|
36 #include <liboil/liboil.h> |
|
37 #include <liboil/liboilclasses.h> |
|
38 |
|
39 static void |
|
40 add_f32_pointer (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
41 { |
|
42 while (n) { |
|
43 *dest = *src1 + *src2; |
|
44 dest++; |
|
45 src1++; |
|
46 src2++; |
|
47 n--; |
|
48 } |
|
49 } |
|
50 OIL_DEFINE_IMPL (add_f32_pointer, add_f32); |
|
51 |
|
52 static void |
|
53 add_f32_unroll2 (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
54 { |
|
55 int i; |
|
56 |
|
57 if (n & 1) { |
|
58 dest[0] = src1[0] + src2[0]; |
|
59 dest++; |
|
60 src1++; |
|
61 src2++; |
|
62 n--; |
|
63 } |
|
64 for(i=0;i<n;i+=2){ |
|
65 dest[i] = src1[i] + src2[i]; |
|
66 dest[i+1] = src1[i+1] + src2[i+1]; |
|
67 } |
|
68 } |
|
69 OIL_DEFINE_IMPL (add_f32_unroll2, add_f32); |
|
70 |
|
71 static void |
|
72 add_f32_unroll4a (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
73 { |
|
74 int i; |
|
75 |
|
76 while (n & 3) { |
|
77 dest[0] = src1[0] + src2[0]; |
|
78 dest++; |
|
79 src1++; |
|
80 src2++; |
|
81 n--; |
|
82 } |
|
83 for(i=0;i<n;i+=4){ |
|
84 dest[i] = src1[i] + src2[i]; |
|
85 dest[i+1] = src1[i+1] + src2[i+1]; |
|
86 dest[i+2] = src1[i+2] + src2[i+2]; |
|
87 dest[i+3] = src1[i+3] + src2[i+3]; |
|
88 } |
|
89 } |
|
90 OIL_DEFINE_IMPL (add_f32_unroll4a, add_f32); |
|
91 |
|
92 static void |
|
93 add_f32_unroll4b (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
94 { |
|
95 int i; |
|
96 |
|
97 for(i=0;i<(n&(~0x3));i+=4){ |
|
98 dest[i+0] = src1[i+0] + src2[i+0]; |
|
99 dest[i+1] = src1[i+1] + src2[i+1]; |
|
100 dest[i+2] = src1[i+2] + src2[i+2]; |
|
101 dest[i+3] = src1[i+3] + src2[i+3]; |
|
102 } |
|
103 for(;i<n;i++){ |
|
104 dest[i] = src1[i] + src2[i]; |
|
105 } |
|
106 } |
|
107 OIL_DEFINE_IMPL (add_f32_unroll4b, add_f32); |
|
108 |
|
109 static void |
|
110 add_f32_unroll4c (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
111 { |
|
112 int i; |
|
113 |
|
114 for(i=0;i<(n&(~0x3));i+=4){ |
|
115 *dest++ = *src1++ + *src2++; |
|
116 *dest++ = *src1++ + *src2++; |
|
117 *dest++ = *src1++ + *src2++; |
|
118 *dest++ = *src1++ + *src2++; |
|
119 } |
|
120 for(;i<n;i++){ |
|
121 *dest++ = *src1++ + *src2++; |
|
122 } |
|
123 } |
|
124 OIL_DEFINE_IMPL (add_f32_unroll4c, add_f32); |
|
125 |
|
126 static void |
|
127 add_f64_pointer (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
128 { |
|
129 while (n) { |
|
130 *dest = *src1 + *src2; |
|
131 dest++; |
|
132 src1++; |
|
133 src2++; |
|
134 n--; |
|
135 } |
|
136 } |
|
137 OIL_DEFINE_IMPL (add_f64_pointer, add_f64); |
|
138 |
|
139 static void |
|
140 add_f64_unroll2 (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
141 { |
|
142 int i; |
|
143 |
|
144 if (n & 1) { |
|
145 dest[0] = src1[0] + src2[0]; |
|
146 dest++; |
|
147 src1++; |
|
148 src2++; |
|
149 n--; |
|
150 } |
|
151 for(i=0;i<n;i+=2){ |
|
152 dest[i] = src1[i] + src2[i]; |
|
153 dest[i+1] = src1[i+1] + src2[i+1]; |
|
154 } |
|
155 } |
|
156 OIL_DEFINE_IMPL (add_f64_unroll2, add_f64); |
|
157 |
|
158 static void |
|
159 add_f64_unroll4a (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
160 { |
|
161 int i; |
|
162 |
|
163 while (n & 3) { |
|
164 dest[0] = src1[0] + src2[0]; |
|
165 dest++; |
|
166 src1++; |
|
167 src2++; |
|
168 n--; |
|
169 } |
|
170 for(i=0;i<n;i+=4){ |
|
171 dest[i] = src1[i] + src2[i]; |
|
172 dest[i+1] = src1[i+1] + src2[i+1]; |
|
173 dest[i+2] = src1[i+2] + src2[i+2]; |
|
174 dest[i+3] = src1[i+3] + src2[i+3]; |
|
175 } |
|
176 } |
|
177 OIL_DEFINE_IMPL (add_f64_unroll4a, add_f64); |
|
178 |
|
179 static void |
|
180 add_f64_unroll4b (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
181 { |
|
182 int i; |
|
183 |
|
184 for(i=0;i<(n&(~0x3));i+=4){ |
|
185 dest[i+0] = src1[i+0] + src2[i+0]; |
|
186 dest[i+1] = src1[i+1] + src2[i+1]; |
|
187 dest[i+2] = src1[i+2] + src2[i+2]; |
|
188 dest[i+3] = src1[i+3] + src2[i+3]; |
|
189 } |
|
190 for(;i<n;i++){ |
|
191 dest[i] = src1[i] + src2[i]; |
|
192 } |
|
193 } |
|
194 OIL_DEFINE_IMPL (add_f64_unroll4b, add_f64); |
|
195 |
|
196 static void |
|
197 add_f64_unroll4c (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
198 { |
|
199 int i; |
|
200 |
|
201 for(i=0;i<(n&(~0x3));i+=4){ |
|
202 *dest++ = *src1++ + *src2++; |
|
203 *dest++ = *src1++ + *src2++; |
|
204 *dest++ = *src1++ + *src2++; |
|
205 *dest++ = *src1++ + *src2++; |
|
206 } |
|
207 for(;i<n;i++){ |
|
208 *dest++ = *src1++ + *src2++; |
|
209 } |
|
210 } |
|
211 OIL_DEFINE_IMPL (add_f64_unroll4c, add_f64); |
|
212 |
|
213 static void |
|
214 subtract_f32_pointer (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
215 { |
|
216 while (n) { |
|
217 *dest = *src1 - *src2; |
|
218 dest++; |
|
219 src1++; |
|
220 src2++; |
|
221 n--; |
|
222 } |
|
223 } |
|
224 OIL_DEFINE_IMPL (subtract_f32_pointer, subtract_f32); |
|
225 |
|
226 static void |
|
227 subtract_f32_unroll2 (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
228 { |
|
229 int i; |
|
230 |
|
231 if (n & 1) { |
|
232 dest[0] = src1[0] - src2[0]; |
|
233 dest++; |
|
234 src1++; |
|
235 src2++; |
|
236 n--; |
|
237 } |
|
238 for(i=0;i<n;i+=2){ |
|
239 dest[i] = src1[i] - src2[i]; |
|
240 dest[i+1] = src1[i+1] - src2[i+1]; |
|
241 } |
|
242 } |
|
243 OIL_DEFINE_IMPL (subtract_f32_unroll2, subtract_f32); |
|
244 |
|
245 static void |
|
246 subtract_f32_unroll4a (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
247 { |
|
248 int i; |
|
249 |
|
250 while (n & 3) { |
|
251 dest[0] = src1[0] - src2[0]; |
|
252 dest++; |
|
253 src1++; |
|
254 src2++; |
|
255 n--; |
|
256 } |
|
257 for(i=0;i<n;i+=4){ |
|
258 dest[i] = src1[i] - src2[i]; |
|
259 dest[i+1] = src1[i+1] - src2[i+1]; |
|
260 dest[i+2] = src1[i+2] - src2[i+2]; |
|
261 dest[i+3] = src1[i+3] - src2[i+3]; |
|
262 } |
|
263 } |
|
264 OIL_DEFINE_IMPL (subtract_f32_unroll4a, subtract_f32); |
|
265 |
|
266 static void |
|
267 subtract_f32_unroll4b (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
268 { |
|
269 int i; |
|
270 |
|
271 for(i=0;i<(n&(~0x3));i+=4){ |
|
272 dest[i+0] = src1[i+0] - src2[i+0]; |
|
273 dest[i+1] = src1[i+1] - src2[i+1]; |
|
274 dest[i+2] = src1[i+2] - src2[i+2]; |
|
275 dest[i+3] = src1[i+3] - src2[i+3]; |
|
276 } |
|
277 for(;i<n;i++){ |
|
278 dest[i] = src1[i] - src2[i]; |
|
279 } |
|
280 } |
|
281 OIL_DEFINE_IMPL (subtract_f32_unroll4b, subtract_f32); |
|
282 |
|
283 static void |
|
284 subtract_f32_unroll4c (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
285 { |
|
286 int i; |
|
287 |
|
288 for(i=0;i<(n&(~0x3));i+=4){ |
|
289 *dest++ = *src1++ - *src2++; |
|
290 *dest++ = *src1++ - *src2++; |
|
291 *dest++ = *src1++ - *src2++; |
|
292 *dest++ = *src1++ - *src2++; |
|
293 } |
|
294 for(;i<n;i++){ |
|
295 *dest++ = *src1++ - *src2++; |
|
296 } |
|
297 } |
|
298 OIL_DEFINE_IMPL (subtract_f32_unroll4c, subtract_f32); |
|
299 |
|
300 static void |
|
301 subtract_f64_pointer (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
302 { |
|
303 while (n) { |
|
304 *dest = *src1 - *src2; |
|
305 dest++; |
|
306 src1++; |
|
307 src2++; |
|
308 n--; |
|
309 } |
|
310 } |
|
311 OIL_DEFINE_IMPL (subtract_f64_pointer, subtract_f64); |
|
312 |
|
313 static void |
|
314 subtract_f64_unroll2 (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
315 { |
|
316 int i; |
|
317 |
|
318 if (n & 1) { |
|
319 dest[0] = src1[0] - src2[0]; |
|
320 dest++; |
|
321 src1++; |
|
322 src2++; |
|
323 n--; |
|
324 } |
|
325 for(i=0;i<n;i+=2){ |
|
326 dest[i] = src1[i] - src2[i]; |
|
327 dest[i+1] = src1[i+1] - src2[i+1]; |
|
328 } |
|
329 } |
|
330 OIL_DEFINE_IMPL (subtract_f64_unroll2, subtract_f64); |
|
331 |
|
332 static void |
|
333 subtract_f64_unroll4a (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
334 { |
|
335 int i; |
|
336 |
|
337 while (n & 3) { |
|
338 dest[0] = src1[0] - src2[0]; |
|
339 dest++; |
|
340 src1++; |
|
341 src2++; |
|
342 n--; |
|
343 } |
|
344 for(i=0;i<n;i+=4){ |
|
345 dest[i] = src1[i] - src2[i]; |
|
346 dest[i+1] = src1[i+1] - src2[i+1]; |
|
347 dest[i+2] = src1[i+2] - src2[i+2]; |
|
348 dest[i+3] = src1[i+3] - src2[i+3]; |
|
349 } |
|
350 } |
|
351 OIL_DEFINE_IMPL (subtract_f64_unroll4a, subtract_f64); |
|
352 |
|
353 static void |
|
354 subtract_f64_unroll4b (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
355 { |
|
356 int i; |
|
357 |
|
358 for(i=0;i<(n&(~0x3));i+=4){ |
|
359 dest[i+0] = src1[i+0] - src2[i+0]; |
|
360 dest[i+1] = src1[i+1] - src2[i+1]; |
|
361 dest[i+2] = src1[i+2] - src2[i+2]; |
|
362 dest[i+3] = src1[i+3] - src2[i+3]; |
|
363 } |
|
364 for(;i<n;i++){ |
|
365 dest[i] = src1[i] - src2[i]; |
|
366 } |
|
367 } |
|
368 OIL_DEFINE_IMPL (subtract_f64_unroll4b, subtract_f64); |
|
369 |
|
370 static void |
|
371 subtract_f64_unroll4c (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
372 { |
|
373 int i; |
|
374 |
|
375 for(i=0;i<(n&(~0x3));i+=4){ |
|
376 *dest++ = *src1++ - *src2++; |
|
377 *dest++ = *src1++ - *src2++; |
|
378 *dest++ = *src1++ - *src2++; |
|
379 *dest++ = *src1++ - *src2++; |
|
380 } |
|
381 for(;i<n;i++){ |
|
382 *dest++ = *src1++ - *src2++; |
|
383 } |
|
384 } |
|
385 OIL_DEFINE_IMPL (subtract_f64_unroll4c, subtract_f64); |
|
386 |
|
387 static void |
|
388 divide_f32_pointer (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
389 { |
|
390 while (n) { |
|
391 *dest = *src1 / *src2; |
|
392 dest++; |
|
393 src1++; |
|
394 src2++; |
|
395 n--; |
|
396 } |
|
397 } |
|
398 OIL_DEFINE_IMPL (divide_f32_pointer, divide_f32); |
|
399 |
|
400 static void |
|
401 divide_f32_unroll2 (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
402 { |
|
403 int i; |
|
404 |
|
405 if (n & 1) { |
|
406 dest[0] = src1[0] / src2[0]; |
|
407 dest++; |
|
408 src1++; |
|
409 src2++; |
|
410 n--; |
|
411 } |
|
412 for(i=0;i<n;i+=2){ |
|
413 dest[i] = src1[i] / src2[i]; |
|
414 dest[i+1] = src1[i+1] / src2[i+1]; |
|
415 } |
|
416 } |
|
417 OIL_DEFINE_IMPL (divide_f32_unroll2, divide_f32); |
|
418 |
|
419 static void |
|
420 divide_f32_unroll4a (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
421 { |
|
422 int i; |
|
423 |
|
424 while (n & 3) { |
|
425 dest[0] = src1[0] / src2[0]; |
|
426 dest++; |
|
427 src1++; |
|
428 src2++; |
|
429 n--; |
|
430 } |
|
431 for(i=0;i<n;i+=4){ |
|
432 dest[i] = src1[i] / src2[i]; |
|
433 dest[i+1] = src1[i+1] / src2[i+1]; |
|
434 dest[i+2] = src1[i+2] / src2[i+2]; |
|
435 dest[i+3] = src1[i+3] / src2[i+3]; |
|
436 } |
|
437 } |
|
438 OIL_DEFINE_IMPL (divide_f32_unroll4a, divide_f32); |
|
439 |
|
440 static void |
|
441 divide_f32_unroll4b (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
442 { |
|
443 int i; |
|
444 |
|
445 for(i=0;i<(n&(~0x3));i+=4){ |
|
446 dest[i+0] = src1[i+0] / src2[i+0]; |
|
447 dest[i+1] = src1[i+1] / src2[i+1]; |
|
448 dest[i+2] = src1[i+2] / src2[i+2]; |
|
449 dest[i+3] = src1[i+3] / src2[i+3]; |
|
450 } |
|
451 for(;i<n;i++){ |
|
452 dest[i] = src1[i] / src2[i]; |
|
453 } |
|
454 } |
|
455 OIL_DEFINE_IMPL (divide_f32_unroll4b, divide_f32); |
|
456 |
|
457 static void |
|
458 divide_f32_unroll4c (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
459 { |
|
460 int i; |
|
461 |
|
462 for(i=0;i<(n&(~0x3));i+=4){ |
|
463 *dest++ = *src1++ / *src2++; |
|
464 *dest++ = *src1++ / *src2++; |
|
465 *dest++ = *src1++ / *src2++; |
|
466 *dest++ = *src1++ / *src2++; |
|
467 } |
|
468 for(;i<n;i++){ |
|
469 *dest++ = *src1++ / *src2++; |
|
470 } |
|
471 } |
|
472 OIL_DEFINE_IMPL (divide_f32_unroll4c, divide_f32); |
|
473 |
|
474 static void |
|
475 divide_f64_pointer (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
476 { |
|
477 while (n) { |
|
478 *dest = *src1 / *src2; |
|
479 dest++; |
|
480 src1++; |
|
481 src2++; |
|
482 n--; |
|
483 } |
|
484 } |
|
485 OIL_DEFINE_IMPL (divide_f64_pointer, divide_f64); |
|
486 |
|
487 static void |
|
488 divide_f64_unroll2 (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
489 { |
|
490 int i; |
|
491 |
|
492 if (n & 1) { |
|
493 dest[0] = src1[0] / src2[0]; |
|
494 dest++; |
|
495 src1++; |
|
496 src2++; |
|
497 n--; |
|
498 } |
|
499 for(i=0;i<n;i+=2){ |
|
500 dest[i] = src1[i] / src2[i]; |
|
501 dest[i+1] = src1[i+1] / src2[i+1]; |
|
502 } |
|
503 } |
|
504 OIL_DEFINE_IMPL (divide_f64_unroll2, divide_f64); |
|
505 |
|
506 static void |
|
507 divide_f64_unroll4a (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
508 { |
|
509 int i; |
|
510 |
|
511 while (n & 3) { |
|
512 dest[0] = src1[0] / src2[0]; |
|
513 dest++; |
|
514 src1++; |
|
515 src2++; |
|
516 n--; |
|
517 } |
|
518 for(i=0;i<n;i+=4){ |
|
519 dest[i] = src1[i] / src2[i]; |
|
520 dest[i+1] = src1[i+1] / src2[i+1]; |
|
521 dest[i+2] = src1[i+2] / src2[i+2]; |
|
522 dest[i+3] = src1[i+3] / src2[i+3]; |
|
523 } |
|
524 } |
|
525 OIL_DEFINE_IMPL (divide_f64_unroll4a, divide_f64); |
|
526 |
|
527 static void |
|
528 divide_f64_unroll4b (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
529 { |
|
530 int i; |
|
531 |
|
532 for(i=0;i<(n&(~0x3));i+=4){ |
|
533 dest[i+0] = src1[i+0] / src2[i+0]; |
|
534 dest[i+1] = src1[i+1] / src2[i+1]; |
|
535 dest[i+2] = src1[i+2] / src2[i+2]; |
|
536 dest[i+3] = src1[i+3] / src2[i+3]; |
|
537 } |
|
538 for(;i<n;i++){ |
|
539 dest[i] = src1[i] / src2[i]; |
|
540 } |
|
541 } |
|
542 OIL_DEFINE_IMPL (divide_f64_unroll4b, divide_f64); |
|
543 |
|
544 static void |
|
545 divide_f64_unroll4c (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
546 { |
|
547 int i; |
|
548 |
|
549 for(i=0;i<(n&(~0x3));i+=4){ |
|
550 *dest++ = *src1++ / *src2++; |
|
551 *dest++ = *src1++ / *src2++; |
|
552 *dest++ = *src1++ / *src2++; |
|
553 *dest++ = *src1++ / *src2++; |
|
554 } |
|
555 for(;i<n;i++){ |
|
556 *dest++ = *src1++ / *src2++; |
|
557 } |
|
558 } |
|
559 OIL_DEFINE_IMPL (divide_f64_unroll4c, divide_f64); |
|
560 |
|
561 static void |
|
562 multiply_f32_pointer (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
563 { |
|
564 while (n) { |
|
565 *dest = *src1 * *src2; |
|
566 dest++; |
|
567 src1++; |
|
568 src2++; |
|
569 n--; |
|
570 } |
|
571 } |
|
572 OIL_DEFINE_IMPL (multiply_f32_pointer, multiply_f32); |
|
573 |
|
574 static void |
|
575 multiply_f32_unroll2 (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
576 { |
|
577 int i; |
|
578 |
|
579 if (n & 1) { |
|
580 dest[0] = src1[0] * src2[0]; |
|
581 dest++; |
|
582 src1++; |
|
583 src2++; |
|
584 n--; |
|
585 } |
|
586 for(i=0;i<n;i+=2){ |
|
587 dest[i] = src1[i] * src2[i]; |
|
588 dest[i+1] = src1[i+1] * src2[i+1]; |
|
589 } |
|
590 } |
|
591 OIL_DEFINE_IMPL (multiply_f32_unroll2, multiply_f32); |
|
592 |
|
593 static void |
|
594 multiply_f32_unroll4a (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
595 { |
|
596 int i; |
|
597 |
|
598 while (n & 3) { |
|
599 dest[0] = src1[0] * src2[0]; |
|
600 dest++; |
|
601 src1++; |
|
602 src2++; |
|
603 n--; |
|
604 } |
|
605 for(i=0;i<n;i+=4){ |
|
606 dest[i] = src1[i] * src2[i]; |
|
607 dest[i+1] = src1[i+1] * src2[i+1]; |
|
608 dest[i+2] = src1[i+2] * src2[i+2]; |
|
609 dest[i+3] = src1[i+3] * src2[i+3]; |
|
610 } |
|
611 } |
|
612 OIL_DEFINE_IMPL (multiply_f32_unroll4a, multiply_f32); |
|
613 |
|
614 static void |
|
615 multiply_f32_unroll4b (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
616 { |
|
617 int i; |
|
618 |
|
619 for(i=0;i<(n&(~0x3));i+=4){ |
|
620 dest[i+0] = src1[i+0] * src2[i+0]; |
|
621 dest[i+1] = src1[i+1] * src2[i+1]; |
|
622 dest[i+2] = src1[i+2] * src2[i+2]; |
|
623 dest[i+3] = src1[i+3] * src2[i+3]; |
|
624 } |
|
625 for(;i<n;i++){ |
|
626 dest[i] = src1[i] * src2[i]; |
|
627 } |
|
628 } |
|
629 OIL_DEFINE_IMPL (multiply_f32_unroll4b, multiply_f32); |
|
630 |
|
631 static void |
|
632 multiply_f32_unroll4c (oil_type_f32 *dest, oil_type_f32 *src1, oil_type_f32 *src2, int n) |
|
633 { |
|
634 int i; |
|
635 |
|
636 for(i=0;i<(n&(~0x3));i+=4){ |
|
637 *dest++ = *src1++ * *src2++; |
|
638 *dest++ = *src1++ * *src2++; |
|
639 *dest++ = *src1++ * *src2++; |
|
640 *dest++ = *src1++ * *src2++; |
|
641 } |
|
642 for(;i<n;i++){ |
|
643 *dest++ = *src1++ * *src2++; |
|
644 } |
|
645 } |
|
646 OIL_DEFINE_IMPL (multiply_f32_unroll4c, multiply_f32); |
|
647 |
|
648 static void |
|
649 multiply_f64_pointer (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
650 { |
|
651 while (n) { |
|
652 *dest = *src1 * *src2; |
|
653 dest++; |
|
654 src1++; |
|
655 src2++; |
|
656 n--; |
|
657 } |
|
658 } |
|
659 OIL_DEFINE_IMPL (multiply_f64_pointer, multiply_f64); |
|
660 |
|
661 static void |
|
662 multiply_f64_unroll2 (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
663 { |
|
664 int i; |
|
665 |
|
666 if (n & 1) { |
|
667 dest[0] = src1[0] * src2[0]; |
|
668 dest++; |
|
669 src1++; |
|
670 src2++; |
|
671 n--; |
|
672 } |
|
673 for(i=0;i<n;i+=2){ |
|
674 dest[i] = src1[i] * src2[i]; |
|
675 dest[i+1] = src1[i+1] * src2[i+1]; |
|
676 } |
|
677 } |
|
678 OIL_DEFINE_IMPL (multiply_f64_unroll2, multiply_f64); |
|
679 |
|
680 static void |
|
681 multiply_f64_unroll4a (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
682 { |
|
683 int i; |
|
684 |
|
685 while (n & 3) { |
|
686 dest[0] = src1[0] * src2[0]; |
|
687 dest++; |
|
688 src1++; |
|
689 src2++; |
|
690 n--; |
|
691 } |
|
692 for(i=0;i<n;i+=4){ |
|
693 dest[i] = src1[i] * src2[i]; |
|
694 dest[i+1] = src1[i+1] * src2[i+1]; |
|
695 dest[i+2] = src1[i+2] * src2[i+2]; |
|
696 dest[i+3] = src1[i+3] * src2[i+3]; |
|
697 } |
|
698 } |
|
699 OIL_DEFINE_IMPL (multiply_f64_unroll4a, multiply_f64); |
|
700 |
|
701 static void |
|
702 multiply_f64_unroll4b (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
703 { |
|
704 int i; |
|
705 |
|
706 for(i=0;i<(n&(~0x3));i+=4){ |
|
707 dest[i+0] = src1[i+0] * src2[i+0]; |
|
708 dest[i+1] = src1[i+1] * src2[i+1]; |
|
709 dest[i+2] = src1[i+2] * src2[i+2]; |
|
710 dest[i+3] = src1[i+3] * src2[i+3]; |
|
711 } |
|
712 for(;i<n;i++){ |
|
713 dest[i] = src1[i] * src2[i]; |
|
714 } |
|
715 } |
|
716 OIL_DEFINE_IMPL (multiply_f64_unroll4b, multiply_f64); |
|
717 |
|
718 static void |
|
719 multiply_f64_unroll4c (oil_type_f64 *dest, oil_type_f64 *src1, oil_type_f64 *src2, int n) |
|
720 { |
|
721 int i; |
|
722 |
|
723 for(i=0;i<(n&(~0x3));i+=4){ |
|
724 *dest++ = *src1++ * *src2++; |
|
725 *dest++ = *src1++ * *src2++; |
|
726 *dest++ = *src1++ * *src2++; |
|
727 *dest++ = *src1++ * *src2++; |
|
728 } |
|
729 for(;i<n;i++){ |
|
730 *dest++ = *src1++ * *src2++; |
|
731 } |
|
732 } |
|
733 OIL_DEFINE_IMPL (multiply_f64_unroll4c, multiply_f64); |
|
734 |
|
735 |
|
736 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_add_f32_pointer.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_add_f32_pointer(void) {
        return &_oil_function_impl_add_f32_pointer;
}
#endif
|
743 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_add_f32_unroll2.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_add_f32_unroll2(void) {
        return &_oil_function_impl_add_f32_unroll2;
}
#endif
|
750 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_add_f32_unroll4a.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_add_f32_unroll4a(void) {
        return &_oil_function_impl_add_f32_unroll4a;
}
#endif
|
757 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_add_f32_unroll4b.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_add_f32_unroll4b(void) {
        return &_oil_function_impl_add_f32_unroll4b;
}
#endif
|
764 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_add_f32_unroll4c.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_add_f32_unroll4c(void) {
        return &_oil_function_impl_add_f32_unroll4c;
}
#endif
|
771 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_add_f64_pointer.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_add_f64_pointer(void) {
        return &_oil_function_impl_add_f64_pointer;
}
#endif
|
778 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_add_f64_unroll2.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_add_f64_unroll2(void) {
        return &_oil_function_impl_add_f64_unroll2;
}
#endif
|
785 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_add_f64_unroll4a.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_add_f64_unroll4a(void) {
        return &_oil_function_impl_add_f64_unroll4a;
}
#endif
|
792 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_add_f64_unroll4b.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_add_f64_unroll4b(void) {
        return &_oil_function_impl_add_f64_unroll4b;
}
#endif
|
799 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_add_f64_unroll4c.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_add_f64_unroll4c(void) {
        return &_oil_function_impl_add_f64_unroll4c;
}
#endif
|
806 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_subtract_f32_pointer.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_subtract_f32_pointer(void) {
        return &_oil_function_impl_subtract_f32_pointer;
}
#endif
|
813 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_subtract_f32_unroll2.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_subtract_f32_unroll2(void) {
        return &_oil_function_impl_subtract_f32_unroll2;
}
#endif
|
820 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_subtract_f32_unroll4a.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_subtract_f32_unroll4a(void) {
        return &_oil_function_impl_subtract_f32_unroll4a;
}
#endif
|
827 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_subtract_f32_unroll4b.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_subtract_f32_unroll4b(void) {
        return &_oil_function_impl_subtract_f32_unroll4b;
}
#endif
|
834 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_subtract_f32_unroll4c.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_subtract_f32_unroll4c(void) {
        return &_oil_function_impl_subtract_f32_unroll4c;
}
#endif
|
841 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_subtract_f64_pointer.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_subtract_f64_pointer(void) {
        return &_oil_function_impl_subtract_f64_pointer;
}
#endif
|
848 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_subtract_f64_unroll2.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_subtract_f64_unroll2(void) {
        return &_oil_function_impl_subtract_f64_unroll2;
}
#endif
|
855 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_subtract_f64_unroll4a.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_subtract_f64_unroll4a(void) {
        return &_oil_function_impl_subtract_f64_unroll4a;
}
#endif
|
862 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_subtract_f64_unroll4b.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_subtract_f64_unroll4b(void) {
        return &_oil_function_impl_subtract_f64_unroll4b;
}
#endif
|
869 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_subtract_f64_unroll4c.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_subtract_f64_unroll4c(void) {
        return &_oil_function_impl_subtract_f64_unroll4c;
}
#endif
|
876 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_divide_f32_pointer.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_divide_f32_pointer(void) {
        return &_oil_function_impl_divide_f32_pointer;
}
#endif
|
883 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_divide_f32_unroll2.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_divide_f32_unroll2(void) {
        return &_oil_function_impl_divide_f32_unroll2;
}
#endif
|
890 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_divide_f32_unroll4a.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_divide_f32_unroll4a(void) {
        return &_oil_function_impl_divide_f32_unroll4a;
}
#endif
|
897 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_divide_f32_unroll4b.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_divide_f32_unroll4b(void) {
        return &_oil_function_impl_divide_f32_unroll4b;
}
#endif
|
904 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_divide_f32_unroll4c.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_divide_f32_unroll4c(void) {
        return &_oil_function_impl_divide_f32_unroll4c;
}
#endif
|
911 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_divide_f64_pointer.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_divide_f64_pointer(void) {
        return &_oil_function_impl_divide_f64_pointer;
}
#endif
|
918 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_divide_f64_unroll2.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_divide_f64_unroll2(void) {
        return &_oil_function_impl_divide_f64_unroll2;
}
#endif
|
925 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_divide_f64_unroll4a.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_divide_f64_unroll4a(void) {
        return &_oil_function_impl_divide_f64_unroll4a;
}
#endif
|
932 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_divide_f64_unroll4b.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_divide_f64_unroll4b(void) {
        return &_oil_function_impl_divide_f64_unroll4b;
}
#endif
|
939 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_divide_f64_unroll4c.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_divide_f64_unroll4c(void) {
        return &_oil_function_impl_divide_f64_unroll4c;
}
#endif
|
946 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_multiply_f32_pointer.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_multiply_f32_pointer(void) {
        return &_oil_function_impl_multiply_f32_pointer;
}
#endif
|
953 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_multiply_f32_unroll2.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_multiply_f32_unroll2(void) {
        return &_oil_function_impl_multiply_f32_unroll2;
}
#endif
|
960 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_multiply_f32_unroll4a.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_multiply_f32_unroll4a(void) {
        return &_oil_function_impl_multiply_f32_unroll4a;
}
#endif
|
967 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_multiply_f32_unroll4b.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_multiply_f32_unroll4b(void) {
        return &_oil_function_impl_multiply_f32_unroll4b;
}
#endif
|
974 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_multiply_f32_unroll4c.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_multiply_f32_unroll4c(void) {
        return &_oil_function_impl_multiply_f32_unroll4c;
}
#endif
|
981 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_multiply_f64_pointer.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_multiply_f64_pointer(void) {
        return &_oil_function_impl_multiply_f64_pointer;
}
#endif
|
988 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_multiply_f64_unroll2.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_multiply_f64_unroll2(void) {
        return &_oil_function_impl_multiply_f64_unroll2;
}
#endif
|
995 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_multiply_f64_unroll4a.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_multiply_f64_unroll4a(void) {
        return &_oil_function_impl_multiply_f64_unroll4a;
}
#endif
|
1002 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_multiply_f64_unroll4b.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_multiply_f64_unroll4b(void) {
        return &_oil_function_impl_multiply_f64_unroll4b;
}
#endif
|
1009 |
|
#ifdef __SYMBIAN32__

/* Compiled only when __SYMBIAN32__ is defined.  Returns the address of
 * _oil_function_impl_multiply_f64_unroll4c.  Declared with (void) so the
 * definition is a real prototype that takes no arguments. */
OilFunctionImpl* __oil_function_impl_multiply_f64_unroll4c(void) {
        return &_oil_function_impl_multiply_f64_unroll4c;
}
#endif
|
1016 |