1 #include"liboil.h" |
|
2 |
|
3 //Arun's changes |
|
/* Forward declarations so the class and implementation structures can
 * refer to each other through pointers. */
typedef struct _OilFunctionImpl OilFunctionImpl;
typedef struct _OilFunctionClass OilFunctionClass;

/* Signature of a per-class test hook: it is handed the class and one
 * implementation of that class. */
typedef void (*OilTestFunction) (OilFunctionClass *klass,
    OilFunctionImpl *impl);

/* Prototype checking is compiled out here: the argument is discarded. */
#define OIL_CHECK_PROTOTYPE(a)

/**
 * OilFunctionClass:
 *
 * An opaque structure representing a function class.
 *
 * The field order matters: OIL_DEFINE_CLASS_FULL() fills the structure
 * with a positional initializer.
 */
struct _OilFunctionClass {
  /*< private >*/
  void *func;                       /* entry point of the selected implementation;
                                       written by oil_class_optimize() and called
                                       by the public oil_* wrappers */
  const char *name;                 /* class name (stringified macro argument) */
  const char *desc;                 /* left NULL by OIL_DEFINE_CLASS_FULL() */
  OilTestFunction test_func;        /* optional test hook, may be NULL */

  OilFunctionImpl *first_impl;      /* head of the implementation list walked
                                       through OilFunctionImpl.next */
  OilFunctionImpl *reference_impl;  /* oil_class_optimize() gives up when NULL */

  OilFunctionImpl *chosen_impl;     /* set by oil_class_optimize() */

  const char *prototype;            /* prototype string given at definition */
};
|
29 |
|
30 |
|
31 /** |
|
32 * OilFunctionImpl: |
|
33 * |
|
34 * An opaque structure representing a function implementation. |
|
35 * |
|
36 */ |
|
37 struct _OilFunctionImpl { |
|
38 /*< private >*/ |
|
39 void *next; |
|
40 OilFunctionClass *klass; |
|
41 void *func; |
|
42 unsigned int flags; |
|
43 const char *name; |
|
44 double profile_ave; |
|
45 double profile_std; |
|
46 }; |
|
47 |
|
#ifndef OIL_NO_CLASSES
/**
 * OIL_DEFINE_CLASS_FULL:
 * @klass: name of class to declare (without oil_ prefix)
 * @string: prototype of class
 * @test: test function
 *
 * Defines the #OilFunctionClass object _oil_function_class_@klass and a
 * pointer to it named oil_function_class_ptr_@klass.  Only the name,
 * test function and prototype are filled in; every other field starts
 * out as NULL.
 *
 * NOTE(review): the upstream text says classes defined this way are
 * picked up automatically at Liboil initialization time; nothing in
 * this file demonstrates that - confirm against the library.
 */
#define OIL_DEFINE_CLASS_FULL(klass, string, test) \
OilFunctionClass _oil_function_class_ ## klass = { \
  NULL,      /* func */ \
  #klass ,   /* name */ \
  NULL,      /* desc */ \
  test,      /* test_func */ \
  NULL,      /* first_impl */ \
  NULL,      /* reference_impl */ \
  NULL,      /* chosen_impl */ \
  string     /* prototype */ \
}; \
OilFunctionClass *oil_function_class_ptr_ ## klass = \
  &_oil_function_class_ ## klass
#else
/* With OIL_NO_CLASSES the class is only declared, not defined. */
#define OIL_DEFINE_CLASS_FULL(klass, string, test) \
  OIL_DECLARE_CLASS(klass)
#endif
|
77 |
|
/**
 * OIL_DEFINE_CLASS:
 * @klass: name of class to declare (without oil_ prefix)
 * @string: prototype of class
 *
 * Shorthand for OIL_DEFINE_CLASS_FULL() with no test function: the
 * @test argument is passed as NULL.
 */
#define OIL_DEFINE_CLASS(klass, string) OIL_DEFINE_CLASS_FULL (klass, string, NULL)
|
89 |
|
90 |
|
91 OIL_DEFINE_CLASS (scalarmultiply_f32_ns, "float *d, float *s1, float *s2_1, int n"); |
|
92 |
|
93 OIL_DEFINE_CLASS_FULL (resample_linear_argb, |
|
94 "uint32_t *d_n, uint32_t *s_2xn, int n, uint32_t *i_2", |
|
95 NULL); |
|
96 |
|
97 OIL_DEFINE_CLASS_FULL (resample_linear_u8, |
|
98 "uint8_t *d_n, uint8_t *s_2xn, int n, uint32_t *i_2", |
|
99 NULL); |
|
100 |
|
101 OIL_DEFINE_CLASS_FULL (merge_linear_argb, |
|
102 "uint32_t *d_n, uint32_t *s_n, uint32_t *s2_n, uint32_t *s3_1, int n", |
|
103 NULL); |
|
104 OIL_DEFINE_CLASS_FULL (merge_linear_u8, |
|
105 "uint8_t *d_n, uint8_t *s_n, uint8_t *s2_n, uint32_t *s3_1, int n", |
|
106 NULL); |
|
107 |
|
108 OIL_DEFINE_CLASS(splat_u8_ns,"uint8_t *dest, uint8_t *s1_1, int n"); |
|
109 |
|
110 OIL_DEFINE_CLASS(splat_u8,"uint8_t *dest, int dstr, uint8_t *s1_1, int n"); |
|
111 |
|
/*
 * Implementation flags stored in OilFunctionImpl.flags.
 *
 * The low bits describe the implementation itself; bits 16 and up are
 * CPU requirements (they are the bits covered by OIL_CPU_FLAG_MASK).
 */
typedef enum {
  /* implementation properties */
  OIL_IMPL_FLAG_REF      = 0x00000001,
  OIL_IMPL_FLAG_OPT      = 0x00000002,
  OIL_IMPL_FLAG_ASM      = 0x00000004,
  OIL_IMPL_FLAG_DISABLED = 0x00000008,

  /* CPU feature requirements */
  OIL_IMPL_FLAG_CMOV     = 0x00010000,
  OIL_IMPL_FLAG_MMX      = 0x00020000,
  OIL_IMPL_FLAG_SSE      = 0x00040000,
  OIL_IMPL_FLAG_MMXEXT   = 0x00080000,
  OIL_IMPL_FLAG_SSE2     = 0x00100000,
  OIL_IMPL_FLAG_3DNOW    = 0x00200000,
  OIL_IMPL_FLAG_3DNOWEXT = 0x00400000,
  OIL_IMPL_FLAG_SSE3     = 0x00800000,
  OIL_IMPL_FLAG_ALTIVEC  = 0x01000000,
  OIL_IMPL_FLAG_EDSP     = 0x02000000,
  OIL_IMPL_FLAG_ARM6     = 0x04000000,
  OIL_IMPL_FLAG_VFP      = 0x08000000,
  OIL_IMPL_FLAG_SSSE3    = 0x10000000
} OilImplFlag;
|
131 |
|
/*
 * Name and flag mangling hooks used by OIL_DEFINE_IMPL_FULL().
 *
 * Without a caller-supplied OIL_OPT_MANGLE both hooks pass their
 * argument through untouched.  When the caller does supply one, class
 * definitions are switched off (OIL_NO_CLASSES) and the flag hook
 * clears OIL_IMPL_FLAG_REF and sets OIL_IMPL_FLAG_OPT.
 */
#if !defined(OIL_OPT_MANGLE)
#define OIL_OPT_MANGLE(a) a
#define OIL_OPT_FLAG_MANGLE(a) a
#else
#define OIL_NO_CLASSES
#define OIL_OPT_FLAG_MANGLE(a) (((a)&(~OIL_IMPL_FLAG_REF)) | OIL_IMPL_FLAG_OPT)
#endif

/* Text appended to implementation names; empty unless overridden. */
#if !defined(OIL_OPT_SUFFIX)
#define OIL_OPT_SUFFIX
#endif
|
142 |
|
/**
 * OIL_DEFINE_IMPL_FULL:
 * @function: name of function
 * @klass: name of class to declare (without oil_ prefix)
 * @flags: implementation flags and CPU requirements
 *
 * Defines the #OilFunctionImpl object _oil_function_impl_@function
 * (name passed through OIL_OPT_MANGLE) for class @klass.  The object
 * records the class, the function pointer cast to void *, the flags
 * passed through OIL_OPT_FLAG_MANGLE and the function name followed by
 * OIL_OPT_SUFFIX.  CPU-dependent bits in @flags state which CPU
 * features the implementation requires.
 */
#define OIL_DEFINE_IMPL_FULL(function,klass,flags) \
OilFunctionImpl OIL_OPT_MANGLE(_oil_function_impl_ ## function) = { \
  NULL,                            /* next */ \
  &_oil_function_class_ ## klass , /* klass */ \
  (void *)function,                /* func */ \
  OIL_OPT_FLAG_MANGLE(flags),      /* flags */ \
  #function OIL_OPT_SUFFIX         /* name */ \
} \
OIL_CHECK_PROTOTYPE(;_oil_type_ ## klass _ignore_me_ ## function = function)
|
162 |
|
/**
 * OIL_DEFINE_IMPL:
 * @function: name of function
 * @klass: name of class to declare (without oil_ prefix)
 *
 * Defines an implementation with no flags set (flags argument 0).
 * See OIL_DEFINE_IMPL_FULL().
 */
#define OIL_DEFINE_IMPL(function,klass) OIL_DEFINE_IMPL_FULL(function,klass,0)

/**
 * OIL_DEFINE_IMPL_REF:
 * @function: name of function
 * @klass: name of class to declare (without oil_ prefix)
 *
 * Defines an implementation flagged with OIL_IMPL_FLAG_REF, i.e. a
 * reference implementation.  See OIL_DEFINE_IMPL_FULL().
 */
#define OIL_DEFINE_IMPL_REF(function,klass) OIL_DEFINE_IMPL_FULL(function,klass,OIL_IMPL_FLAG_REF)
|
181 |
|
182 |
|
183 |
|
184 |
|
/* Function-pointer type with the same parameter list as the public
 * oil_scalarmultiply_f32_ns().  The macro form of that entry point is
 * disabled; this file provides it as a real function instead. */
typedef void (*_oil_type_scalarmultiply_f32_ns)(float * d, const float * s1, const float * s2_1, int n);

/* OIL_GET: lvalue of type `type` located `offset` bytes past `ptr`. */
#define OIL_GET(ptr, offset, type) (*(type *)(((uint8_t *)(ptr)) + (offset)))
|
191 |
|
192 /**************'_oil_resample_linear_u8'****************************/ |
|
/*
 * resample_linear_u8_ref:
 * @dest: output samples, @n of them are written
 * @src: input samples
 * @n: number of output samples
 * @in: in[0] is the starting position and in[1] the per-sample step,
 *      both in 16.16 fixed point; in[0] is updated to the position
 *      reached after the last sample
 *
 * Reference linear resampler for 8-bit samples.  Each output is a blend
 * of src[j] and src[j+1], where j is the integer part of the position
 * and the blend weight is the top 8 bits of its fraction.
 *
 * This function is static, so it must not carry the Symbian EXPORT_C
 * marker: a symbol with internal linkage cannot be exported.
 */
static void
resample_linear_u8_ref (uint8_t *dest, uint8_t *src, int n,
    uint32_t *in)
{
  int acc = in[0];
  int increment = in[1];
  int i;

  for (i = 0; i < n; i++) {
    int j = acc >> 16;              /* integer part: source index */
    int x = (acc & 0xffff) >> 8;    /* top 8 fraction bits: weight 0..255 */

    /* src[j + 1] is read even when its weight x is 0. */
    dest[i] = (src[j] * (256 - x) + src[j + 1] * x) >> 8;

    acc += increment;
  }

  /* Hand the final position back so a later call can continue from it. */
  in[0] = acc;
}
|
218 |
|
219 /************************'_oil_resample_linear_argb'***************************/ |
|
/*
 * Reference linear resampler for 4-byte pixels.
 *
 * d receives n pixels, s is the source, in[0] is the 16.16 fixed-point
 * start position and in[1] the per-pixel step.  Each of the four bytes
 * of a pixel is blended independently between source pixel j and j+1,
 * weighted by the top 8 bits of the position's fraction.  in[0] is
 * updated to the position reached after the last pixel.
 */
static void
resample_linear_argb_ref (uint32_t *d, uint32_t *s, int n, uint32_t *in)
{
  uint8_t *out = (uint8_t *)d;
  uint8_t *pix = (uint8_t *)s;
  int pos = in[0];
  int step = in[1];
  int p;

  for (p = 0; p < n; p++) {
    int idx = pos >> 16;
    int frac = (pos & 0xffff) >> 8;
    int ch;

    /* Same blend for every byte of the pixel, channel 0 first. */
    for (ch = 0; ch < 4; ch++) {
      out[4*p + ch] =
          (pix[4*idx + ch] * (256 - frac) + pix[4*idx + 4 + ch] * frac) >> 8;
    }

    pos += step;
  }

  in[0] = pos;
}
|
244 |
|
245 /****************** '_oil_merge_linear_argb'**************************/ |
|
/*
 * Reference blend of two arrays of 4-byte pixels.
 *
 * For every byte of the n pixels: d = (s1 * (256 - x) + s2 * x) >> 8,
 * with the weight x taken from src3[0].
 */
static void
merge_linear_argb_ref (uint32_t *d, uint32_t *s1, uint32_t *s2,
    uint32_t *src3, int n)
{
  uint8_t *a = (uint8_t *)s1;
  uint8_t *b = (uint8_t *)s2;
  uint8_t *out = (uint8_t *)d;
  int x = src3[0];
  int p;

  for (p = 0; p < n; p++) {
    int base = 4*p;
    int ch;

    /* Channels are handled one after another, lowest byte offset first. */
    for (ch = 0; ch < 4; ch++) {
      out[base + ch] = (a[base + ch] * (256 - x) + b[base + ch] * x) >> 8;
    }
  }
}
|
263 |
|
/*
 * Reference blend of two byte arrays.
 *
 * dest[k] = (src1[k] * (256 - x) + src2[k] * x) >> 8 for k in [0, n),
 * with the weight x taken from src3[0].
 */
static void
merge_linear_u8_ref (uint8_t *dest, uint8_t *src1, uint8_t *src2,
    uint32_t *src3, int n)
{
  int x = src3[0];
  int k = 0;

  while (k < n) {
    int from_first = src1[k] * (256 - x);
    int from_second = src2[k] * x;

    dest[k] = (from_first + from_second) >> 8;
    k++;
  }
}
|
275 |
|
276 static void splat_u8_ref (uint8_t *dest, int dstr, uint8_t *param, int n) |
|
277 { |
|
278 int i; |
|
279 for(i=0;i<n;i++){ |
|
280 OIL_GET(dest,i*dstr, uint8_t) = *param; |
|
281 } |
|
282 } |
|
283 |
|
/*
 * Reference contiguous fill: stores the byte *param into dest[0..n-1].
 * *param is re-read for every store.
 */
static void splat_u8_ns_ref (uint8_t *dest, uint8_t *param, int n)
{
  int k = 0;

  while (k < n) {
    dest[k] = *param;
    k++;
  }
}
|
291 |
|
/*
 * Reference scalar multiply: dest[k] = src1[k] * src2[0] for k in
 * [0, n).  The scalar src2[0] is re-read on every iteration.
 */
static void
scalarmultiply_f32_ns_ref (float *dest, float *src1, float *src2, int n)
{
  int k = 0;

  while (k < n) {
    dest[k] = src1[k] * src2[0];
    k++;
  }
}
|
301 |
|
302 /********oil_splat_u8*******/ |
|
303 /* |
|
304 EXPORT_C void splat_u8_ref (uint8_t *dest, int dstr, uint8_t *param, int n) |
|
305 { |
|
306 int i; |
|
307 for(i=0;i<n;i++){ |
|
308 OIL_GET(dest,i*dstr, uint8_t) = *param; |
|
309 } |
|
310 } |
|
311 */ |
|
312 |
|
/* Bitmask of available CPU features.  Nothing in this file assigns it,
 * so unless it is set elsewhere it keeps its zero initial value. */
unsigned long oil_cpu_flags;

/**
 * oil_cpu_get_flags:
 *
 * Returns the current value of the oil_cpu_flags bitmask, narrowed from
 * unsigned long to unsigned int.
 *
 * Returns: the CPU features.
 */
#ifdef __SYMBIAN32__
EXPORT_C
#endif

unsigned int
oil_cpu_get_flags (void)
{
  const unsigned int features = (unsigned int) oil_cpu_flags;

  return features;
}
|
331 |
|
332 /** |
|
333 * OIL_CPU_FLAG_MASK: |
|
334 * |
|
335 * Mask describing which bits in #OilImplFlag depend on the current |
|
336 * CPU. |
|
337 */ |
|
338 #define OIL_CPU_FLAG_MASK 0xffff0000 |
|
339 |
|
340 /** |
|
341 * oil_impl_is_runnable: |
|
342 * @impl: an @OilFunctionImpl |
|
343 * |
|
344 * Determines whether the function implementation given by @impl |
|
345 * can be executed by the current CPU. |
|
346 * |
|
347 * Returns: 1 if the implementation can be executed, otherwise 0 |
|
348 */ |
|
349 #ifdef __SYMBIAN32__ |
|
350 EXPORT_C |
|
351 #endif |
|
352 |
|
353 int |
|
354 oil_impl_is_runnable (OilFunctionImpl *impl) |
|
355 { |
|
356 unsigned int oil_cpu_flags = oil_cpu_get_flags(); |
|
357 |
|
358 if ((impl->flags & OIL_CPU_FLAG_MASK) & (~oil_cpu_flags)) |
|
359 return 0; |
|
360 return 1; |
|
361 } |
|
362 |
|
363 /** |
|
364 * oil_class_optimize: |
|
365 * @klass: a function class |
|
366 * |
|
367 * Tests and profiles each implementation for the given function |
|
368 * class. Testing compares the output of running each implementation |
|
369 * on random input against the reference implementation for the |
|
370 * same input. |
|
371 */ |
|
372 #ifdef __SYMBIAN32__ |
|
373 EXPORT_C |
|
374 #endif |
|
375 |
|
376 void |
|
377 oil_class_optimize (OilFunctionClass * klass) |
|
378 { |
|
379 OilFunctionImpl *impl; |
|
380 OilFunctionImpl *min_impl; |
|
381 int ret; |
|
382 |
|
383 |
|
384 if (klass->reference_impl == NULL) { |
|
385 return; |
|
386 } |
|
387 if (klass->first_impl == NULL) { |
|
388 return; |
|
389 } |
|
390 |
|
391 min_impl = NULL; |
|
392 |
|
393 for (impl = klass->first_impl; impl; impl = impl->next) { |
|
394 if (!oil_impl_is_runnable (impl)) |
|
395 continue; |
|
396 } |
|
397 |
|
398 if (min_impl == NULL) { |
|
399 return; |
|
400 } |
|
401 |
|
402 klass->chosen_impl = min_impl; |
|
403 klass->func = min_impl->func; |
|
404 |
|
405 } |
|
406 |
|
407 |
|
/*
 * oil_scalarmultiply_f32_ns: d[i] = s1[i] * s2_1[0] for i in [0, n).
 *
 * Class-based dispatch is bypassed for this entry point: it always runs
 * the reference kernel.
 */
#ifdef __SYMBIAN32__
EXPORT_C
#endif
void oil_scalarmultiply_f32_ns (float * d, const float * s1, const float * s2_1, int n)
{
  /* The reference kernel takes non-const pointers but only reads them. */
  float *values = (float *) s1;
  float *scale = (float *) s2_1;

  scalarmultiply_f32_ns_ref (d, values, scale, n);
}
|
421 #ifdef __SYMBIAN32__ |
|
422 EXPORT_C |
|
423 #endif |
|
424 |
|
425 void |
|
426 oil_merge_linear_argb (uint32_t * d_n, const uint32_t * s_n, const uint32_t * s2_n, const uint32_t * s3_1, int n) |
|
427 { |
|
428 if (_oil_function_class_merge_linear_argb.func == NULL) { |
|
429 oil_class_optimize (&_oil_function_class_merge_linear_argb); |
|
430 } |
|
431 ((void (*)(uint32_t * d_n, const uint32_t * s_n, const uint32_t * s2_n, const uint32_t * s3_1, int n))(_oil_function_class_merge_linear_argb.func))(d_n, s_n, s2_n, s3_1, n); |
|
432 } |
|
433 #ifdef __SYMBIAN32__ |
|
434 EXPORT_C |
|
435 #endif |
|
436 void |
|
437 oil_merge_linear_u8 (uint8_t * d_n, const uint8_t * s_n, const uint8_t * s2_n, const uint32_t * s3_1, int n) |
|
438 { |
|
439 if (_oil_function_class_merge_linear_u8.func == NULL) { |
|
440 oil_class_optimize (&_oil_function_class_merge_linear_u8); |
|
441 } |
|
442 ((void (*)(uint8_t * d_n, const uint8_t * s_n, const uint8_t * s2_n, const uint32_t * s3_1, int n))(_oil_function_class_merge_linear_u8.func))(d_n, s_n, s2_n, s3_1, n); |
|
443 } |
|
444 #ifdef __SYMBIAN32__ |
|
445 EXPORT_C |
|
446 #endif |
|
447 |
|
448 |
|
449 void |
|
450 oil_resample_linear_argb (uint32_t * d_n, const uint32_t * s_2xn, int n, uint32_t * i_2) |
|
451 { |
|
452 if (_oil_function_class_resample_linear_argb.func == NULL) { |
|
453 oil_class_optimize (&_oil_function_class_resample_linear_argb); |
|
454 } |
|
455 ((void (*)(uint32_t * d_n, const uint32_t * s_2xn, int n, uint32_t * i_2))(_oil_function_class_resample_linear_argb.func))(d_n, s_2xn, n, i_2); |
|
456 } |
|
457 #ifdef __SYMBIAN32__ |
|
458 EXPORT_C |
|
459 #endif |
|
460 void |
|
461 oil_resample_linear_u8 (uint8_t * d_n, const uint8_t * s_2xn, int n, uint32_t * i_2) |
|
462 { |
|
463 if (_oil_function_class_resample_linear_u8.func == NULL) { |
|
464 oil_class_optimize (&_oil_function_class_resample_linear_u8); |
|
465 } |
|
466 ((void (*)(uint8_t * d_n, const uint8_t * s_2xn, int n, uint32_t * i_2))(_oil_function_class_resample_linear_u8.func))(d_n, s_2xn, n, i_2); |
|
467 } |
|
468 #ifdef __SYMBIAN32__ |
|
469 EXPORT_C |
|
470 #endif |
|
471 |
|
472 |
|
473 void |
|
474 oil_splat_u8 (uint8_t * dest, int dstr, const uint8_t * s1_1, int n) |
|
475 { |
|
476 if (_oil_function_class_splat_u8.func == NULL) { |
|
477 oil_class_optimize (&_oil_function_class_splat_u8); |
|
478 } |
|
479 ((void (*)(uint8_t * dest, int dstr, const uint8_t * s1_1, int n))(_oil_function_class_splat_u8.func))(dest, dstr, s1_1, n); |
|
480 } |
|
481 #ifdef __SYMBIAN32__ |
|
482 EXPORT_C |
|
483 #endif |
|
484 |
|
485 void |
|
486 oil_splat_u8_ns (uint8_t * dest, const uint8_t * s1_1, int n) |
|
487 { |
|
488 if (_oil_function_class_splat_u8_ns.func == NULL) { |
|
489 oil_class_optimize (&_oil_function_class_splat_u8_ns); |
|
490 } |
|
491 ((void (*)(uint8_t * dest, const uint8_t * s1_1, int n))(_oil_function_class_splat_u8_ns.func))(dest, s1_1, n); |
|
492 } |
|
493 |
|
/*
 * One OilFunctionImpl object per reference kernel, each flagged with
 * OIL_IMPL_FLAG_REF and pointing at its class.
 */
OIL_DEFINE_IMPL_REF (scalarmultiply_f32_ns_ref, scalarmultiply_f32_ns);
OIL_DEFINE_IMPL_REF (resample_linear_u8_ref,    resample_linear_u8);
OIL_DEFINE_IMPL_REF (resample_linear_argb_ref,  resample_linear_argb);
OIL_DEFINE_IMPL_REF (merge_linear_argb_ref,     merge_linear_argb);
OIL_DEFINE_IMPL_REF (merge_linear_u8_ref,       merge_linear_u8);
OIL_DEFINE_IMPL_REF (splat_u8_ref,              splat_u8);
OIL_DEFINE_IMPL_REF (splat_u8_ns_ref,           splat_u8_ns);
|
501 |
|