/*
 * LIBOIL - Library of Optimized Inner Loops
 * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
//Portions Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved.
|
29 #ifdef HAVE_CONFIG_H |
|
30 #include "config.h" |
|
31 #endif |
|
32 |
|
33 #include <liboil/liboilfunction.h> |
|
34 |
|
35 OIL_DECLARE_CLASS (diff8x8_s16_u8); |
|
36 OIL_DECLARE_CLASS (diff8x8_const128_s16_u8); |
|
37 OIL_DECLARE_CLASS (diff8x8_average_s16_u8); |
|
38 |
|
39 |
|
40 static void |
|
41 diff8x8_s16_u8_mmx (int16_t *dest, uint8_t *src1, int ss1, uint8_t *src2, int ss2) |
|
42 { |
|
43 #if !defined(__WINSCW__) && !defined(__WINS__) |
|
44 __asm__ __volatile__ ( |
|
45 " pxor %%mm7, %%mm7 \n\t" |
|
46 |
|
47 #define LOOP \ |
|
48 " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */ \ |
|
49 " movq (%1), %%mm1 \n\t" /* mm1 = ReconPtr */ \ |
|
50 " movq %%mm0, %%mm2 \n\t" /* dup to prepare for up conversion */ \ |
|
51 " movq %%mm1, %%mm3 \n\t" /* dup to prepare for up conversion */ \ |
|
52 /* convert from UINT8 to INT16 */ \ |
|
53 " punpcklbw %%mm7, %%mm0 \n\t" /* mm0 = INT16(FiltPtr) */ \ |
|
54 " punpcklbw %%mm7, %%mm1 \n\t" /* mm1 = INT16(ReconPtr) */ \ |
|
55 " punpckhbw %%mm7, %%mm2 \n\t" /* mm2 = INT16(FiltPtr) */ \ |
|
56 " punpckhbw %%mm7, %%mm3 \n\t" /* mm3 = INT16(ReconPtr) */ \ |
|
57 /* start calculation */ \ |
|
58 " psubw %%mm1, %%mm0 \n\t" /* mm0 = FiltPtr - ReconPtr */ \ |
|
59 " psubw %%mm3, %%mm2 \n\t" /* mm2 = FiltPtr - ReconPtr */ \ |
|
60 " movq %%mm0, (%2) \n\t" /* write answer out */ \ |
|
61 " movq %%mm2, 8(%2) \n\t" /* write answer out */ \ |
|
62 /* Increment pointers */ \ |
|
63 " add $16, %2 \n\t" \ |
|
64 " add %3, %0 \n\t" \ |
|
65 " add %4, %1 \n\t" |
|
66 |
|
67 LOOP |
|
68 LOOP |
|
69 LOOP |
|
70 LOOP |
|
71 LOOP |
|
72 LOOP |
|
73 LOOP |
|
74 LOOP |
|
75 #undef LOOP |
|
76 |
|
77 " emms \n\t" |
|
78 |
|
79 : "+r" (src1), |
|
80 "+r" (src2), |
|
81 "+r" (dest) |
|
82 : "m" (ss1), |
|
83 "m" (ss2) |
|
84 : "memory" |
|
85 ); |
|
86 #endif |
|
87 } |
|
88 OIL_DEFINE_IMPL_FULL (diff8x8_s16_u8_mmx, diff8x8_s16_u8, OIL_IMPL_FLAG_MMX); |
|
89 |
|
90 static void |
|
91 diff8x8_const128_s16_u8_mmx (int16_t *dest, uint8_t *src1, int ss1) |
|
92 { |
|
93 #if !defined(__WINSCW__) && !defined(__WINS__) |
|
94 const int16_t tmp[4] = { 0x0080, 0x0080, 0x0080, 0x0080 }; |
|
95 |
|
96 __asm__ __volatile__ ( |
|
97 " pxor %%mm7, %%mm7 \n\t" |
|
98 " movq (%3), %%mm1 \n\t" |
|
99 |
|
100 #define LOOP \ |
|
101 " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */ \ |
|
102 " movq %%mm0, %%mm2 \n\t" /* dup to prepare for up conversion */ \ |
|
103 /* convert from UINT8 to INT16 */ \ |
|
104 " punpcklbw %%mm7, %%mm0 \n\t" /* mm0 = INT16(FiltPtr) */ \ |
|
105 " punpckhbw %%mm7, %%mm2 \n\t" /* mm2 = INT16(FiltPtr) */ \ |
|
106 /* start calculation */ \ |
|
107 " psubw %%mm1, %%mm0 \n\t" /* mm0 = FiltPtr - 128 */ \ |
|
108 " psubw %%mm1, %%mm2 \n\t" /* mm2 = FiltPtr - 128 */ \ |
|
109 " movq %%mm0, (%1) \n\t" /* write answer out */ \ |
|
110 " movq %%mm2, 8(%1) \n\t" /* write answer out */ \ |
|
111 /* Increment pointers */ \ |
|
112 " add $16, %1 \n\t" \ |
|
113 " add %2, %0 \n\t" |
|
114 |
|
115 LOOP |
|
116 LOOP |
|
117 LOOP |
|
118 LOOP |
|
119 LOOP |
|
120 LOOP |
|
121 LOOP |
|
122 LOOP |
|
123 #undef LOOP |
|
124 |
|
125 " emms \n\t" |
|
126 |
|
127 : "+r" (src1), |
|
128 "+r" (dest) |
|
129 : "r" (ss1), |
|
130 "r" (tmp) |
|
131 : "memory" |
|
132 ); |
|
133 #endif |
|
134 } |
|
135 OIL_DEFINE_IMPL_FULL (diff8x8_const128_s16_u8_mmx, diff8x8_const128_s16_u8, OIL_IMPL_FLAG_MMX); |
|
136 |
|
137 static void |
|
138 diff8x8_average_s16_u8_mmx (int16_t *dest, uint8_t *src1, int ss1, uint8_t *src2, int ss2, uint8_t *src3) |
|
139 { |
|
140 #if !defined(__WINSCW__) && !defined(__WINS__) |
|
141 __asm__ __volatile__ ( |
|
142 " pxor %%mm7, %%mm7 \n\t" |
|
143 |
|
144 #define LOOP \ |
|
145 " movq (%0), %%mm0 \n\t" /* mm0 = FiltPtr */ \ |
|
146 " movq (%1), %%mm1 \n\t" /* mm1 = ReconPtr1 */ \ |
|
147 " movq (%2), %%mm4 \n\t" /* mm1 = ReconPtr2 */ \ |
|
148 " movq %%mm0, %%mm2 \n\t" /* dup to prepare for up conversion */ \ |
|
149 " movq %%mm1, %%mm3 \n\t" /* dup to prepare for up conversion */ \ |
|
150 " movq %%mm4, %%mm5 \n\t" /* dup to prepare for up conversion */ \ |
|
151 /* convert from UINT8 to INT16 */ \ |
|
152 " punpcklbw %%mm7, %%mm0 \n\t" /* mm0 = INT16(FiltPtr) */ \ |
|
153 " punpcklbw %%mm7, %%mm1 \n\t" /* mm1 = INT16(ReconPtr1) */ \ |
|
154 " punpcklbw %%mm7, %%mm4 \n\t" /* mm1 = INT16(ReconPtr2) */ \ |
|
155 " punpckhbw %%mm7, %%mm2 \n\t" /* mm2 = INT16(FiltPtr) */ \ |
|
156 " punpckhbw %%mm7, %%mm3 \n\t" /* mm3 = INT16(ReconPtr1) */ \ |
|
157 " punpckhbw %%mm7, %%mm5 \n\t" /* mm3 = INT16(ReconPtr2) */ \ |
|
158 /* average ReconPtr1 and ReconPtr2 */ \ |
|
159 " paddw %%mm4, %%mm1 \n\t" /* mm1 = ReconPtr1 + ReconPtr2 */ \ |
|
160 " paddw %%mm5, %%mm3 \n\t" /* mm3 = ReconPtr1 + ReconPtr2 */ \ |
|
161 " psrlw $1, %%mm1 \n\t" /* mm1 = (ReconPtr1 + ReconPtr2) / 2 */ \ |
|
162 " psrlw $1, %%mm3 \n\t" /* mm3 = (ReconPtr1 + ReconPtr2) / 2 */ \ |
|
163 " psubw %%mm1, %%mm0 \n\t" /* mm0 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ \ |
|
164 " psubw %%mm3, %%mm2 \n\t" /* mm2 = FiltPtr - ((ReconPtr1 + ReconPtr2) / 2) */ \ |
|
165 " movq %%mm0, (%3) \n\t" /* write answer out */ \ |
|
166 " movq %%mm2, 8(%3) \n\t" /* write answer out */ \ |
|
167 /* Increment pointers */ \ |
|
168 " add $16, %3 \n\t" \ |
|
169 " add %4, %0 \n\t" \ |
|
170 " add %5, %1 \n\t" \ |
|
171 " add %5, %2 \n\t" |
|
172 |
|
173 LOOP |
|
174 LOOP |
|
175 LOOP |
|
176 LOOP |
|
177 LOOP |
|
178 LOOP |
|
179 LOOP |
|
180 LOOP |
|
181 #undef LOOP |
|
182 |
|
183 " emms \n\t" |
|
184 |
|
185 : "+r" (src1), |
|
186 "+r" (src2), |
|
187 "+r" (src3), |
|
188 "+r" (dest) |
|
189 : "m" (ss1), |
|
190 "m" (ss2) |
|
191 : "memory" |
|
192 ); |
|
193 #endif |
|
194 } |
|
195 OIL_DEFINE_IMPL_FULL (diff8x8_average_s16_u8_mmx, diff8x8_average_s16_u8, OIL_IMPL_FLAG_MMX); |
|
196 |
|
197 |
|
198 |
|
199 #ifdef __SYMBIAN32__ |
|
200 |
|
201 OilFunctionImpl* __oil_function_impl_diff8x8_s16_u8_mmx, diff8x8_s16_u8() { |
|
202 return &_oil_function_impl_diff8x8_s16_u8_mmx, diff8x8_s16_u8; |
|
203 } |
|
204 #endif |
|
205 |
|
206 #ifdef __SYMBIAN32__ |
|
207 |
|
208 OilFunctionImpl* __oil_function_impl_diff8x8_const128_s16_u8_mmx, diff8x8_const128_s16_u8() { |
|
209 return &_oil_function_impl_diff8x8_const128_s16_u8_mmx, diff8x8_const128_s16_u8; |
|
210 } |
|
211 #endif |
|
212 |
|
213 #ifdef __SYMBIAN32__ |
|
214 |
|
215 OilFunctionImpl* __oil_function_impl_diff8x8_average_s16_u8_mmx, diff8x8_average_s16_u8() { |
|
216 return &_oil_function_impl_diff8x8_average_s16_u8_mmx, diff8x8_average_s16_u8; |
|
217 } |
|
218 #endif |
|
219 |