|
1 /* |
|
2 * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI support |
|
3 * |
|
4 * Copyright (c) 2005 Fabrice Bellard |
|
5 * |
|
6 * This library is free software; you can redistribute it and/or |
|
7 * modify it under the terms of the GNU Lesser General Public |
|
8 * License as published by the Free Software Foundation; either |
|
9 * version 2 of the License, or (at your option) any later version. |
|
10 * |
|
11 * This library is distributed in the hope that it will be useful, |
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
14 * Lesser General Public License for more details. |
|
15 * |
|
16 * You should have received a copy of the GNU Lesser General Public |
|
17 * License along with this library; if not, write to the Free Software |
|
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
19 */ |
|
/*
 * Select the register type (Reg) and the helper-name suffix (SUFFIX)
 * from SHIFT: SHIFT == 0 gives MMXReg / _mmx, any other value gives
 * XMMReg / _xmm.
 * NOTE(review): SHIFT is not defined anywhere in this file and is
 * #undef'd at the end, so it is presumably set by the including file
 * before each inclusion -- confirm against the includer.
 */
20 #if SHIFT == 0 |
|
21 #define Reg MMXReg |
|
22 #define SUFFIX _mmx |
|
23 #else |
|
24 #define Reg XMMReg |
|
25 #define SUFFIX _xmm |
|
26 #endif |
|
27 |
|
/*
 * Type descriptors for the Reg / XMMReg / MMXReg argument kinds used in
 * the DEF_HELPER_* declarations in this file: every kind aliases to
 * "ptr", and its C type is a pointer to the corresponding register type.
 * NOTE(review): the dh_alias_ / dh_ctype_ prefixes are presumably looked
 * up by the DEF_HELPER_* machinery, which is defined elsewhere -- confirm.
 */
28 #define dh_alias_Reg ptr |
|
29 #define dh_alias_XMMReg ptr |
|
30 #define dh_alias_MMXReg ptr |
|
31 #define dh_ctype_Reg Reg * |
|
32 #define dh_ctype_XMMReg XMMReg * |
|
33 #define dh_ctype_MMXReg MMXReg * |
|
34 |
|
/*
 * Packed shift helpers. Each declaration names the helper <op>SUFFIX,
 * returns void and takes two Reg arguments.
 */
35 DEF_HELPER_2(glue(psrlw, SUFFIX), void, Reg, Reg) |
|
36 DEF_HELPER_2(glue(psraw, SUFFIX), void, Reg, Reg) |
|
37 DEF_HELPER_2(glue(psllw, SUFFIX), void, Reg, Reg) |
|
38 DEF_HELPER_2(glue(psrld, SUFFIX), void, Reg, Reg) |
|
39 DEF_HELPER_2(glue(psrad, SUFFIX), void, Reg, Reg) |
|
40 DEF_HELPER_2(glue(pslld, SUFFIX), void, Reg, Reg) |
|
41 DEF_HELPER_2(glue(psrlq, SUFFIX), void, Reg, Reg) |
|
42 DEF_HELPER_2(glue(psllq, SUFFIX), void, Reg, Reg) |
|
43 |
|
/* psrldq / pslldq are declared only for SHIFT == 1 (Reg is XMMReg). */
44 #if SHIFT == 1 |
|
45 DEF_HELPER_2(glue(psrldq, SUFFIX), void, Reg, Reg) |
|
46 DEF_HELPER_2(glue(pslldq, SUFFIX), void, Reg, Reg) |
|
47 #endif |
|
48 |
|
/*
 * SSE_HELPER_B / _W / _L / _Q (name, F): declare a helper called
 * name##SUFFIX that returns void and takes two Reg arguments.
 * In this header all four macros expand to the same declaration and the
 * F argument is not used by the expansion.
 * NOTE(review): F and the B/W/L/Q distinction presumably matter to a
 * matching implementation file that defines the same macro names
 * differently -- confirm.
 */
49 #define SSE_HELPER_B(name, F)\ |
|
50 DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg) |
|
51 |
|
52 #define SSE_HELPER_W(name, F)\ |
|
53 DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg) |
|
54 |
|
55 #define SSE_HELPER_L(name, F)\ |
|
56 DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg) |
|
57 |
|
58 #define SSE_HELPER_Q(name, F)\ |
|
59 DEF_HELPER_2(glue(name, SUFFIX), void, Reg, Reg) |
|
60 |
|
/*
 * Integer helpers declared through the SSE_HELPER_* macros. Each line
 * expands to one DEF_HELPER_2 declaration of <name>SUFFIX(Reg, Reg)
 * returning void; the second macro argument (FADD, FSUB, ...) does not
 * appear in the expansion used by this header.
 */
61 SSE_HELPER_B(paddb, FADD) |
|
62 SSE_HELPER_W(paddw, FADD) |
|
63 SSE_HELPER_L(paddl, FADD) |
|
64 SSE_HELPER_Q(paddq, FADD) |
|
65 |
|
66 SSE_HELPER_B(psubb, FSUB) |
|
67 SSE_HELPER_W(psubw, FSUB) |
|
68 SSE_HELPER_L(psubl, FSUB) |
|
69 SSE_HELPER_Q(psubq, FSUB) |
|
70 |
|
71 SSE_HELPER_B(paddusb, FADDUB) |
|
72 SSE_HELPER_B(paddsb, FADDSB) |
|
73 SSE_HELPER_B(psubusb, FSUBUB) |
|
74 SSE_HELPER_B(psubsb, FSUBSB) |
|
75 |
|
76 SSE_HELPER_W(paddusw, FADDUW) |
|
77 SSE_HELPER_W(paddsw, FADDSW) |
|
78 SSE_HELPER_W(psubusw, FSUBUW) |
|
79 SSE_HELPER_W(psubsw, FSUBSW) |
|
80 |
|
81 SSE_HELPER_B(pminub, FMINUB) |
|
82 SSE_HELPER_B(pmaxub, FMAXUB) |
|
83 |
|
84 SSE_HELPER_W(pminsw, FMINSW) |
|
85 SSE_HELPER_W(pmaxsw, FMAXSW) |
|
86 |
|
87 SSE_HELPER_Q(pand, FAND) |
|
88 SSE_HELPER_Q(pandn, FANDN) |
|
89 SSE_HELPER_Q(por, FOR) |
|
90 SSE_HELPER_Q(pxor, FXOR) |
|
91 |
|
92 SSE_HELPER_B(pcmpgtb, FCMPGTB) |
|
93 SSE_HELPER_W(pcmpgtw, FCMPGTW) |
|
94 SSE_HELPER_L(pcmpgtl, FCMPGTL) |
|
95 |
|
96 SSE_HELPER_B(pcmpeqb, FCMPEQ) |
|
97 SSE_HELPER_W(pcmpeqw, FCMPEQ) |
|
98 SSE_HELPER_L(pcmpeql, FCMPEQ) |
|
99 |
|
100 SSE_HELPER_W(pmullw, FMULLW) |
|
/* pmulhrw is declared only for SHIFT == 0 (the _mmx variant). */
101 #if SHIFT == 0 |
|
102 SSE_HELPER_W(pmulhrw, FMULHRW) |
|
103 #endif |
|
104 SSE_HELPER_W(pmulhuw, FMULHUW) |
|
105 SSE_HELPER_W(pmulhw, FMULHW) |
|
106 |
|
107 SSE_HELPER_B(pavgb, FAVG) |
|
108 SSE_HELPER_W(pavgw, FAVG) |
|
109 |
|
/*
 * Helpers declared directly rather than through SSE_HELPER_*; all are
 * named <op>SUFFIX and return void.
 */
110 DEF_HELPER_2(glue(pmuludq, SUFFIX), void, Reg, Reg) |
|
111 DEF_HELPER_2(glue(pmaddwd, SUFFIX), void, Reg, Reg) |
|
112 |
|
113 DEF_HELPER_2(glue(psadbw, SUFFIX), void, Reg, Reg) |
|
/* maskmov takes a third argument of helper type "tl". */
114 DEF_HELPER_3(glue(maskmov, SUFFIX), void, Reg, Reg, tl) |
|
/* movl_mm_T0 takes (Reg, i32); the i64 variant movq_mm_T0 is declared
 * only when TARGET_X86_64 is defined. */
115 DEF_HELPER_2(glue(movl_mm_T0, SUFFIX), void, Reg, i32) |
|
116 #ifdef TARGET_X86_64 |
|
117 DEF_HELPER_2(glue(movq_mm_T0, SUFFIX), void, Reg, i64) |
|
118 #endif |
|
119 |
|
/*
 * Shuffle helpers: (Reg, Reg, int) returning void.
 * SHIFT == 0 declares only pshufw (suffixed). Otherwise shufps / shufpd
 * are declared without SUFFIX, and pshufd / pshuflw / pshufhw with it.
 */
120 #if SHIFT == 0 |
|
121 DEF_HELPER_3(glue(pshufw, SUFFIX), void, Reg, Reg, int) |
|
122 #else |
|
123 DEF_HELPER_3(shufps, void, Reg, Reg, int) |
|
124 DEF_HELPER_3(shufpd, void, Reg, Reg, int) |
|
125 DEF_HELPER_3(glue(pshufd, SUFFIX), void, Reg, Reg, int) |
|
126 DEF_HELPER_3(glue(pshuflw, SUFFIX), void, Reg, Reg, int) |
|
127 DEF_HELPER_3(glue(pshufhw, SUFFIX), void, Reg, Reg, int) |
|
128 #endif |
|
129 |
|
/*
 * Everything up to the matching #endif is declared only for SHIFT == 1.
 * Helper names in this section are not built with SUFFIX.
 */
130 #if SHIFT == 1 |
|
131 /* FPU ops */ |
|
132 /* XXX: not accurate */ |
|
133 |
|
/*
 * SSE_HELPER_S(name, F): declare four helpers name##ps, name##ss,
 * name##pd and name##sd, each (Reg, Reg) returning void. F is not used
 * by this expansion.
 */
134 #define SSE_HELPER_S(name, F)\ |
|
135 DEF_HELPER_2(name ## ps , void, Reg, Reg) \ |
|
136 DEF_HELPER_2(name ## ss , void, Reg, Reg) \ |
|
137 DEF_HELPER_2(name ## pd , void, Reg, Reg) \ |
|
138 DEF_HELPER_2(name ## sd , void, Reg, Reg) |
|
139 |
|
140 SSE_HELPER_S(add, FPU_ADD) |
|
141 SSE_HELPER_S(sub, FPU_SUB) |
|
142 SSE_HELPER_S(mul, FPU_MUL) |
|
143 SSE_HELPER_S(div, FPU_DIV) |
|
144 SSE_HELPER_S(min, FPU_MIN) |
|
145 SSE_HELPER_S(max, FPU_MAX) |
|
146 SSE_HELPER_S(sqrt, FPU_SQRT) |
|
147 |
|
148 |
|
/*
 * Conversion helpers writing into a register argument (return void).
 * The cvtpi2* forms take (XMMReg, MMXReg); the cvtsi2* forms take
 * (XMMReg, i32).
 */
149 DEF_HELPER_2(cvtps2pd, void, Reg, Reg) |
|
150 DEF_HELPER_2(cvtpd2ps, void, Reg, Reg) |
|
151 DEF_HELPER_2(cvtss2sd, void, Reg, Reg) |
|
152 DEF_HELPER_2(cvtsd2ss, void, Reg, Reg) |
|
153 DEF_HELPER_2(cvtdq2ps, void, Reg, Reg) |
|
154 DEF_HELPER_2(cvtdq2pd, void, Reg, Reg) |
|
155 DEF_HELPER_2(cvtpi2ps, void, XMMReg, MMXReg) |
|
156 DEF_HELPER_2(cvtpi2pd, void, XMMReg, MMXReg) |
|
157 DEF_HELPER_2(cvtsi2ss, void, XMMReg, i32) |
|
158 DEF_HELPER_2(cvtsi2sd, void, XMMReg, i32) |
|
159 |
|
/* The (XMMReg, i64) forms are declared only when TARGET_X86_64 is defined. */
160 #ifdef TARGET_X86_64 |
|
161 DEF_HELPER_2(cvtsq2ss, void, XMMReg, i64) |
|
162 DEF_HELPER_2(cvtsq2sd, void, XMMReg, i64) |
|
163 #endif |
|
164 |
|
/*
 * cvt* helpers: the *2dq forms take (XMMReg, XMMReg), the *2pi forms take
 * (MMXReg, XMMReg), and the *2si forms return s32 from a single XMMReg.
 * The *2sq forms returning s64 are declared only when TARGET_X86_64 is
 * defined.
 */
165 DEF_HELPER_2(cvtps2dq, void, XMMReg, XMMReg) |
|
166 DEF_HELPER_2(cvtpd2dq, void, XMMReg, XMMReg) |
|
167 DEF_HELPER_2(cvtps2pi, void, MMXReg, XMMReg) |
|
168 DEF_HELPER_2(cvtpd2pi, void, MMXReg, XMMReg) |
|
169 DEF_HELPER_1(cvtss2si, s32, XMMReg) |
|
170 DEF_HELPER_1(cvtsd2si, s32, XMMReg) |
|
171 #ifdef TARGET_X86_64 |
|
172 DEF_HELPER_1(cvtss2sq, s64, XMMReg) |
|
173 DEF_HELPER_1(cvtsd2sq, s64, XMMReg) |
|
174 #endif |
|
175 |
|
/* cvtt* helpers: same signatures as the cvt* group directly above. */
176 DEF_HELPER_2(cvttps2dq, void, XMMReg, XMMReg) |
|
177 DEF_HELPER_2(cvttpd2dq, void, XMMReg, XMMReg) |
|
178 DEF_HELPER_2(cvttps2pi, void, MMXReg, XMMReg) |
|
179 DEF_HELPER_2(cvttpd2pi, void, MMXReg, XMMReg) |
|
180 DEF_HELPER_1(cvttss2si, s32, XMMReg) |
|
181 DEF_HELPER_1(cvttsd2si, s32, XMMReg) |
|
182 #ifdef TARGET_X86_64 |
|
183 DEF_HELPER_1(cvttss2sq, s64, XMMReg) |
|
184 DEF_HELPER_1(cvttsd2sq, s64, XMMReg) |
|
185 #endif |
|
186 |
|
/* Remaining two-operand (XMMReg, XMMReg) helpers returning void. */
187 DEF_HELPER_2(rsqrtps, void, XMMReg, XMMReg) |
|
188 DEF_HELPER_2(rsqrtss, void, XMMReg, XMMReg) |
|
189 DEF_HELPER_2(rcpps, void, XMMReg, XMMReg) |
|
190 DEF_HELPER_2(rcpss, void, XMMReg, XMMReg) |
|
191 DEF_HELPER_2(haddps, void, XMMReg, XMMReg) |
|
192 DEF_HELPER_2(haddpd, void, XMMReg, XMMReg) |
|
193 DEF_HELPER_2(hsubps, void, XMMReg, XMMReg) |
|
194 DEF_HELPER_2(hsubpd, void, XMMReg, XMMReg) |
|
195 DEF_HELPER_2(addsubps, void, XMMReg, XMMReg) |
|
196 DEF_HELPER_2(addsubpd, void, XMMReg, XMMReg) |
|
197 |
|
/*
 * SSE_HELPER_CMP(name, F): same shape as SSE_HELPER_S -- declares
 * name##ps, name##ss, name##pd and name##sd as (Reg, Reg) returning
 * void. F is not used by this expansion.
 */
198 #define SSE_HELPER_CMP(name, F)\ |
|
199 DEF_HELPER_2( name ## ps , void, Reg, Reg) \ |
|
200 DEF_HELPER_2( name ## ss , void, Reg, Reg) \ |
|
201 DEF_HELPER_2( name ## pd , void, Reg, Reg) \ |
|
202 DEF_HELPER_2( name ## sd , void, Reg, Reg) |
|
203 |
|
204 SSE_HELPER_CMP(cmpeq, FPU_CMPEQ) |
|
205 SSE_HELPER_CMP(cmplt, FPU_CMPLT) |
|
206 SSE_HELPER_CMP(cmple, FPU_CMPLE) |
|
207 SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD) |
|
208 SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ) |
|
209 SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT) |
|
210 SSE_HELPER_CMP(cmpnle, FPU_CMPNLE) |
|
211 SSE_HELPER_CMP(cmpord, FPU_CMPORD) |
|
212 |
|
/* The (u)comis* helpers return void; the movmsk* helpers return an i32. */
213 DEF_HELPER_2(ucomiss, void, Reg, Reg) |
|
214 DEF_HELPER_2(comiss, void, Reg, Reg) |
|
215 DEF_HELPER_2(ucomisd, void, Reg, Reg) |
|
216 DEF_HELPER_2(comisd, void, Reg, Reg) |
|
217 DEF_HELPER_1(movmskps, i32, Reg) |
|
218 DEF_HELPER_1(movmskpd, i32, Reg) |
|
219 #endif |
|
220 |
|
/*
 * pmovmskb returns an i32 from a single Reg; the pack helpers are
 * (Reg, Reg) returning void. All are named <op>SUFFIX.
 */
221 DEF_HELPER_1(glue(pmovmskb, SUFFIX), i32, Reg) |
|
222 DEF_HELPER_2(glue(packsswb, SUFFIX), void, Reg, Reg) |
|
223 DEF_HELPER_2(glue(packuswb, SUFFIX), void, Reg, Reg) |
|
224 DEF_HELPER_2(glue(packssdw, SUFFIX), void, Reg, Reg) |
|
/*
 * UNPCK_OP(base_name, base): declare punpck<base_name>bw, ...wd and ...dq
 * (each with SUFFIX appended) as (Reg, Reg) returning void. The `base`
 * argument is not used by this expansion.
 */
225 #define UNPCK_OP(base_name, base) \ |
|
226 DEF_HELPER_2(glue(punpck ## base_name ## bw, SUFFIX) , void, Reg, Reg) \ |
|
227 DEF_HELPER_2(glue(punpck ## base_name ## wd, SUFFIX) , void, Reg, Reg) \ |
|
228 DEF_HELPER_2(glue(punpck ## base_name ## dq, SUFFIX) , void, Reg, Reg) |
|
229 |
|
230 UNPCK_OP(l, 0) |
|
231 UNPCK_OP(h, 1) |
|
232 |
|
/* The qdq unpack variants are declared only for SHIFT == 1. */
233 #if SHIFT == 1 |
|
234 DEF_HELPER_2(glue(punpcklqdq, SUFFIX), void, Reg, Reg) |
|
235 DEF_HELPER_2(glue(punpckhqdq, SUFFIX), void, Reg, Reg) |
|
236 #endif |
|
237 |
|
238 /* 3DNow! float ops */ |
|
/*
 * Declared only for SHIFT == 0. Every helper here is
 * (MMXReg, MMXReg) returning void, and the names are not built with
 * SUFFIX.
 */
239 #if SHIFT == 0 |
|
240 DEF_HELPER_2(pi2fd, void, MMXReg, MMXReg) |
|
241 DEF_HELPER_2(pi2fw, void, MMXReg, MMXReg) |
|
242 DEF_HELPER_2(pf2id, void, MMXReg, MMXReg) |
|
243 DEF_HELPER_2(pf2iw, void, MMXReg, MMXReg) |
|
244 DEF_HELPER_2(pfacc, void, MMXReg, MMXReg) |
|
245 DEF_HELPER_2(pfadd, void, MMXReg, MMXReg) |
|
246 DEF_HELPER_2(pfcmpeq, void, MMXReg, MMXReg) |
|
247 DEF_HELPER_2(pfcmpge, void, MMXReg, MMXReg) |
|
248 DEF_HELPER_2(pfcmpgt, void, MMXReg, MMXReg) |
|
249 DEF_HELPER_2(pfmax, void, MMXReg, MMXReg) |
|
250 DEF_HELPER_2(pfmin, void, MMXReg, MMXReg) |
|
251 DEF_HELPER_2(pfmul, void, MMXReg, MMXReg) |
|
252 DEF_HELPER_2(pfnacc, void, MMXReg, MMXReg) |
|
253 DEF_HELPER_2(pfpnacc, void, MMXReg, MMXReg) |
|
254 DEF_HELPER_2(pfrcp, void, MMXReg, MMXReg) |
|
255 DEF_HELPER_2(pfrsqrt, void, MMXReg, MMXReg) |
|
256 DEF_HELPER_2(pfsub, void, MMXReg, MMXReg) |
|
257 DEF_HELPER_2(pfsubr, void, MMXReg, MMXReg) |
|
258 DEF_HELPER_2(pswapd, void, MMXReg, MMXReg) |
|
259 #endif |
|
260 |
|
261 /* SSSE3 op helpers */ |
|
/*
 * Not guarded by any SHIFT test, so these are declared for every value
 * of SHIFT, named <op>SUFFIX. All are (Reg, Reg) returning void except
 * palignr, which takes an additional s32 argument.
 */
262 DEF_HELPER_2(glue(phaddw, SUFFIX), void, Reg, Reg) |
|
263 DEF_HELPER_2(glue(phaddd, SUFFIX), void, Reg, Reg) |
|
264 DEF_HELPER_2(glue(phaddsw, SUFFIX), void, Reg, Reg) |
|
265 DEF_HELPER_2(glue(phsubw, SUFFIX), void, Reg, Reg) |
|
266 DEF_HELPER_2(glue(phsubd, SUFFIX), void, Reg, Reg) |
|
267 DEF_HELPER_2(glue(phsubsw, SUFFIX), void, Reg, Reg) |
|
268 DEF_HELPER_2(glue(pabsb, SUFFIX), void, Reg, Reg) |
|
269 DEF_HELPER_2(glue(pabsw, SUFFIX), void, Reg, Reg) |
|
270 DEF_HELPER_2(glue(pabsd, SUFFIX), void, Reg, Reg) |
|
271 DEF_HELPER_2(glue(pmaddubsw, SUFFIX), void, Reg, Reg) |
|
272 DEF_HELPER_2(glue(pmulhrsw, SUFFIX), void, Reg, Reg) |
|
273 DEF_HELPER_2(glue(pshufb, SUFFIX), void, Reg, Reg) |
|
274 DEF_HELPER_2(glue(psignb, SUFFIX), void, Reg, Reg) |
|
275 DEF_HELPER_2(glue(psignw, SUFFIX), void, Reg, Reg) |
|
276 DEF_HELPER_2(glue(psignd, SUFFIX), void, Reg, Reg) |
|
277 DEF_HELPER_3(glue(palignr, SUFFIX), void, Reg, Reg, s32) |
|
278 |
|
279 /* SSE4.1 op helpers */ |
|
/*
 * Declared only for SHIFT == 1, named <op>SUFFIX. The DEF_HELPER_2
 * entries are (Reg, Reg) returning void; the DEF_HELPER_3 entries at the
 * end of the list take an additional i32 argument.
 */
280 #if SHIFT == 1 |
|
281 DEF_HELPER_2(glue(pblendvb, SUFFIX), void, Reg, Reg) |
|
282 DEF_HELPER_2(glue(blendvps, SUFFIX), void, Reg, Reg) |
|
283 DEF_HELPER_2(glue(blendvpd, SUFFIX), void, Reg, Reg) |
|
284 DEF_HELPER_2(glue(ptest, SUFFIX), void, Reg, Reg) |
|
285 DEF_HELPER_2(glue(pmovsxbw, SUFFIX), void, Reg, Reg) |
|
286 DEF_HELPER_2(glue(pmovsxbd, SUFFIX), void, Reg, Reg) |
|
287 DEF_HELPER_2(glue(pmovsxbq, SUFFIX), void, Reg, Reg) |
|
288 DEF_HELPER_2(glue(pmovsxwd, SUFFIX), void, Reg, Reg) |
|
289 DEF_HELPER_2(glue(pmovsxwq, SUFFIX), void, Reg, Reg) |
|
290 DEF_HELPER_2(glue(pmovsxdq, SUFFIX), void, Reg, Reg) |
|
291 DEF_HELPER_2(glue(pmovzxbw, SUFFIX), void, Reg, Reg) |
|
292 DEF_HELPER_2(glue(pmovzxbd, SUFFIX), void, Reg, Reg) |
|
293 DEF_HELPER_2(glue(pmovzxbq, SUFFIX), void, Reg, Reg) |
|
294 DEF_HELPER_2(glue(pmovzxwd, SUFFIX), void, Reg, Reg) |
|
295 DEF_HELPER_2(glue(pmovzxwq, SUFFIX), void, Reg, Reg) |
|
296 DEF_HELPER_2(glue(pmovzxdq, SUFFIX), void, Reg, Reg) |
|
297 DEF_HELPER_2(glue(pmuldq, SUFFIX), void, Reg, Reg) |
|
298 DEF_HELPER_2(glue(pcmpeqq, SUFFIX), void, Reg, Reg) |
|
299 DEF_HELPER_2(glue(packusdw, SUFFIX), void, Reg, Reg) |
|
300 DEF_HELPER_2(glue(pminsb, SUFFIX), void, Reg, Reg) |
|
301 DEF_HELPER_2(glue(pminsd, SUFFIX), void, Reg, Reg) |
|
302 DEF_HELPER_2(glue(pminuw, SUFFIX), void, Reg, Reg) |
|
303 DEF_HELPER_2(glue(pminud, SUFFIX), void, Reg, Reg) |
|
304 DEF_HELPER_2(glue(pmaxsb, SUFFIX), void, Reg, Reg) |
|
305 DEF_HELPER_2(glue(pmaxsd, SUFFIX), void, Reg, Reg) |
|
306 DEF_HELPER_2(glue(pmaxuw, SUFFIX), void, Reg, Reg) |
|
307 DEF_HELPER_2(glue(pmaxud, SUFFIX), void, Reg, Reg) |
|
308 DEF_HELPER_2(glue(pmulld, SUFFIX), void, Reg, Reg) |
|
309 DEF_HELPER_2(glue(phminposuw, SUFFIX), void, Reg, Reg) |
|
310 DEF_HELPER_3(glue(roundps, SUFFIX), void, Reg, Reg, i32) |
|
311 DEF_HELPER_3(glue(roundpd, SUFFIX), void, Reg, Reg, i32) |
|
312 DEF_HELPER_3(glue(roundss, SUFFIX), void, Reg, Reg, i32) |
|
313 DEF_HELPER_3(glue(roundsd, SUFFIX), void, Reg, Reg, i32) |
|
314 DEF_HELPER_3(glue(blendps, SUFFIX), void, Reg, Reg, i32) |
|
315 DEF_HELPER_3(glue(blendpd, SUFFIX), void, Reg, Reg, i32) |
|
316 DEF_HELPER_3(glue(pblendw, SUFFIX), void, Reg, Reg, i32) |
|
317 DEF_HELPER_3(glue(dpps, SUFFIX), void, Reg, Reg, i32) |
|
318 DEF_HELPER_3(glue(dppd, SUFFIX), void, Reg, Reg, i32) |
|
319 DEF_HELPER_3(glue(mpsadbw, SUFFIX), void, Reg, Reg, i32) |
|
320 #endif |
|
321 |
|
322 /* SSE4.2 op helpers */ |
|
/*
 * Declared only for SHIFT == 1. pcmpgtq and the pcmp?str? helpers are
 * named <op>SUFFIX and return void. crc32 and popcnt are declared without
 * SUFFIX and take no Reg argument: crc32 is (i32, tl, i32) returning tl,
 * popcnt is (tl, i32) returning tl.
 */
323 #if SHIFT == 1 |
|
324 DEF_HELPER_2(glue(pcmpgtq, SUFFIX), void, Reg, Reg) |
|
325 DEF_HELPER_3(glue(pcmpestri, SUFFIX), void, Reg, Reg, i32) |
|
326 DEF_HELPER_3(glue(pcmpestrm, SUFFIX), void, Reg, Reg, i32) |
|
327 DEF_HELPER_3(glue(pcmpistri, SUFFIX), void, Reg, Reg, i32) |
|
328 DEF_HELPER_3(glue(pcmpistrm, SUFFIX), void, Reg, Reg, i32) |
|
329 DEF_HELPER_3(crc32, tl, i32, tl, i32) |
|
330 DEF_HELPER_2(popcnt, tl, tl, i32) |
|
331 #endif |
|
332 |
|
/*
 * Drop the per-inclusion configuration (SHIFT, Reg, SUFFIX) and the
 * local declaration macros.
 * NOTE(review): presumably this lets the file be included again with a
 * different SHIFT -- confirm against the includer. The dh_alias_* and
 * dh_ctype_* macros defined near the top of the file are not undefined
 * here.
 */
333 #undef SHIFT |
|
334 #undef Reg |
|
335 #undef SUFFIX |
|
336 |
|
337 #undef SSE_HELPER_B |
|
338 #undef SSE_HELPER_W |
|
339 #undef SSE_HELPER_L |
|
340 #undef SSE_HELPER_Q |
|
/* SSE_HELPER_S and SSE_HELPER_CMP are only defined when SHIFT == 1;
 * #undef of a name that is not defined is harmless. */
341 #undef SSE_HELPER_S |
|
342 #undef SSE_HELPER_CMP |
|
343 #undef UNPCK_OP |