|
1 /**************************************************************************** |
|
2 ** |
|
3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). |
|
4 ** All rights reserved. |
|
5 ** Contact: Nokia Corporation (qt-info@nokia.com) |
|
6 ** |
|
7 ** This file is part of the QtGui module of the Qt Toolkit. |
|
8 ** |
|
9 ** $QT_BEGIN_LICENSE:LGPL$ |
|
10 ** No Commercial Usage |
|
11 ** This file contains pre-release code and may not be distributed. |
|
12 ** You may use this file in accordance with the terms and conditions |
|
13 ** contained in the Technology Preview License Agreement accompanying |
|
14 ** this package. |
|
15 ** |
|
16 ** GNU Lesser General Public License Usage |
|
17 ** Alternatively, this file may be used under the terms of the GNU Lesser |
|
18 ** General Public License version 2.1 as published by the Free Software |
|
19 ** Foundation and appearing in the file LICENSE.LGPL included in the |
|
20 ** packaging of this file. Please review the following information to |
|
21 ** ensure the GNU Lesser General Public License version 2.1 requirements |
|
22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
|
23 ** |
|
24 ** In addition, as a special exception, Nokia gives you certain additional |
|
25 ** rights. These rights are described in the Nokia Qt LGPL Exception |
|
26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
|
27 ** |
|
28 ** If you have questions regarding the use of this file, please contact |
|
29 ** Nokia at qt-info@nokia.com. |
|
30 ** |
|
31 ** |
|
32 ** |
|
33 ** |
|
34 ** |
|
35 ** |
|
36 ** |
|
37 ** |
|
38 ** $QT_END_LICENSE$ |
|
39 ** |
|
40 ****************************************************************************/ |
|
41 |
|
42 #ifndef QDRAWHELPER_MMX_P_H |
|
43 #define QDRAWHELPER_MMX_P_H |
|
44 |
|
45 // |
|
46 // W A R N I N G |
|
47 // ------------- |
|
48 // |
|
49 // This file is not part of the Qt API. It exists purely as an |
|
50 // implementation detail. This header file may change from version to |
|
51 // version without notice, or even be removed. |
|
52 // |
|
53 // We mean it. |
|
54 // |
|
55 |
|
56 #include <private/qdrawhelper_p.h> |
|
57 #include <private/qdrawhelper_x86_p.h> |
|
58 #include <private/qpaintengine_raster_p.h> |
|
59 |
|
60 #ifdef QT_HAVE_MMX |
|
61 #include <mmintrin.h> |
|
62 #endif |
|
63 |
|
// Each of these macros declares one named MMX constant in the current scope.
// The negate()/byte_mul()/load()/store() wrapper macros defined later in this
// file pass these names implicitly, so a function must expand the matching
// C_xx macro before using the corresponding wrapper.

// Four 16-bit lanes of 0x00ff (operand of negate()).
#define C_FF const m64 mmx_0x00ff = _mm_set1_pi16(0xff)
// Four 16-bit lanes of 0x0080 (rounding bias of byte_mul() and friends).
#define C_80 const m64 mmx_0x0080 = _mm_set1_pi16(0x80)
// All-zero 64-bit value (used by load()/store() when unpacking/packing).
#define C_00 const m64 mmx_0x0000 = _mm_setzero_si64()

#ifdef Q_CC_MSVC
# pragma warning(disable: 4799) // No EMMS at end of function
#endif

// Short alias for the 64-bit MMX register type.
typedef __m64 m64;
|
73 |
|
74 QT_BEGIN_NAMESPACE |
|
75 |
|
76 struct QMMXCommonIntrinsics |
|
77 { |
|
78 static inline m64 alpha(m64 x) { |
|
79 x = _mm_unpackhi_pi16(x, x); |
|
80 x = _mm_unpackhi_pi16(x, x); |
|
81 return x; |
|
82 } |
|
83 |
|
84 static inline m64 _negate(const m64 &x, const m64 &mmx_0x00ff) { |
|
85 return _mm_xor_si64(x, mmx_0x00ff); |
|
86 } |
|
87 |
|
88 static inline m64 add(const m64 &a, const m64 &b) { |
|
89 return _mm_adds_pu16 (a, b); |
|
90 } |
|
91 |
|
92 static inline m64 _byte_mul(const m64 &a, const m64 &b, |
|
93 const m64 &mmx_0x0080) |
|
94 { |
|
95 m64 res = _mm_mullo_pi16(a, b); |
|
96 res = _mm_adds_pu16(res, mmx_0x0080); |
|
97 res = _mm_adds_pu16(res, _mm_srli_pi16 (res, 8)); |
|
98 return _mm_srli_pi16(res, 8); |
|
99 } |
|
100 |
|
101 static inline m64 interpolate_pixel_256(const m64 &x, const m64 &a, |
|
102 const m64 &y, const m64 &b) |
|
103 { |
|
104 m64 res = _mm_adds_pu16(_mm_mullo_pi16(x, a), _mm_mullo_pi16(y, b)); |
|
105 return _mm_srli_pi16(res, 8); |
|
106 } |
|
107 |
|
108 static inline m64 _interpolate_pixel_255(const m64 &x, const m64 &a, |
|
109 const m64 &y, const m64 &b, |
|
110 const m64 &mmx_0x0080) |
|
111 { |
|
112 m64 res = _mm_adds_pu16(_mm_mullo_pi16(x, a), _mm_mullo_pi16(y, b)); |
|
113 res = _mm_adds_pu16(res, mmx_0x0080); |
|
114 res = _mm_adds_pu16(res, _mm_srli_pi16 (res, 8)); |
|
115 return _mm_srli_pi16(res, 8); |
|
116 } |
|
117 |
|
118 static inline m64 _premul(m64 x, const m64 &mmx_0x0080) { |
|
119 m64 a = alpha(x); |
|
120 return _byte_mul(x, a, mmx_0x0080); |
|
121 } |
|
122 |
|
123 static inline m64 _load(uint x, const m64 &mmx_0x0000) { |
|
124 return _mm_unpacklo_pi8(_mm_cvtsi32_si64(x), mmx_0x0000); |
|
125 } |
|
126 |
|
127 static inline m64 _load_alpha(uint x, const m64 &) { |
|
128 x |= (x << 16); |
|
129 return _mm_set1_pi32(x); |
|
130 } |
|
131 |
|
132 static inline uint _store(const m64 &x, const m64 &mmx_0x0000) { |
|
133 return _mm_cvtsi64_si32(_mm_packs_pu16(x, mmx_0x0000)); |
|
134 } |
|
135 }; |
|
136 |
|
// Convenience wrappers: written as MM::negate(x), MM::load(x), ... in the
// templates below, they expand to the underscore-prefixed member function and
// append the named constant as the last argument. The constant must already be
// in scope, i.e. the caller has to expand C_FF / C_80 / C_00 first.
#define negate(x) _negate(x, mmx_0x00ff)
#define byte_mul(a, b) _byte_mul(a, b, mmx_0x0080)
#define interpolate_pixel_255(x, a, y, b) _interpolate_pixel_255(x, a, y, b, mmx_0x0080)
#define premul(x) _premul(x, mmx_0x0080)
#define load(x) _load(x, mmx_0x0000)
#define load_alpha(x) _load_alpha(x, mmx_0x0000)
#define store(x) _store(x, mmx_0x0000)
|
144 |
|
145 /* |
|
146 result = 0 |
|
147 d = d * cia |
|
148 */ |
|
149 template <class MM> |
|
150 static void QT_FASTCALL comp_func_solid_Clear(uint *dest, int length, uint, uint const_alpha) |
|
151 { |
|
152 if (!length) |
|
153 return; |
|
154 |
|
155 if (const_alpha == 255) { |
|
156 qt_memfill(static_cast<quint32*>(dest), quint32(0), length); |
|
157 } else { |
|
158 C_FF; C_80; C_00; |
|
159 m64 ia = MM::negate(MM::load_alpha(const_alpha)); |
|
160 for (int i = 0; i < length; ++i) { |
|
161 dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), ia)); |
|
162 } |
|
163 } |
|
164 MM::end(); |
|
165 } |
|
166 |
|
167 template <class MM> |
|
168 static void QT_FASTCALL comp_func_Clear(uint *dest, const uint *, int length, uint const_alpha) |
|
169 { |
|
170 if (const_alpha == 255) { |
|
171 qt_memfill(static_cast<quint32*>(dest), quint32(0), length); |
|
172 } else { |
|
173 C_FF; C_80; C_00; |
|
174 m64 ia = MM::negate(MM::load_alpha(const_alpha)); |
|
175 for (int i = 0; i < length; ++i) |
|
176 dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), ia)); |
|
177 } |
|
178 MM::end(); |
|
179 } |
|
180 |
|
181 /* |
|
182 result = s |
|
183 dest = s * ca + d * cia |
|
184 */ |
|
185 template <class MM> |
|
186 static void QT_FASTCALL comp_func_solid_Source(uint *dest, int length, uint src, uint const_alpha) |
|
187 { |
|
188 if (const_alpha == 255) { |
|
189 qt_memfill(static_cast<quint32*>(dest), quint32(src), length); |
|
190 } else { |
|
191 C_FF; C_80; C_00; |
|
192 const m64 a = MM::load_alpha(const_alpha); |
|
193 const m64 ia = MM::negate(a); |
|
194 const m64 s = MM::byte_mul(MM::load(src), a); |
|
195 for (int i = 0; i < length; ++i) { |
|
196 dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), ia))); |
|
197 } |
|
198 MM::end(); |
|
199 } |
|
200 } |
|
201 |
|
202 template <class MM> |
|
203 static void QT_FASTCALL comp_func_Source(uint *dest, const uint *src, int length, uint const_alpha) |
|
204 { |
|
205 if (const_alpha == 255) { |
|
206 ::memcpy(dest, src, length * sizeof(uint)); |
|
207 } else { |
|
208 C_FF; C_80; C_00; |
|
209 const m64 a = MM::load_alpha(const_alpha); |
|
210 const m64 ia = MM::negate(a); |
|
211 for (int i = 0; i < length; ++i) |
|
212 dest[i] = MM::store(MM::interpolate_pixel_255(MM::load(src[i]), a, |
|
213 MM::load(dest[i]), ia)); |
|
214 } |
|
215 MM::end(); |
|
216 } |
|
217 |
|
218 /* |
|
219 result = s + d * sia |
|
220 dest = (s + d * sia) * ca + d * cia |
|
221 = s * ca + d * (sia * ca + cia) |
|
222 = s * ca + d * (1 - sa*ca) |
|
223 */ |
|
224 template <class MM> |
|
225 static void QT_FASTCALL comp_func_solid_SourceOver(uint *dest, int length, uint src, uint const_alpha) |
|
226 { |
|
227 if ((const_alpha & qAlpha(src)) == 255) { |
|
228 qt_memfill(static_cast<quint32*>(dest), quint32(src), length); |
|
229 } else { |
|
230 C_FF; C_80; C_00; |
|
231 m64 s = MM::load(src); |
|
232 if (const_alpha != 255) { |
|
233 m64 ca = MM::load_alpha(const_alpha); |
|
234 s = MM::byte_mul(s, ca); |
|
235 } |
|
236 m64 a = MM::negate(MM::alpha(s)); |
|
237 for (int i = 0; i < length; ++i) |
|
238 dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), a))); |
|
239 MM::end(); |
|
240 } |
|
241 } |
|
242 |
|
243 template <class MM> |
|
244 static void QT_FASTCALL comp_func_SourceOver(uint *dest, const uint *src, int length, uint const_alpha) |
|
245 { |
|
246 C_FF; C_80; C_00; |
|
247 if (const_alpha == 255) { |
|
248 for (int i = 0; i < length; ++i) { |
|
249 if ((0xff000000 & src[i]) == 0xff000000) { |
|
250 dest[i] = src[i]; |
|
251 } else { |
|
252 m64 s = MM::load(src[i]); |
|
253 m64 ia = MM::negate(MM::alpha(s)); |
|
254 dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), ia))); |
|
255 } |
|
256 } |
|
257 } else { |
|
258 m64 ca = MM::load_alpha(const_alpha); |
|
259 for (int i = 0; i < length; ++i) { |
|
260 m64 s = MM::byte_mul(MM::load(src[i]), ca); |
|
261 m64 ia = MM::negate(MM::alpha(s)); |
|
262 dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), ia))); |
|
263 } |
|
264 } |
|
265 MM::end(); |
|
266 } |
|
267 |
|
268 /* |
|
269 result = d + s * dia |
|
270 dest = (d + s * dia) * ca + d * cia |
|
271 = d + s * dia * ca |
|
272 */ |
|
273 template <class MM> |
|
274 static void QT_FASTCALL comp_func_solid_DestinationOver(uint *dest, int length, uint src, uint const_alpha) |
|
275 { |
|
276 C_FF; C_80; C_00; |
|
277 m64 s = MM::load(src); |
|
278 if (const_alpha != 255) |
|
279 s = MM::byte_mul(s, MM::load_alpha(const_alpha)); |
|
280 |
|
281 for (int i = 0; i < length; ++i) { |
|
282 m64 d = MM::load(dest[i]); |
|
283 m64 dia = MM::negate(MM::alpha(d)); |
|
284 dest[i] = MM::store(MM::add(d, MM::byte_mul(s, dia))); |
|
285 } |
|
286 MM::end(); |
|
287 } |
|
288 |
|
289 template <class MM> |
|
290 static void QT_FASTCALL comp_func_DestinationOver(uint *dest, const uint *src, int length, uint const_alpha) |
|
291 { |
|
292 C_FF; C_80; C_00; |
|
293 if (const_alpha == 255) { |
|
294 for (int i = 0; i < length; ++i) { |
|
295 m64 d = MM::load(dest[i]); |
|
296 m64 ia = MM::negate(MM::alpha(d)); |
|
297 dest[i] = MM::store(MM::add(d, MM::byte_mul(MM::load(src[i]), ia))); |
|
298 } |
|
299 } else { |
|
300 m64 ca = MM::load_alpha(const_alpha); |
|
301 for (int i = 0; i < length; ++i) { |
|
302 m64 d = MM::load(dest[i]); |
|
303 m64 dia = MM::negate(MM::alpha(d)); |
|
304 dia = MM::byte_mul(dia, ca); |
|
305 dest[i] = MM::store(MM::add(d, MM::byte_mul(MM::load(src[i]), dia))); |
|
306 } |
|
307 } |
|
308 MM::end(); |
|
309 } |
|
310 |
|
311 /* |
|
312 result = s * da |
|
313 dest = s * da * ca + d * cia |
|
314 */ |
|
315 template <class MM> |
|
316 static void QT_FASTCALL comp_func_solid_SourceIn(uint *dest, int length, uint src, uint const_alpha) |
|
317 { |
|
318 C_80; C_00; |
|
319 if (const_alpha == 255) { |
|
320 m64 s = MM::load(src); |
|
321 for (int i = 0; i < length; ++i) { |
|
322 m64 da = MM::alpha(MM::load(dest[i])); |
|
323 dest[i] = MM::store(MM::byte_mul(s, da)); |
|
324 } |
|
325 } else { |
|
326 C_FF; |
|
327 m64 s = MM::load(src); |
|
328 m64 ca = MM::load_alpha(const_alpha); |
|
329 s = MM::byte_mul(s, ca); |
|
330 m64 cia = MM::negate(ca); |
|
331 for (int i = 0; i < length; ++i) { |
|
332 m64 d = MM::load(dest[i]); |
|
333 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d, cia)); |
|
334 } |
|
335 } |
|
336 MM::end(); |
|
337 } |
|
338 |
|
339 template <class MM> |
|
340 static void QT_FASTCALL comp_func_SourceIn(uint *dest, const uint *src, int length, uint const_alpha) |
|
341 { |
|
342 C_FF; C_80; C_00; |
|
343 if (const_alpha == 255) { |
|
344 for (int i = 0; i < length; ++i) { |
|
345 m64 a = MM::alpha(MM::load(dest[i])); |
|
346 dest[i] = MM::store(MM::byte_mul(MM::load(src[i]), a)); |
|
347 } |
|
348 } else { |
|
349 m64 ca = MM::load_alpha(const_alpha); |
|
350 m64 cia = MM::negate(ca); |
|
351 for (int i = 0; i < length; ++i) { |
|
352 m64 d = MM::load(dest[i]); |
|
353 m64 da = MM::byte_mul(MM::alpha(d), ca); |
|
354 dest[i] = MM::store(MM::interpolate_pixel_255( |
|
355 MM::load(src[i]), da, d, cia)); |
|
356 } |
|
357 } |
|
358 MM::end(); |
|
359 } |
|
360 |
|
361 /* |
|
362 result = d * sa |
|
363 dest = d * sa * ca + d * cia |
|
364 = d * (sa * ca + cia) |
|
365 */ |
|
366 template <class MM> |
|
367 static void QT_FASTCALL comp_func_solid_DestinationIn(uint *dest, int length, uint src, uint const_alpha) |
|
368 { |
|
369 C_80; C_00; |
|
370 m64 a = MM::alpha(MM::load(src)); |
|
371 if (const_alpha != 255) { |
|
372 C_FF; |
|
373 m64 ca = MM::load_alpha(const_alpha); |
|
374 m64 cia = MM::negate(ca); |
|
375 a = MM::byte_mul(a, ca); |
|
376 a = MM::add(a, cia); |
|
377 } |
|
378 for (int i = 0; i < length; ++i) |
|
379 dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a)); |
|
380 MM::end(); |
|
381 } |
|
382 |
|
383 template <class MM> |
|
384 static void QT_FASTCALL comp_func_DestinationIn(uint *dest, const uint *src, int length, uint const_alpha) |
|
385 { |
|
386 C_FF; C_80; C_00; |
|
387 if (const_alpha == 255) { |
|
388 for (int i = 0; i < length; ++i) { |
|
389 m64 a = MM::alpha(MM::load(src[i])); |
|
390 dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a)); |
|
391 } |
|
392 } else { |
|
393 m64 ca = MM::load_alpha(const_alpha); |
|
394 m64 cia = MM::negate(ca); |
|
395 for (int i = 0; i < length; ++i) { |
|
396 m64 d = MM::load(dest[i]); |
|
397 m64 a = MM::alpha(MM::load(src[i])); |
|
398 a = MM::byte_mul(a, ca); |
|
399 a = MM::add(a, cia); |
|
400 dest[i] = MM::store(MM::byte_mul(d, a)); |
|
401 } |
|
402 } |
|
403 MM::end(); |
|
404 } |
|
405 |
|
406 /* |
|
407 result = s * dia |
|
408 dest = s * dia * ca + d * cia |
|
409 */ |
|
410 template <class MM> |
|
411 static void QT_FASTCALL comp_func_solid_SourceOut(uint *dest, int length, uint src, uint const_alpha) |
|
412 { |
|
413 C_FF; C_80; C_00; |
|
414 m64 s = MM::load(src); |
|
415 if (const_alpha == 255) { |
|
416 for (int i = 0; i < length; ++i) { |
|
417 m64 dia = MM::negate(MM::alpha(MM::load(dest[i]))); |
|
418 dest[i] = MM::store(MM::byte_mul(s, dia)); |
|
419 } |
|
420 } else { |
|
421 m64 ca = MM::load_alpha(const_alpha); |
|
422 m64 cia = MM::negate(ca); |
|
423 s = MM::byte_mul(s, ca); |
|
424 for (int i = 0; i < length; ++i) { |
|
425 m64 d = MM::load(dest[i]); |
|
426 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)), d, cia)); |
|
427 } |
|
428 } |
|
429 MM::end(); |
|
430 } |
|
431 |
|
432 template <class MM> |
|
433 static void QT_FASTCALL comp_func_SourceOut(uint *dest, const uint *src, int length, uint const_alpha) |
|
434 { |
|
435 C_FF; C_80; C_00; |
|
436 if (const_alpha == 255) { |
|
437 for (int i = 0; i < length; ++i) { |
|
438 m64 ia = MM::negate(MM::alpha(MM::load(dest[i]))); |
|
439 dest[i] = MM::store(MM::byte_mul(MM::load(src[i]), ia)); |
|
440 } |
|
441 } else { |
|
442 m64 ca = MM::load_alpha(const_alpha); |
|
443 m64 cia = MM::negate(ca); |
|
444 for (int i = 0; i < length; ++i) { |
|
445 m64 d = MM::load(dest[i]); |
|
446 m64 dia = MM::byte_mul(MM::negate(MM::alpha(d)), ca); |
|
447 dest[i] = MM::store(MM::interpolate_pixel_255(MM::load(src[i]), dia, d, cia)); |
|
448 } |
|
449 } |
|
450 MM::end(); |
|
451 } |
|
452 |
|
453 /* |
|
454 result = d * sia |
|
455 dest = d * sia * ca + d * cia |
|
456 = d * (sia * ca + cia) |
|
457 */ |
|
458 template <class MM> |
|
459 static void QT_FASTCALL comp_func_solid_DestinationOut(uint *dest, int length, uint src, uint const_alpha) |
|
460 { |
|
461 C_FF; C_80; C_00; |
|
462 m64 a = MM::negate(MM::alpha(MM::load(src))); |
|
463 if (const_alpha != 255) { |
|
464 m64 ca = MM::load_alpha(const_alpha); |
|
465 a = MM::byte_mul(a, ca); |
|
466 a = MM::add(a, MM::negate(ca)); |
|
467 } |
|
468 for (int i = 0; i < length; ++i) |
|
469 dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a)); |
|
470 MM::end(); |
|
471 } |
|
472 |
|
473 template <class MM> |
|
474 static void QT_FASTCALL comp_func_DestinationOut(uint *dest, const uint *src, int length, uint const_alpha) |
|
475 { |
|
476 C_FF; C_80; C_00; |
|
477 if (const_alpha == 255) { |
|
478 for (int i = 0; i < length; ++i) { |
|
479 m64 a = MM::negate(MM::alpha(MM::load(src[i]))); |
|
480 dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a)); |
|
481 } |
|
482 } else { |
|
483 m64 ca = MM::load_alpha(const_alpha); |
|
484 m64 cia = MM::negate(ca); |
|
485 for (int i = 0; i < length; ++i) { |
|
486 m64 d = MM::load(dest[i]); |
|
487 m64 a = MM::negate(MM::alpha(MM::load(src[i]))); |
|
488 a = MM::byte_mul(a, ca); |
|
489 a = MM::add(a, cia); |
|
490 dest[i] = MM::store(MM::byte_mul(d, a)); |
|
491 } |
|
492 } |
|
493 MM::end(); |
|
494 } |
|
495 |
|
496 /* |
|
497 result = s*da + d*sia |
|
498 dest = s*da*ca + d*sia*ca + d *cia |
|
499 = s*ca * da + d * (sia*ca + cia) |
|
500 = s*ca * da + d * (1 - sa*ca) |
|
501 */ |
|
502 template <class MM> |
|
503 static void QT_FASTCALL comp_func_solid_SourceAtop(uint *dest, int length, uint src, uint const_alpha) |
|
504 { |
|
505 C_FF; C_80; C_00; |
|
506 m64 s = MM::load(src); |
|
507 if (const_alpha != 255) { |
|
508 m64 ca = MM::load_alpha(const_alpha); |
|
509 s = MM::byte_mul(s, ca); |
|
510 } |
|
511 m64 a = MM::negate(MM::alpha(s)); |
|
512 for (int i = 0; i < length; ++i) { |
|
513 m64 d = MM::load(dest[i]); |
|
514 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d, a)); |
|
515 } |
|
516 MM::end(); |
|
517 } |
|
518 |
|
519 template <class MM> |
|
520 static void QT_FASTCALL comp_func_SourceAtop(uint *dest, const uint *src, int length, uint const_alpha) |
|
521 { |
|
522 C_FF; C_80; C_00; |
|
523 if (const_alpha == 255) { |
|
524 for (int i = 0; i < length; ++i) { |
|
525 m64 s = MM::load(src[i]); |
|
526 m64 d = MM::load(dest[i]); |
|
527 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d, |
|
528 MM::negate(MM::alpha(s)))); |
|
529 } |
|
530 } else { |
|
531 m64 ca = MM::load_alpha(const_alpha); |
|
532 for (int i = 0; i < length; ++i) { |
|
533 m64 s = MM::load(src[i]); |
|
534 s = MM::byte_mul(s, ca); |
|
535 m64 d = MM::load(dest[i]); |
|
536 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d, |
|
537 MM::negate(MM::alpha(s)))); |
|
538 } |
|
539 } |
|
540 MM::end(); |
|
541 } |
|
542 |
|
543 /* |
|
544 result = d*sa + s*dia |
|
545 dest = d*sa*ca + s*dia*ca + d *cia |
|
546 = s*ca * dia + d * (sa*ca + cia) |
|
547 */ |
|
548 template <class MM> |
|
549 static void QT_FASTCALL comp_func_solid_DestinationAtop(uint *dest, int length, uint src, uint const_alpha) |
|
550 { |
|
551 C_FF; C_80; C_00; |
|
552 m64 s = MM::load(src); |
|
553 m64 a = MM::alpha(s); |
|
554 if (const_alpha != 255) { |
|
555 m64 ca = MM::load_alpha(const_alpha); |
|
556 s = MM::byte_mul(s, ca); |
|
557 a = MM::alpha(s); |
|
558 a = MM::add(a, MM::negate(ca)); |
|
559 } |
|
560 for (int i = 0; i < length; ++i) { |
|
561 m64 d = MM::load(dest[i]); |
|
562 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)), d, a)); |
|
563 } |
|
564 MM::end(); |
|
565 } |
|
566 |
|
567 template <class MM> |
|
568 static void QT_FASTCALL comp_func_DestinationAtop(uint *dest, const uint *src, int length, uint const_alpha) |
|
569 { |
|
570 C_FF; C_80; C_00; |
|
571 if (const_alpha == 255) { |
|
572 for (int i = 0; i < length; ++i) { |
|
573 m64 s = MM::load(src[i]); |
|
574 m64 d = MM::load(dest[i]); |
|
575 dest[i] = MM::store(MM::interpolate_pixel_255(d, MM::alpha(s), s, |
|
576 MM::negate(MM::alpha(d)))); |
|
577 } |
|
578 } else { |
|
579 m64 ca = MM::load_alpha(const_alpha); |
|
580 for (int i = 0; i < length; ++i) { |
|
581 m64 s = MM::load(src[i]); |
|
582 s = MM::byte_mul(s, ca); |
|
583 m64 d = MM::load(dest[i]); |
|
584 m64 a = MM::alpha(s); |
|
585 a = MM::add(a, MM::negate(ca)); |
|
586 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)), |
|
587 d, a)); |
|
588 } |
|
589 } |
|
590 MM::end(); |
|
591 } |
|
592 |
|
593 /* |
|
594 result = d*sia + s*dia |
|
595 dest = d*sia*ca + s*dia*ca + d *cia |
|
596 = s*ca * dia + d * (sia*ca + cia) |
|
597 = s*ca * dia + d * (1 - sa*ca) |
|
598 */ |
|
599 template <class MM> |
|
600 static void QT_FASTCALL comp_func_solid_XOR(uint *dest, int length, uint src, uint const_alpha) |
|
601 { |
|
602 C_FF; C_80; C_00; |
|
603 m64 s = MM::load(src); |
|
604 if (const_alpha != 255) { |
|
605 m64 ca = MM::load_alpha(const_alpha); |
|
606 s = MM::byte_mul(s, ca); |
|
607 } |
|
608 m64 a = MM::negate(MM::alpha(s)); |
|
609 for (int i = 0; i < length; ++i) { |
|
610 m64 d = MM::load(dest[i]); |
|
611 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)), |
|
612 d, a)); |
|
613 } |
|
614 MM::end(); |
|
615 } |
|
616 |
|
617 template <class MM> |
|
618 static void QT_FASTCALL comp_func_XOR(uint *dest, const uint *src, int length, uint const_alpha) |
|
619 { |
|
620 C_FF; C_80; C_00; |
|
621 if (const_alpha == 255) { |
|
622 for (int i = 0; i < length; ++i) { |
|
623 m64 s = MM::load(src[i]); |
|
624 m64 d = MM::load(dest[i]); |
|
625 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)), |
|
626 d, MM::negate(MM::alpha(s)))); |
|
627 } |
|
628 } else { |
|
629 m64 ca = MM::load_alpha(const_alpha); |
|
630 for (int i = 0; i < length; ++i) { |
|
631 m64 s = MM::load(src[i]); |
|
632 s = MM::byte_mul(s, ca); |
|
633 m64 d = MM::load(dest[i]); |
|
634 dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)), |
|
635 d, MM::negate(MM::alpha(s)))); |
|
636 } |
|
637 } |
|
638 MM::end(); |
|
639 } |
|
640 |
|
641 template <class MM> |
|
642 static void QT_FASTCALL rasterop_solid_SourceOrDestination(uint *dest, |
|
643 int length, |
|
644 uint color, |
|
645 uint const_alpha) |
|
646 { |
|
647 Q_UNUSED(const_alpha); |
|
648 |
|
649 if ((quintptr)(dest) & 0x7) { |
|
650 *dest++ |= color; |
|
651 --length; |
|
652 } |
|
653 |
|
654 const int length64 = length / 2; |
|
655 if (length64) { |
|
656 __m64 *dst64 = reinterpret_cast<__m64*>(dest); |
|
657 const __m64 color64 = _mm_set_pi32(color, color); |
|
658 |
|
659 int n = (length64 + 3) / 4; |
|
660 switch (length64 & 0x3) { |
|
661 case 0: do { *dst64 = _mm_or_si64(*dst64, color64); ++dst64; |
|
662 case 3: *dst64 = _mm_or_si64(*dst64, color64); ++dst64; |
|
663 case 2: *dst64 = _mm_or_si64(*dst64, color64); ++dst64; |
|
664 case 1: *dst64 = _mm_or_si64(*dst64, color64); ++dst64; |
|
665 } while (--n > 0); |
|
666 } |
|
667 } |
|
668 |
|
669 if (length & 0x1) { |
|
670 dest[length - 1] |= color; |
|
671 } |
|
672 |
|
673 MM::end(); |
|
674 } |
|
675 |
|
676 template <class MM> |
|
677 static void QT_FASTCALL rasterop_solid_SourceAndDestination(uint *dest, |
|
678 int length, |
|
679 uint color, |
|
680 uint const_alpha) |
|
681 { |
|
682 Q_UNUSED(const_alpha); |
|
683 |
|
684 color |= 0xff000000; |
|
685 |
|
686 if ((quintptr)(dest) & 0x7) { // align |
|
687 *dest++ &= color; |
|
688 --length; |
|
689 } |
|
690 |
|
691 const int length64 = length / 2; |
|
692 if (length64) { |
|
693 __m64 *dst64 = reinterpret_cast<__m64*>(dest); |
|
694 const __m64 color64 = _mm_set_pi32(color, color); |
|
695 |
|
696 int n = (length64 + 3) / 4; |
|
697 switch (length64 & 0x3) { |
|
698 case 0: do { *dst64 = _mm_and_si64(*dst64, color64); ++dst64; |
|
699 case 3: *dst64 = _mm_and_si64(*dst64, color64); ++dst64; |
|
700 case 2: *dst64 = _mm_and_si64(*dst64, color64); ++dst64; |
|
701 case 1: *dst64 = _mm_and_si64(*dst64, color64); ++dst64; |
|
702 } while (--n > 0); |
|
703 } |
|
704 } |
|
705 |
|
706 if (length & 0x1) { |
|
707 dest[length - 1] &= color; |
|
708 } |
|
709 |
|
710 MM::end(); |
|
711 } |
|
712 |
|
713 template <class MM> |
|
714 static void QT_FASTCALL rasterop_solid_SourceXorDestination(uint *dest, |
|
715 int length, |
|
716 uint color, |
|
717 uint const_alpha) |
|
718 { |
|
719 Q_UNUSED(const_alpha); |
|
720 |
|
721 color &= 0x00ffffff; |
|
722 |
|
723 if ((quintptr)(dest) & 0x7) { |
|
724 *dest++ ^= color; |
|
725 --length; |
|
726 } |
|
727 |
|
728 const int length64 = length / 2; |
|
729 if (length64) { |
|
730 __m64 *dst64 = reinterpret_cast<__m64*>(dest); |
|
731 const __m64 color64 = _mm_set_pi32(color, color); |
|
732 |
|
733 int n = (length64 + 3) / 4; |
|
734 switch (length64 & 0x3) { |
|
735 case 0: do { *dst64 = _mm_xor_si64(*dst64, color64); ++dst64; |
|
736 case 3: *dst64 = _mm_xor_si64(*dst64, color64); ++dst64; |
|
737 case 2: *dst64 = _mm_xor_si64(*dst64, color64); ++dst64; |
|
738 case 1: *dst64 = _mm_xor_si64(*dst64, color64); ++dst64; |
|
739 } while (--n > 0); |
|
740 } |
|
741 } |
|
742 |
|
743 if (length & 0x1) { |
|
744 dest[length - 1] ^= color; |
|
745 } |
|
746 |
|
747 MM::end(); |
|
748 } |
|
749 |
|
750 template <class MM> |
|
751 static void QT_FASTCALL rasterop_solid_SourceAndNotDestination(uint *dest, |
|
752 int length, |
|
753 uint color, |
|
754 uint const_alpha) |
|
755 { |
|
756 |
|
757 Q_UNUSED(const_alpha); |
|
758 |
|
759 if ((quintptr)(dest) & 0x7) { |
|
760 *dest = (color & ~(*dest)) | 0xff000000; |
|
761 ++dest; |
|
762 --length; |
|
763 } |
|
764 |
|
765 const int length64 = length / 2; |
|
766 if (length64) { |
|
767 __m64 *dst64 = reinterpret_cast<__m64*>(dest); |
|
768 const __m64 color64 = _mm_set_pi32(color, color); |
|
769 const m64 mmx_0xff000000 = _mm_set1_pi32(0xff000000); |
|
770 __m64 tmp1, tmp2, tmp3, tmp4; |
|
771 |
|
772 int n = (length64 + 3) / 4; |
|
773 switch (length64 & 0x3) { |
|
774 case 0: do { tmp1 = _mm_andnot_si64(*dst64, color64); |
|
775 *dst64++ = _mm_or_si64(tmp1, mmx_0xff000000); |
|
776 case 3: tmp2 = _mm_andnot_si64(*dst64, color64); |
|
777 *dst64++ = _mm_or_si64(tmp2, mmx_0xff000000); |
|
778 case 2: tmp3 = _mm_andnot_si64(*dst64, color64); |
|
779 *dst64++ = _mm_or_si64(tmp3, mmx_0xff000000); |
|
780 case 1: tmp4 = _mm_andnot_si64(*dst64, color64); |
|
781 *dst64++ = _mm_or_si64(tmp4, mmx_0xff000000); |
|
782 } while (--n > 0); |
|
783 } |
|
784 } |
|
785 |
|
786 if (length & 0x1) { |
|
787 dest[length - 1] = (color & ~(dest[length - 1])) | 0xff000000; |
|
788 } |
|
789 |
|
790 MM::end(); |
|
791 } |
|
792 |
|
793 template <class MM> |
|
794 static void QT_FASTCALL rasterop_solid_NotSourceAndNotDestination(uint *dest, |
|
795 int length, |
|
796 uint color, |
|
797 uint const_alpha) |
|
798 { |
|
799 rasterop_solid_SourceAndNotDestination<MM>(dest, length, |
|
800 ~color, const_alpha); |
|
801 } |
|
802 |
|
803 template <class MM> |
|
804 static void QT_FASTCALL rasterop_solid_NotSourceOrNotDestination(uint *dest, |
|
805 int length, |
|
806 uint color, |
|
807 uint const_alpha) |
|
808 { |
|
809 Q_UNUSED(const_alpha); |
|
810 color = ~color | 0xff000000; |
|
811 while (length--) { |
|
812 *dest = color | ~(*dest); |
|
813 ++dest; |
|
814 } |
|
815 } |
|
816 |
|
817 template <class MM> |
|
818 static void QT_FASTCALL rasterop_solid_NotSourceXorDestination(uint *dest, |
|
819 int length, |
|
820 uint color, |
|
821 uint const_alpha) |
|
822 { |
|
823 rasterop_solid_SourceXorDestination<MM>(dest, length, ~color, const_alpha); |
|
824 } |
|
825 |
|
826 template <class MM> |
|
827 static void QT_FASTCALL rasterop_solid_NotSource(uint *dest, int length, |
|
828 uint color, uint const_alpha) |
|
829 { |
|
830 Q_UNUSED(const_alpha); |
|
831 qt_memfill((quint32*)dest, ~color | 0xff000000, length); |
|
832 } |
|
833 |
|
834 template <class MM> |
|
835 static void QT_FASTCALL rasterop_solid_NotSourceAndDestination(uint *dest, |
|
836 int length, |
|
837 uint color, |
|
838 uint const_alpha) |
|
839 { |
|
840 rasterop_solid_SourceAndDestination<MM>(dest, length, |
|
841 ~color, const_alpha); |
|
842 } |
|
843 |
|
844 template <class MM> |
|
845 static inline void qt_blend_color_argb_x86(int count, const QSpan *spans, |
|
846 void *userData, |
|
847 CompositionFunctionSolid *solidFunc) |
|
848 { |
|
849 QSpanData *data = reinterpret_cast<QSpanData *>(userData); |
|
850 if (data->rasterBuffer->compositionMode == QPainter::CompositionMode_Source |
|
851 || (data->rasterBuffer->compositionMode == QPainter::CompositionMode_SourceOver |
|
852 && qAlpha(data->solid.color) == 255)) { |
|
853 // inline for performance |
|
854 C_FF; C_80; C_00; |
|
855 while (count--) { |
|
856 uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x; |
|
857 if (spans->coverage == 255) { |
|
858 qt_memfill(static_cast<quint32*>(target), quint32(data->solid.color), spans->len); |
|
859 } else { |
|
860 // dest = s * ca + d * (1 - sa*ca) --> dest = s * ca + d * (1-ca) |
|
861 m64 ca = MM::load_alpha(spans->coverage); |
|
862 m64 s = MM::byte_mul(MM::load(data->solid.color), ca); |
|
863 m64 ica = MM::negate(ca); |
|
864 for (int i = 0; i < spans->len; ++i) |
|
865 target[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(target[i]), ica))); |
|
866 } |
|
867 ++spans; |
|
868 } |
|
869 MM::end(); |
|
870 return; |
|
871 } |
|
872 CompositionFunctionSolid func = solidFunc[data->rasterBuffer->compositionMode]; |
|
873 while (count--) { |
|
874 uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x; |
|
875 func(target, spans->len, data->solid.color, spans->coverage); |
|
876 ++spans; |
|
877 } |
|
878 } |
|
879 |
|
#ifdef QT_HAVE_MMX
// Plain-MMX instantiation of the intrinsics set: inherits all arithmetic
// helpers from QMMXCommonIntrinsics and supplies the end() hook that the
// templates above call when they finish.
struct QMMXIntrinsics : public QMMXCommonIntrinsics
{
    // Issues _mm_empty(); compiled out on Windows CE builds that do not
    // define _X86_.
    static inline void end()
    {
#if defined(_X86_) || !defined(Q_OS_WINCE)
        _mm_empty();
#endif
    }
};
#endif // QT_HAVE_MMX
|
890 |
|
891 QT_END_NAMESPACE |
|
892 |
|
893 #endif // QDRAWHELPER_MMX_P_H |