--- a/src/gui/painting/qdrawhelper_sse2.cpp Fri Sep 17 08:34:18 2010 +0300
+++ b/src/gui/painting/qdrawhelper_sse2.cpp Mon Oct 04 01:19:32 2010 +0300
@@ -43,7 +43,6 @@
#ifdef QT_HAVE_SSE2
-#include <private/qsimd_p.h>
#include <private/qdrawingprimitive_sse2_p.h>
#include <private/qpaintengine_raster_p.h>
@@ -112,9 +111,7 @@
int x = 0;
// First, align dest to 16 bytes:
- const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3;
- const int prologLength = qMin(w, offsetToAlignOn16Bytes);
- for (; x < prologLength; ++x) {
+ ALIGNMENT_PROLOGUE_16BYTES(dst, x, w) {
quint32 s = src[x];
s = BYTE_MUL(s, const_alpha);
dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha);
@@ -145,7 +142,6 @@
void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha)
{
- Q_ASSERT(const_alpha >= 0);
Q_ASSERT(const_alpha < 256);
const quint32 *src = (const quint32 *) srcPixels;
@@ -164,6 +160,85 @@
}
}
+void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha)
+{ // "Plus" composition of length 32-bit pixels from src into dst, four pixels at a time with SSE2.
+ int x = 0; // running index shared by the prologue, the vector loop and the epilogue
+
+ if (const_alpha == 255) { // this path stores the sum directly, with no blend against the old dst
+ // 1) Prologue: one pixel at a time until dst is 16-byte aligned (the loop header comes from the macro, which is not defined in this view)
+ ALIGNMENT_PROLOGUE_16BYTES(dst, x, length)
+ dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]);
+
+ // 2) SSE2 body: 4 pixels per iteration; src is read unaligned, dst uses aligned load/store
+ for (; x < length - 3; x += 4) {
+ const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
+ const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
+
+ const __m128i result = _mm_adds_epu8(srcVector, dstVector); // unsigned per-byte add, saturating at 255
+ _mm_store_si128((__m128i *)&dst[x], result);
+ }
+
+ // 3) Epilogue: the at most 3 trailing pixels that did not fill a whole vector
+ for (; x < length; ++x)
+ dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]);
+ } else { // any other const_alpha: the sum is additionally interpolated with the old dst
+ const int one_minus_const_alpha = 255 - const_alpha; // complement of const_alpha on the 0..255 scale
+ const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); // const_alpha broadcast to every 16-bit lane
+ const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha); // complement broadcast likewise
+
+ // 1) Prologue: one pixel at a time until dst is 16-byte aligned (the loop header comes from the macro, which is not defined in this view)
+ ALIGNMENT_PROLOGUE_16BYTES(dst, x, length)
+ dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha);
+
+ const __m128i half = _mm_set1_epi16(0x80); // constant handed to INTERPOLATE_PIXEL_255_SSE2 below
+ const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); // constant handed to INTERPOLATE_PIXEL_255_SSE2 below
+ // 2) SSE2 body: 4 pixels per iteration; src is read unaligned, dst uses aligned load/store
+ for (; x < length - 3; x += 4) {
+ const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
+ const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
+
+ __m128i result = _mm_adds_epu8(srcVector, dstVector); // unsigned per-byte add, saturating at 255
+ INTERPOLATE_PIXEL_255_SSE2(result, result, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half) // result is passed as both of the first two arguments (presumably output and source operand -- TODO confirm against the macro definition)
+ _mm_store_si128((__m128i *)&dst[x], result);
+ }
+
+ // 3) Epilogue: the at most 3 trailing pixels that did not fill a whole vector
+ for (; x < length; ++x)
+ dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha);
+ }
+}
+
+void QT_FASTCALL comp_func_Source_sse2(uint *dst, const uint *src, int length, uint const_alpha)
+{ // "Source" composition: plain copy when const_alpha == 255, otherwise a per-pixel interpolation of src with dst.
+ if (const_alpha == 255) {
+ ::memcpy(dst, src, length * sizeof(uint)); // copies length uint elements from src to dst in one call
+ } else {
+ const int ialpha = 255 - const_alpha; // complement of const_alpha on the 0..255 scale
+
+ int x = 0; // running index shared by the prologue, the vector loop and the epilogue
+
+ // 1) Prologue: one pixel at a time until dst is 16-byte aligned (the loop header comes from the macro, which is not defined in this view)
+ ALIGNMENT_PROLOGUE_16BYTES(dst, x, length)
+ dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha);
+
+ // 2) SSE2 body: 4 pixels per iteration, using the vector form of the same interpolation macro
+ const __m128i half = _mm_set1_epi16(0x80); // constant handed to INTERPOLATE_PIXEL_255_SSE2 below
+ const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); // constant handed to INTERPOLATE_PIXEL_255_SSE2 below
+ const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); // const_alpha broadcast to every 16-bit lane
+ const __m128i oneMinusConstAlpha = _mm_set1_epi16(ialpha); // ialpha broadcast to every 16-bit lane
+ for (; x < length - 3; x += 4) {
+ const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); // unaligned load: only dst was aligned by the prologue
+ __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); // aligned load; also passed as the macro's first argument
+ INTERPOLATE_PIXEL_255_SSE2(dstVector, srcVector, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half)
+ _mm_store_si128((__m128i *)&dst[x], dstVector);
+ }
+
+ // 3) Epilogue: the at most 3 trailing pixels that did not fill a whole vector
+ for (; x < length; ++x)
+ dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha);
+ }
+}
+
void qt_memfill32_sse2(quint32 *dest, quint32 value, int count)
{
if (count < 7) {
@@ -236,6 +311,78 @@
}
}
+CompositionFunctionSolid qt_functionForModeSolid_onlySSE2[numCompositionFunctions] = { // table of solid-colour composition functions; presumably indexed by composition mode -- TODO confirm against the users of numCompositionFunctions
+ comp_func_solid_SourceOver_sse2, // the only entry in this table carrying the _sse2 suffix
+ comp_func_solid_DestinationOver,
+ comp_func_solid_Clear,
+ comp_func_solid_Source,
+ comp_func_solid_Destination,
+ comp_func_solid_SourceIn,
+ comp_func_solid_DestinationIn,
+ comp_func_solid_SourceOut,
+ comp_func_solid_DestinationOut,
+ comp_func_solid_SourceAtop,
+ comp_func_solid_DestinationAtop,
+ comp_func_solid_XOR,
+ comp_func_solid_Plus,
+ comp_func_solid_Multiply,
+ comp_func_solid_Screen,
+ comp_func_solid_Overlay,
+ comp_func_solid_Darken,
+ comp_func_solid_Lighten,
+ comp_func_solid_ColorDodge,
+ comp_func_solid_ColorBurn,
+ comp_func_solid_HardLight,
+ comp_func_solid_SoftLight,
+ comp_func_solid_Difference,
+ comp_func_solid_Exclusion,
+ rasterop_solid_SourceOrDestination, // the remaining entries are the rasterop_solid_* functions
+ rasterop_solid_SourceAndDestination,
+ rasterop_solid_SourceXorDestination,
+ rasterop_solid_NotSourceAndNotDestination,
+ rasterop_solid_NotSourceOrNotDestination,
+ rasterop_solid_NotSourceXorDestination,
+ rasterop_solid_NotSource,
+ rasterop_solid_NotSourceAndDestination,
+ rasterop_solid_SourceAndNotDestination
+};
+
+CompositionFunction qt_functionForMode_onlySSE2[numCompositionFunctions] = { // table of span composition functions, same entry order as qt_functionForModeSolid_onlySSE2; presumably indexed by composition mode -- TODO confirm
+ comp_func_SourceOver_sse2, // SSE2 variant defined in this file
+ comp_func_DestinationOver,
+ comp_func_Clear,
+ comp_func_Source_sse2, // SSE2 variant defined in this file
+ comp_func_Destination,
+ comp_func_SourceIn,
+ comp_func_DestinationIn,
+ comp_func_SourceOut,
+ comp_func_DestinationOut,
+ comp_func_SourceAtop,
+ comp_func_DestinationAtop,
+ comp_func_XOR,
+ comp_func_Plus_sse2, // SSE2 variant defined in this file
+ comp_func_Multiply,
+ comp_func_Screen,
+ comp_func_Overlay,
+ comp_func_Darken,
+ comp_func_Lighten,
+ comp_func_ColorDodge,
+ comp_func_ColorBurn,
+ comp_func_HardLight,
+ comp_func_SoftLight,
+ comp_func_Difference,
+ comp_func_Exclusion,
+ rasterop_SourceOrDestination, // the remaining entries are the rasterop_* functions
+ rasterop_SourceAndDestination,
+ rasterop_SourceXorDestination,
+ rasterop_NotSourceAndNotDestination,
+ rasterop_NotSourceOrNotDestination,
+ rasterop_NotSourceXorDestination,
+ rasterop_NotSource,
+ rasterop_NotSourceAndDestination,
+ rasterop_SourceAndNotDestination
+};
+
void qt_memfill16_sse2(quint16 *dest, quint16 value, int count)
{
if (count < 3) {