src/gui/painting/qdrawhelper_mmx_p.h
changeset 0 1918ee327afb
child 4 3b1da2848fc7
child 7 f7bc934e204c
equal deleted inserted replaced
-1:000000000000 0:1918ee327afb
       
     1 /****************************************************************************
       
     2 **
       
     3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     4 ** All rights reserved.
       
     5 ** Contact: Nokia Corporation (qt-info@nokia.com)
       
     6 **
       
     7 ** This file is part of the QtGui module of the Qt Toolkit.
       
     8 **
       
     9 ** $QT_BEGIN_LICENSE:LGPL$
       
    10 ** No Commercial Usage
       
    11 ** This file contains pre-release code and may not be distributed.
       
    12 ** You may use this file in accordance with the terms and conditions
       
    13 ** contained in the Technology Preview License Agreement accompanying
       
    14 ** this package.
       
    15 **
       
    16 ** GNU Lesser General Public License Usage
       
    17 ** Alternatively, this file may be used under the terms of the GNU Lesser
       
    18 ** General Public License version 2.1 as published by the Free Software
       
    19 ** Foundation and appearing in the file LICENSE.LGPL included in the
       
    20 ** packaging of this file.  Please review the following information to
       
    21 ** ensure the GNU Lesser General Public License version 2.1 requirements
       
    22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
       
    23 **
       
    24 ** In addition, as a special exception, Nokia gives you certain additional
       
    25 ** rights.  These rights are described in the Nokia Qt LGPL Exception
       
    26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
       
    27 **
       
    28 ** If you have questions regarding the use of this file, please contact
       
    29 ** Nokia at qt-info@nokia.com.
       
    30 **
       
    31 **
       
    32 **
       
    33 **
       
    34 **
       
    35 **
       
    36 **
       
    37 **
       
    38 ** $QT_END_LICENSE$
       
    39 **
       
    40 ****************************************************************************/
       
    41 
       
    42 #ifndef QDRAWHELPER_MMX_P_H
       
    43 #define QDRAWHELPER_MMX_P_H
       
    44 
       
    45 //
       
    46 //  W A R N I N G
       
    47 //  -------------
       
    48 //
       
    49 // This file is not part of the Qt API.  It exists purely as an
       
    50 // implementation detail.  This header file may change from version to
       
    51 // version without notice, or even be removed.
       
    52 //
       
    53 // We mean it.
       
    54 //
       
    55 
       
    56 #include <private/qdrawhelper_p.h>
       
    57 #include <private/qdrawhelper_x86_p.h>
       
    58 #include <private/qpaintengine_raster_p.h>
       
    59 
       
    60 #ifdef QT_HAVE_MMX
       
    61 #include <mmintrin.h>
       
    62 #endif
       
    63 
       
    // Each macro below expands to the declaration of one local constant vector.
    // The wrapper macros defined after QMMXCommonIntrinsics (negate, byte_mul,
    // load, store, ...) refer to these locals by name, so a function has to
    // write "C_FF; C_80; C_00;" (or the subset it needs) before using them.
    //   C_FF -> mmx_0x00ff : 0x00ff in each of the four 16-bit lanes
    //   C_80 -> mmx_0x0080 : 0x0080 in each of the four 16-bit lanes
    //   C_00 -> mmx_0x0000 : all 64 bits zero
    // m64 is only typedef'd further down; that is fine because the macros are
    // expanded at their point of use, not here.
    64 #define C_FF const m64 mmx_0x00ff = _mm_set1_pi16(0xff)
       
    65 #define C_80 const m64 mmx_0x0080 = _mm_set1_pi16(0x80)
       
    66 #define C_00 const m64 mmx_0x0000 = _mm_setzero_si64()
       
    67 
       
    68 #ifdef Q_CC_MSVC
       
    69 #  pragma warning(disable: 4799) // No EMMS at end of function
       
    70 #endif
       
    71 
       
    72 typedef __m64 m64;
       
    73 
       
    74 QT_BEGIN_NAMESPACE
       
    75 
       
    76 struct QMMXCommonIntrinsics
       
    77 {
       
    78     static inline m64 alpha(m64 x) {
       
    79         x = _mm_unpackhi_pi16(x, x);
       
    80         x = _mm_unpackhi_pi16(x, x);
       
    81         return x;
       
    82     }
       
    83 
       
    84     static inline m64 _negate(const m64 &x, const m64 &mmx_0x00ff) {
       
    85         return _mm_xor_si64(x, mmx_0x00ff);
       
    86     }
       
    87 
       
    88     static inline m64 add(const m64 &a, const m64 &b) {
       
    89         return  _mm_adds_pu16 (a, b);
       
    90     }
       
    91 
       
    92     static inline m64 _byte_mul(const m64 &a, const m64 &b,
       
    93                                 const m64 &mmx_0x0080)
       
    94     {
       
    95         m64 res = _mm_mullo_pi16(a, b);
       
    96         res = _mm_adds_pu16(res, mmx_0x0080);
       
    97         res = _mm_adds_pu16(res, _mm_srli_pi16 (res, 8));
       
    98         return _mm_srli_pi16(res, 8);
       
    99     }
       
   100 
       
   101     static inline m64 interpolate_pixel_256(const m64 &x, const m64 &a,
       
   102                                            const m64 &y, const m64 &b)
       
   103     {
       
   104         m64 res = _mm_adds_pu16(_mm_mullo_pi16(x, a), _mm_mullo_pi16(y, b));
       
   105         return _mm_srli_pi16(res, 8);
       
   106     }
       
   107 
       
   108     static inline m64 _interpolate_pixel_255(const m64 &x, const m64 &a,
       
   109                                              const m64 &y, const m64 &b,
       
   110                                              const m64 &mmx_0x0080)
       
   111     {
       
   112         m64 res = _mm_adds_pu16(_mm_mullo_pi16(x, a), _mm_mullo_pi16(y, b));
       
   113         res = _mm_adds_pu16(res, mmx_0x0080);
       
   114         res = _mm_adds_pu16(res, _mm_srli_pi16 (res, 8));
       
   115         return _mm_srli_pi16(res, 8);
       
   116     }
       
   117 
       
   118     static inline m64 _premul(m64 x, const m64 &mmx_0x0080) {
       
   119         m64 a = alpha(x);
       
   120         return _byte_mul(x, a, mmx_0x0080);
       
   121     }
       
   122 
       
   123     static inline m64 _load(uint x, const m64 &mmx_0x0000) {
       
   124         return _mm_unpacklo_pi8(_mm_cvtsi32_si64(x), mmx_0x0000);
       
   125     }
       
   126 
       
   127     static inline m64 _load_alpha(uint x, const m64 &) {
       
   128         x |= (x << 16);
       
   129         return _mm_set1_pi32(x);
       
   130     }
       
   131 
       
   132     static inline uint _store(const m64 &x, const m64 &mmx_0x0000) {
       
   133         return _mm_cvtsi64_si32(_mm_packs_pu16(x, mmx_0x0000));
       
   134     }
       
   135 };
       
   136 
       
   // Shorthand used by the templates below as MM::negate(x), MM::load(x), ...
   // Each macro expands to the underscore-prefixed helper of the same name
   // (see QMMXCommonIntrinsics) and appends the constant vector that helper
   // needs, so the calling function must already have the matching local in
   // scope: mmx_0x00ff (C_FF), mmx_0x0080 (C_80) or mmx_0x0000 (C_00).
   // NOTE(review): these are plain function-like macros with very common
   // names, and no #undef is visible in this part of the file; any later code
   // that calls something named load(...), store(...), negate(...) etc. after
   // including this header is rewritten by them as well.
   137 #define negate(x) _negate(x, mmx_0x00ff)
       
   138 #define byte_mul(a, b) _byte_mul(a, b, mmx_0x0080)
       
   139 #define interpolate_pixel_255(x, a, y, b) _interpolate_pixel_255(x, a, y, b, mmx_0x0080)
       
   140 #define premul(x) _premul(x, mmx_0x0080)
       
   141 #define load(x) _load(x, mmx_0x0000)
       
   142 #define load_alpha(x) _load_alpha(x, mmx_0x0000)
       
   143 #define store(x) _store(x, mmx_0x0000)
       
   144 
       
   145 /*
       
   146   result = 0
       
   147   d = d * cia
       
   148 */
       
   149 template <class MM>
       
   150 static void QT_FASTCALL comp_func_solid_Clear(uint *dest, int length, uint, uint const_alpha)
       
   151 {
       
   152     if (!length)
       
   153         return;
       
   154 
       
   155     if (const_alpha == 255) {
       
   156         qt_memfill(static_cast<quint32*>(dest), quint32(0), length);
       
   157     } else {
       
   158         C_FF; C_80; C_00;
       
   159         m64 ia = MM::negate(MM::load_alpha(const_alpha));
       
   160         for (int i = 0; i < length; ++i) {
       
   161             dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), ia));
       
   162         }
       
   163     }
       
   164     MM::end();
       
   165 }
       
   166 
       
   167 template <class MM>
       
   168 static void QT_FASTCALL comp_func_Clear(uint *dest, const uint *, int length, uint const_alpha)
       
   169 {
       
   170     if (const_alpha == 255) {
       
   171         qt_memfill(static_cast<quint32*>(dest), quint32(0), length);
       
   172     } else {
       
   173         C_FF; C_80; C_00;
       
   174         m64 ia = MM::negate(MM::load_alpha(const_alpha));
       
   175         for (int i = 0; i < length; ++i)
       
   176             dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), ia));
       
   177     }
       
   178     MM::end();
       
   179 }
       
   180 
       
   181 /*
       
   182   result = s
       
   183   dest = s * ca + d * cia
       
   184 */
       
   185 template <class MM>
       
   186 static void QT_FASTCALL comp_func_solid_Source(uint *dest, int length, uint src, uint const_alpha)
       
   187 {
       
   188     if (const_alpha == 255) {
       
   189         qt_memfill(static_cast<quint32*>(dest), quint32(src), length);
       
   190     } else {
       
   191         C_FF; C_80; C_00;
       
   192         const m64 a = MM::load_alpha(const_alpha);
       
   193         const m64 ia = MM::negate(a);
       
   194         const m64 s = MM::byte_mul(MM::load(src), a);
       
   195         for (int i = 0; i < length; ++i) {
       
   196             dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), ia)));
       
   197         }
       
   198         MM::end();
       
   199     }
       
   200 }
       
   201 
       
   202 template <class MM>
       
   203 static void QT_FASTCALL comp_func_Source(uint *dest, const uint *src, int length, uint const_alpha)
       
   204 {
       
   205     if (const_alpha == 255) {
       
   206         ::memcpy(dest, src, length * sizeof(uint));
       
   207     } else {
       
   208         C_FF; C_80; C_00;
       
   209         const m64 a = MM::load_alpha(const_alpha);
       
   210         const m64 ia = MM::negate(a);
       
   211         for (int i = 0; i < length; ++i)
       
   212             dest[i] = MM::store(MM::interpolate_pixel_255(MM::load(src[i]), a,
       
   213                                                         MM::load(dest[i]), ia));
       
   214     }
       
   215     MM::end();
       
   216 }
       
   217 
       
   218 /*
       
   219   result = s + d * sia
       
   220   dest = (s + d * sia) * ca + d * cia
       
   221        = s * ca + d * (sia * ca + cia)
       
   222        = s * ca + d * (1 - sa*ca)
       
   223 */
       
   224 template <class MM>
       
   225 static void QT_FASTCALL comp_func_solid_SourceOver(uint *dest, int length, uint src, uint const_alpha)
       
   226 {
       
   227     if ((const_alpha & qAlpha(src)) == 255) {
       
   228         qt_memfill(static_cast<quint32*>(dest), quint32(src), length);
       
   229     } else {
       
   230         C_FF; C_80; C_00;
       
   231         m64 s = MM::load(src);
       
   232         if (const_alpha != 255) {
       
   233             m64 ca = MM::load_alpha(const_alpha);
       
   234             s = MM::byte_mul(s, ca);
       
   235         }
       
   236         m64 a = MM::negate(MM::alpha(s));
       
   237         for (int i = 0; i < length; ++i)
       
   238             dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), a)));
       
   239         MM::end();
       
   240     }
       
   241 }
       
   242 
       
   243 template <class MM>
       
   244 static void QT_FASTCALL comp_func_SourceOver(uint *dest, const uint *src, int length, uint const_alpha)
       
   245 {
       
   246     C_FF; C_80; C_00;
       
   247     if (const_alpha == 255) {
       
   248         for (int i = 0; i < length; ++i) {
       
   249             if ((0xff000000 & src[i]) == 0xff000000) {
       
   250                 dest[i] = src[i];
       
   251             } else {
       
   252                 m64 s = MM::load(src[i]);
       
   253                 m64 ia = MM::negate(MM::alpha(s));
       
   254                 dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), ia)));
       
   255             }
       
   256         }
       
   257     } else {
       
   258         m64 ca = MM::load_alpha(const_alpha);
       
   259         for (int i = 0; i < length; ++i) {
       
   260             m64 s = MM::byte_mul(MM::load(src[i]), ca);
       
   261             m64 ia = MM::negate(MM::alpha(s));
       
   262             dest[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(dest[i]), ia)));
       
   263         }
       
   264     }
       
   265     MM::end();
       
   266 }
       
   267 
       
   268 /*
       
   269   result = d + s * dia
       
   270   dest = (d + s * dia) * ca + d * cia
       
   271        = d + s * dia * ca
       
   272 */
       
   273 template <class MM>
       
   274 static void QT_FASTCALL comp_func_solid_DestinationOver(uint *dest, int length, uint src, uint const_alpha)
       
   275 {
       
   276     C_FF; C_80; C_00;
       
   277     m64 s = MM::load(src);
       
   278     if (const_alpha != 255)
       
   279         s = MM::byte_mul(s, MM::load_alpha(const_alpha));
       
   280 
       
   281     for (int i = 0; i < length; ++i) {
       
   282         m64 d = MM::load(dest[i]);
       
   283         m64 dia = MM::negate(MM::alpha(d));
       
   284         dest[i] = MM::store(MM::add(d, MM::byte_mul(s, dia)));
       
   285     }
       
   286     MM::end();
       
   287 }
       
   288 
       
   289 template <class MM>
       
   290 static void QT_FASTCALL comp_func_DestinationOver(uint *dest, const uint *src, int length, uint const_alpha)
       
   291 {
       
   292     C_FF; C_80; C_00;
       
   293     if (const_alpha == 255) {
       
   294         for (int i = 0; i < length; ++i) {
       
   295             m64 d = MM::load(dest[i]);
       
   296             m64 ia = MM::negate(MM::alpha(d));
       
   297             dest[i] = MM::store(MM::add(d, MM::byte_mul(MM::load(src[i]), ia)));
       
   298         }
       
   299     } else {
       
   300         m64 ca = MM::load_alpha(const_alpha);
       
   301         for (int i = 0; i < length; ++i) {
       
   302             m64 d = MM::load(dest[i]);
       
   303             m64 dia = MM::negate(MM::alpha(d));
       
   304             dia = MM::byte_mul(dia, ca);
       
   305             dest[i] = MM::store(MM::add(d, MM::byte_mul(MM::load(src[i]), dia)));
       
   306         }
       
   307     }
       
   308     MM::end();
       
   309 }
       
   310 
       
   311 /*
       
   312   result = s * da
       
   313   dest = s * da * ca + d * cia
       
   314 */
       
   315 template <class MM>
       
   316 static void QT_FASTCALL comp_func_solid_SourceIn(uint *dest, int length, uint src, uint const_alpha)
       
   317 {
       
   318     C_80; C_00;
       
   319     if (const_alpha == 255) {
       
   320         m64 s = MM::load(src);
       
   321         for (int i = 0; i < length; ++i) {
       
   322             m64 da = MM::alpha(MM::load(dest[i]));
       
   323             dest[i] = MM::store(MM::byte_mul(s, da));
       
   324         }
       
   325     } else {
       
   326         C_FF;
       
   327         m64 s = MM::load(src);
       
   328         m64 ca = MM::load_alpha(const_alpha);
       
   329         s = MM::byte_mul(s, ca);
       
   330         m64 cia = MM::negate(ca);
       
   331         for (int i = 0; i < length; ++i) {
       
   332             m64 d = MM::load(dest[i]);
       
   333             dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d, cia));
       
   334         }
       
   335     }
       
   336     MM::end();
       
   337 }
       
   338 
       
   339 template <class MM>
       
   340 static void QT_FASTCALL comp_func_SourceIn(uint *dest, const uint *src, int length, uint const_alpha)
       
   341 {
       
   342     C_FF; C_80; C_00;
       
   343     if (const_alpha == 255) {
       
   344         for (int i = 0; i < length; ++i) {
       
   345             m64 a = MM::alpha(MM::load(dest[i]));
       
   346             dest[i] = MM::store(MM::byte_mul(MM::load(src[i]), a));
       
   347         }
       
   348     } else {
       
   349         m64 ca = MM::load_alpha(const_alpha);
       
   350         m64 cia = MM::negate(ca);
       
   351         for (int i = 0; i < length; ++i) {
       
   352             m64 d = MM::load(dest[i]);
       
   353             m64 da = MM::byte_mul(MM::alpha(d), ca);
       
   354             dest[i] = MM::store(MM::interpolate_pixel_255(
       
   355                                    MM::load(src[i]), da, d, cia));
       
   356         }
       
   357     }
       
   358     MM::end();
       
   359 }
       
   360 
       
   361 /*
       
   362   result = d * sa
       
   363   dest = d * sa * ca + d * cia
       
   364        = d * (sa * ca + cia)
       
   365 */
       
   366 template <class MM>
       
   367 static void QT_FASTCALL comp_func_solid_DestinationIn(uint *dest, int length, uint src, uint const_alpha)
       
   368 {
       
   369     C_80; C_00;
       
   370     m64 a = MM::alpha(MM::load(src));
       
   371     if (const_alpha != 255) {
       
   372         C_FF;
       
   373         m64 ca = MM::load_alpha(const_alpha);
       
   374         m64 cia = MM::negate(ca);
       
   375         a = MM::byte_mul(a, ca);
       
   376         a = MM::add(a, cia);
       
   377     }
       
   378     for (int i = 0; i < length; ++i)
       
   379         dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a));
       
   380     MM::end();
       
   381 }
       
   382 
       
   383 template <class MM>
       
   384 static void QT_FASTCALL comp_func_DestinationIn(uint *dest, const uint *src, int length, uint const_alpha)
       
   385 {
       
   386     C_FF; C_80; C_00;
       
   387     if (const_alpha == 255) {
       
   388         for (int i = 0; i < length; ++i) {
       
   389             m64 a = MM::alpha(MM::load(src[i]));
       
   390             dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a));
       
   391         }
       
   392     } else {
       
   393         m64 ca = MM::load_alpha(const_alpha);
       
   394         m64 cia = MM::negate(ca);
       
   395         for (int i = 0; i < length; ++i) {
       
   396             m64 d = MM::load(dest[i]);
       
   397             m64 a = MM::alpha(MM::load(src[i]));
       
   398             a = MM::byte_mul(a, ca);
       
   399             a = MM::add(a, cia);
       
   400             dest[i] = MM::store(MM::byte_mul(d, a));
       
   401         }
       
   402     }
       
   403     MM::end();
       
   404 }
       
   405 
       
   406 /*
       
   407   result = s * dia
       
   408   dest = s * dia * ca + d * cia
       
   409 */
       
   410 template <class MM>
       
   411 static void QT_FASTCALL comp_func_solid_SourceOut(uint *dest, int length, uint src, uint const_alpha)
       
   412 {
       
   413     C_FF; C_80; C_00;
       
   414     m64 s = MM::load(src);
       
   415     if (const_alpha == 255) {
       
   416         for (int i = 0; i < length; ++i) {
       
   417             m64 dia = MM::negate(MM::alpha(MM::load(dest[i])));
       
   418             dest[i] = MM::store(MM::byte_mul(s, dia));
       
   419         }
       
   420     } else {
       
   421         m64 ca = MM::load_alpha(const_alpha);
       
   422         m64 cia = MM::negate(ca);
       
   423         s = MM::byte_mul(s, ca);
       
   424         for (int i = 0; i < length; ++i) {
       
   425             m64 d = MM::load(dest[i]);
       
   426             dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)), d, cia));
       
   427         }
       
   428     }
       
   429     MM::end();
       
   430 }
       
   431 
       
   432 template <class MM>
       
   433 static void QT_FASTCALL comp_func_SourceOut(uint *dest, const uint *src, int length, uint const_alpha)
       
   434 {
       
   435     C_FF; C_80; C_00;
       
   436     if (const_alpha == 255) {
       
   437         for (int i = 0; i < length; ++i) {
       
   438             m64 ia = MM::negate(MM::alpha(MM::load(dest[i])));
       
   439             dest[i] = MM::store(MM::byte_mul(MM::load(src[i]), ia));
       
   440         }
       
   441     } else {
       
   442         m64 ca = MM::load_alpha(const_alpha);
       
   443         m64 cia = MM::negate(ca);
       
   444         for (int i = 0; i < length; ++i) {
       
   445             m64 d = MM::load(dest[i]);
       
   446             m64 dia = MM::byte_mul(MM::negate(MM::alpha(d)), ca);
       
   447             dest[i] = MM::store(MM::interpolate_pixel_255(MM::load(src[i]), dia, d, cia));
       
   448         }
       
   449     }
       
   450     MM::end();
       
   451 }
       
   452 
       
   453 /*
       
   454   result = d * sia
       
   455   dest = d * sia * ca + d * cia
       
   456        = d * (sia * ca + cia)
       
   457 */
       
   458 template <class MM>
       
   459 static void QT_FASTCALL comp_func_solid_DestinationOut(uint *dest, int length, uint src, uint const_alpha)
       
   460 {
       
   461     C_FF; C_80; C_00;
       
   462     m64 a = MM::negate(MM::alpha(MM::load(src)));
       
   463     if (const_alpha != 255) {
       
   464         m64 ca = MM::load_alpha(const_alpha);
       
   465         a = MM::byte_mul(a, ca);
       
   466         a = MM::add(a, MM::negate(ca));
       
   467     }
       
   468     for (int i = 0; i < length; ++i)
       
   469         dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a));
       
   470     MM::end();
       
   471 }
       
   472 
       
   473 template <class MM>
       
   474 static void QT_FASTCALL comp_func_DestinationOut(uint *dest, const uint *src, int length, uint const_alpha)
       
   475 {
       
   476     C_FF; C_80; C_00;
       
   477     if (const_alpha == 255) {
       
   478         for (int i = 0; i < length; ++i) {
       
   479             m64 a = MM::negate(MM::alpha(MM::load(src[i])));
       
   480             dest[i] = MM::store(MM::byte_mul(MM::load(dest[i]), a));
       
   481         }
       
   482     } else {
       
   483         m64 ca = MM::load_alpha(const_alpha);
       
   484         m64 cia = MM::negate(ca);
       
   485         for (int i = 0; i < length; ++i) {
       
   486             m64 d = MM::load(dest[i]);
       
   487             m64 a = MM::negate(MM::alpha(MM::load(src[i])));
       
   488             a = MM::byte_mul(a, ca);
       
   489             a = MM::add(a, cia);
       
   490             dest[i] = MM::store(MM::byte_mul(d, a));
       
   491         }
       
   492     }
       
   493     MM::end();
       
   494 }
       
   495 
       
   496 /*
       
   497   result = s*da + d*sia
       
   498   dest = s*da*ca + d*sia*ca + d *cia
       
   499        = s*ca * da + d * (sia*ca + cia)
       
   500        = s*ca * da + d * (1 - sa*ca)
       
   501 */
       
   502 template <class MM>
       
   503 static void QT_FASTCALL comp_func_solid_SourceAtop(uint *dest, int length, uint src, uint const_alpha)
       
   504 {
       
   505     C_FF; C_80; C_00;
       
   506     m64 s = MM::load(src);
       
   507     if (const_alpha != 255) {
       
   508         m64 ca = MM::load_alpha(const_alpha);
       
   509         s = MM::byte_mul(s, ca);
       
   510     }
       
   511     m64 a = MM::negate(MM::alpha(s));
       
   512     for (int i = 0; i < length; ++i) {
       
   513         m64 d = MM::load(dest[i]);
       
   514         dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d, a));
       
   515     }
       
   516     MM::end();
       
   517 }
       
   518 
       
   519 template <class MM>
       
   520 static void QT_FASTCALL comp_func_SourceAtop(uint *dest, const uint *src, int length, uint const_alpha)
       
   521 {
       
   522     C_FF; C_80; C_00;
       
   523     if (const_alpha == 255) {
       
   524         for (int i = 0; i < length; ++i) {
       
   525             m64 s = MM::load(src[i]);
       
   526             m64 d = MM::load(dest[i]);
       
   527             dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d,
       
   528                                                         MM::negate(MM::alpha(s))));
       
   529         }
       
   530     } else {
       
   531         m64 ca = MM::load_alpha(const_alpha);
       
   532         for (int i = 0; i < length; ++i) {
       
   533             m64 s = MM::load(src[i]);
       
   534             s = MM::byte_mul(s, ca);
       
   535             m64 d = MM::load(dest[i]);
       
   536             dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::alpha(d), d,
       
   537                                                         MM::negate(MM::alpha(s))));
       
   538         }
       
   539     }
       
   540     MM::end();
       
   541 }
       
   542 
       
   543 /*
       
   544   result = d*sa + s*dia
       
   545   dest = d*sa*ca + s*dia*ca + d *cia
       
   546        = s*ca * dia + d * (sa*ca + cia)
       
   547 */
       
   548 template <class MM>
       
   549 static void QT_FASTCALL comp_func_solid_DestinationAtop(uint *dest, int length, uint src, uint const_alpha)
       
   550 {
       
   551     C_FF; C_80; C_00;
       
   552     m64 s = MM::load(src);
       
   553     m64 a = MM::alpha(s);
       
   554     if (const_alpha != 255) {
       
   555         m64 ca = MM::load_alpha(const_alpha);
       
   556         s = MM::byte_mul(s, ca);
       
   557         a = MM::alpha(s);
       
   558         a = MM::add(a, MM::negate(ca));
       
   559     }
       
   560     for (int i = 0; i < length; ++i) {
       
   561         m64 d = MM::load(dest[i]);
       
   562         dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)), d, a));
       
   563     }
       
   564     MM::end();
       
   565 }
       
   566 
       
   567 template <class MM>
       
   568 static void QT_FASTCALL comp_func_DestinationAtop(uint *dest, const uint *src, int length, uint const_alpha)
       
   569 {
       
   570     C_FF; C_80; C_00;
       
   571     if (const_alpha == 255) {
       
   572         for (int i = 0; i < length; ++i) {
       
   573             m64 s = MM::load(src[i]);
       
   574             m64 d = MM::load(dest[i]);
       
   575             dest[i] = MM::store(MM::interpolate_pixel_255(d, MM::alpha(s), s,
       
   576                                                         MM::negate(MM::alpha(d))));
       
   577         }
       
   578     } else {
       
   579         m64 ca = MM::load_alpha(const_alpha);
       
   580         for (int i = 0; i < length; ++i) {
       
   581             m64 s = MM::load(src[i]);
       
   582             s = MM::byte_mul(s, ca);
       
   583             m64 d = MM::load(dest[i]);
       
   584             m64 a = MM::alpha(s);
       
   585             a = MM::add(a, MM::negate(ca));
       
   586             dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)),
       
   587                                                         d, a));
       
   588         }
       
   589     }
       
   590     MM::end();
       
   591 }
       
   592 
       
   593 /*
       
   594   result = d*sia + s*dia
       
   595   dest = d*sia*ca + s*dia*ca + d *cia
       
   596        = s*ca * dia + d * (sia*ca + cia)
       
   597        = s*ca * dia + d * (1 - sa*ca)
       
   598 */
       
   599 template <class MM>
       
   600 static void QT_FASTCALL comp_func_solid_XOR(uint *dest, int length, uint src, uint const_alpha)
       
   601 {
       
   602     C_FF; C_80; C_00;
       
   603     m64 s = MM::load(src);
       
   604     if (const_alpha != 255) {
       
   605         m64 ca = MM::load_alpha(const_alpha);
       
   606         s = MM::byte_mul(s, ca);
       
   607     }
       
   608     m64 a = MM::negate(MM::alpha(s));
       
   609     for (int i = 0; i < length; ++i) {
       
   610         m64 d = MM::load(dest[i]);
       
   611         dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)),
       
   612                                                     d, a));
       
   613     }
       
   614     MM::end();
       
   615 }
       
   616 
       
   617 template <class MM>
       
   618 static void QT_FASTCALL comp_func_XOR(uint *dest, const uint *src, int length, uint const_alpha)
       
   619 {
       
   620     C_FF; C_80; C_00;
       
   621     if (const_alpha == 255) {
       
   622         for (int i = 0; i < length; ++i) {
       
   623             m64 s = MM::load(src[i]);
       
   624             m64 d = MM::load(dest[i]);
       
   625             dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)),
       
   626                                                         d, MM::negate(MM::alpha(s))));
       
   627         }
       
   628     } else {
       
   629         m64 ca = MM::load_alpha(const_alpha);
       
   630         for (int i = 0; i < length; ++i) {
       
   631             m64 s = MM::load(src[i]);
       
   632             s = MM::byte_mul(s, ca);
       
   633             m64 d = MM::load(dest[i]);
       
   634             dest[i] = MM::store(MM::interpolate_pixel_255(s, MM::negate(MM::alpha(d)),
       
   635                                                         d, MM::negate(MM::alpha(s))));
       
   636         }
       
   637     }
       
   638     MM::end();
       
   639 }
       
   640 
       
   641 template <class MM>
       
   642 static void QT_FASTCALL rasterop_solid_SourceOrDestination(uint *dest,
       
   643                                                            int length,
       
   644                                                            uint color,
       
   645                                                            uint const_alpha)
       
   646 {
       
   647     Q_UNUSED(const_alpha);
       
   648 
       
   649     if ((quintptr)(dest) & 0x7) {
       
   650         *dest++ |= color;
       
   651         --length;
       
   652     }
       
   653 
       
   654     const int length64 = length / 2;
       
   655     if (length64) {
       
   656         __m64 *dst64 = reinterpret_cast<__m64*>(dest);
       
   657         const __m64 color64 = _mm_set_pi32(color, color);
       
   658 
       
   659         int n = (length64 + 3) / 4;
       
   660         switch (length64 & 0x3) {
       
   661         case 0: do { *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
       
   662         case 3:      *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
       
   663         case 2:      *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
       
   664         case 1:      *dst64 = _mm_or_si64(*dst64, color64); ++dst64;
       
   665         } while (--n > 0);
       
   666         }
       
   667     }
       
   668 
       
   669     if (length & 0x1) {
       
   670         dest[length - 1] |= color;
       
   671     }
       
   672 
       
   673     MM::end();
       
   674 }
       
   675 
       
   676 template <class MM>
       
   677 static void QT_FASTCALL rasterop_solid_SourceAndDestination(uint *dest,
       
   678                                                             int length,
       
   679                                                             uint color,
       
   680                                                             uint const_alpha)
       
   681 {
       
   682     Q_UNUSED(const_alpha);
       
   683 
       
   684     color |= 0xff000000;
       
   685 
       
   686     if ((quintptr)(dest) & 0x7) { // align
       
   687         *dest++ &= color;
       
   688         --length;
       
   689     }
       
   690 
       
   691     const int length64 = length / 2;
       
   692     if (length64) {
       
   693         __m64 *dst64 = reinterpret_cast<__m64*>(dest);
       
   694         const __m64 color64 = _mm_set_pi32(color, color);
       
   695 
       
   696         int n = (length64 + 3) / 4;
       
   697         switch (length64 & 0x3) {
       
   698         case 0: do { *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
       
   699         case 3:      *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
       
   700         case 2:      *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
       
   701         case 1:      *dst64 = _mm_and_si64(*dst64, color64); ++dst64;
       
   702         } while (--n > 0);
       
   703         }
       
   704     }
       
   705 
       
   706     if (length & 0x1) {
       
   707         dest[length - 1] &= color;
       
   708     }
       
   709 
       
   710     MM::end();
       
   711 }
       
   712 
       
   713 template <class MM>
       
   714 static void QT_FASTCALL rasterop_solid_SourceXorDestination(uint *dest,
       
   715                                                             int length,
       
   716                                                             uint color,
       
   717                                                             uint const_alpha)
       
   718 {
       
   719     Q_UNUSED(const_alpha);
       
   720 
       
   721     color &= 0x00ffffff;
       
   722 
       
   723     if ((quintptr)(dest) & 0x7) {
       
   724         *dest++ ^= color;
       
   725         --length;
       
   726     }
       
   727 
       
   728     const int length64 = length / 2;
       
   729     if (length64) {
       
   730         __m64 *dst64 = reinterpret_cast<__m64*>(dest);
       
   731         const __m64 color64 = _mm_set_pi32(color, color);
       
   732 
       
   733         int n = (length64 + 3) / 4;
       
   734         switch (length64 & 0x3) {
       
   735         case 0: do { *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
       
   736         case 3:      *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
       
   737         case 2:      *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
       
   738         case 1:      *dst64 = _mm_xor_si64(*dst64, color64); ++dst64;
       
   739         } while (--n > 0);
       
   740         }
       
   741     }
       
   742 
       
   743     if (length & 0x1) {
       
   744         dest[length - 1] ^= color;
       
   745     }
       
   746 
       
   747     MM::end();
       
   748 }
       
   749 
       
   750 template <class MM>
       
   751 static void QT_FASTCALL rasterop_solid_SourceAndNotDestination(uint *dest,
       
   752                                                                int length,
       
   753                                                                uint color,
       
   754                                                                uint const_alpha)
       
   755 {
       
   756 
       
   757     Q_UNUSED(const_alpha);
       
   758 
       
   759     if ((quintptr)(dest) & 0x7) {
       
   760         *dest = (color & ~(*dest)) | 0xff000000;
       
   761         ++dest;
       
   762         --length;
       
   763     }
       
   764 
       
   765     const int length64 = length / 2;
       
   766     if (length64) {
       
   767         __m64 *dst64 = reinterpret_cast<__m64*>(dest);
       
   768         const __m64 color64 = _mm_set_pi32(color, color);
       
   769         const m64 mmx_0xff000000 = _mm_set1_pi32(0xff000000);
       
   770         __m64 tmp1, tmp2, tmp3, tmp4;
       
   771 
       
   772         int n = (length64 + 3) / 4;
       
   773         switch (length64 & 0x3) {
       
   774         case 0: do { tmp1 = _mm_andnot_si64(*dst64, color64);
       
   775                      *dst64++ = _mm_or_si64(tmp1, mmx_0xff000000);
       
   776         case 3:      tmp2 = _mm_andnot_si64(*dst64, color64);
       
   777                      *dst64++ = _mm_or_si64(tmp2, mmx_0xff000000);
       
   778         case 2:      tmp3 = _mm_andnot_si64(*dst64, color64);
       
   779                      *dst64++ = _mm_or_si64(tmp3, mmx_0xff000000);
       
   780         case 1:      tmp4 = _mm_andnot_si64(*dst64, color64);
       
   781                      *dst64++ = _mm_or_si64(tmp4, mmx_0xff000000);
       
   782         } while (--n > 0);
       
   783         }
       
   784     }
       
   785 
       
   786     if (length & 0x1) {
       
   787         dest[length - 1] = (color & ~(dest[length - 1])) | 0xff000000;
       
   788     }
       
   789 
       
   790     MM::end();
       
   791 }
       
   792 
       
   793 template <class MM>
       
   794 static void QT_FASTCALL rasterop_solid_NotSourceAndNotDestination(uint *dest,
       
   795                                                                   int length,
       
   796                                                                   uint color,
       
   797                                                                   uint const_alpha)
       
   798 {
       
   799     rasterop_solid_SourceAndNotDestination<MM>(dest, length,
       
   800                                                ~color, const_alpha);
       
   801 }
       
   802 
       
   803 template <class MM>
       
   804 static void QT_FASTCALL rasterop_solid_NotSourceOrNotDestination(uint *dest,
       
   805                                                                  int length,
       
   806                                                                  uint color,
       
   807                                                                  uint const_alpha)
       
   808 {
       
   809     Q_UNUSED(const_alpha);
       
   810     color = ~color | 0xff000000;
       
   811     while (length--) {
       
   812         *dest = color | ~(*dest);
       
   813         ++dest;
       
   814     }
       
   815 }
       
   816 
       
   817 template <class MM>
       
   818 static void QT_FASTCALL rasterop_solid_NotSourceXorDestination(uint *dest,
       
   819                                                                int length,
       
   820                                                                uint color,
       
   821                                                                uint const_alpha)
       
   822 {
       
   823     rasterop_solid_SourceXorDestination<MM>(dest, length, ~color, const_alpha);
       
   824 }
       
   825 
       
   826 template <class MM>
       
   827 static void QT_FASTCALL rasterop_solid_NotSource(uint *dest, int length,
       
   828                                                  uint color, uint const_alpha)
       
   829 {
       
   830     Q_UNUSED(const_alpha);
       
   831     qt_memfill((quint32*)dest, ~color | 0xff000000, length);
       
   832 }
       
   833 
       
   834 template <class MM>
       
   835 static void QT_FASTCALL rasterop_solid_NotSourceAndDestination(uint *dest,
       
   836                                                                int length,
       
   837                                                                uint color,
       
   838                                                                uint const_alpha)
       
   839 {
       
   840     rasterop_solid_SourceAndDestination<MM>(dest, length,
       
   841                                             ~color, const_alpha);
       
   842 }
       
   843 
       
   844 template <class MM>
       
   845 static inline void qt_blend_color_argb_x86(int count, const QSpan *spans,
       
   846                                            void *userData,
       
   847                                            CompositionFunctionSolid *solidFunc)
       
   848 {
       
   849     QSpanData *data = reinterpret_cast<QSpanData *>(userData);
       
   850     if (data->rasterBuffer->compositionMode == QPainter::CompositionMode_Source
       
   851         || (data->rasterBuffer->compositionMode == QPainter::CompositionMode_SourceOver
       
   852             && qAlpha(data->solid.color) == 255)) {
       
   853         // inline for performance
       
   854         C_FF; C_80; C_00;
       
   855         while (count--) {
       
   856             uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
       
   857             if (spans->coverage == 255) {
       
   858                 qt_memfill(static_cast<quint32*>(target), quint32(data->solid.color), spans->len);
       
   859             } else {
       
   860                 // dest = s * ca + d * (1 - sa*ca) --> dest = s * ca + d * (1-ca)
       
   861                 m64 ca = MM::load_alpha(spans->coverage);
       
   862                 m64 s = MM::byte_mul(MM::load(data->solid.color), ca);
       
   863                 m64 ica = MM::negate(ca);
       
   864                 for (int i = 0; i < spans->len; ++i)
       
   865                     target[i] = MM::store(MM::add(s, MM::byte_mul(MM::load(target[i]), ica)));
       
   866             }
       
   867             ++spans;
       
   868         }
       
   869         MM::end();
       
   870         return;
       
   871     }
       
   872     CompositionFunctionSolid func = solidFunc[data->rasterBuffer->compositionMode];
       
   873     while (count--) {
       
   874         uint *target = ((uint *)data->rasterBuffer->scanLine(spans->y)) + spans->x;
       
   875         func(target, spans->len, data->solid.color, spans->coverage);
       
   876         ++spans;
       
   877     }
       
   878 }
       
   879 
       
   #ifdef QT_HAVE_MMX
   // Intrinsics policy for plain MMX, only available when QT_HAVE_MMX is
   // defined. It inherits every operation from QMMXCommonIntrinsics and adds
   // end(), which the templated helpers in this file call as MM::end() once
   // they are done with their __m64 work.
   struct QMMXIntrinsics : public QMMXCommonIntrinsics
   {
       // Issues _mm_empty() to leave MMX state. The call is compiled out on
       // Windows CE builds that do not define _X86_.
       static inline void end()
       {
   #if defined(_X86_) || !defined(Q_OS_WINCE)
           _mm_empty();
   #endif
       }
   };
   #endif // QT_HAVE_MMX
       
   890 
       
   891 QT_END_NAMESPACE
       
   892 
       
   893 #endif // QDRAWHELPER_MMX_P_H