src/gui/painting/qdrawhelper_sse2.cpp
changeset 0 1918ee327afb
child 4 3b1da2848fc7
equal deleted inserted replaced
-1:000000000000 0:1918ee327afb
       
     1 /****************************************************************************
       
     2 **
       
     3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     4 ** All rights reserved.
       
     5 ** Contact: Nokia Corporation (qt-info@nokia.com)
       
     6 **
       
     7 ** This file is part of the QtGui module of the Qt Toolkit.
       
     8 **
       
     9 ** $QT_BEGIN_LICENSE:LGPL$
       
    10 ** No Commercial Usage
       
    11 ** This file contains pre-release code and may not be distributed.
       
    12 ** You may use this file in accordance with the terms and conditions
       
    13 ** contained in the Technology Preview License Agreement accompanying
       
    14 ** this package.
       
    15 **
       
    16 ** GNU Lesser General Public License Usage
       
    17 ** Alternatively, this file may be used under the terms of the GNU Lesser
       
    18 ** General Public License version 2.1 as published by the Free Software
       
    19 ** Foundation and appearing in the file LICENSE.LGPL included in the
       
    20 ** packaging of this file.  Please review the following information to
       
    21 ** ensure the GNU Lesser General Public License version 2.1 requirements
       
    22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
       
    23 **
       
    24 ** In addition, as a special exception, Nokia gives you certain additional
       
    25 ** rights.  These rights are described in the Nokia Qt LGPL Exception
       
    26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
       
    27 **
       
    28 ** If you have questions regarding the use of this file, please contact
       
    29 ** Nokia at qt-info@nokia.com.
       
    30 **
       
    31 **
       
    32 **
       
    33 **
       
    34 **
       
    35 **
       
    36 **
       
    37 **
       
    38 ** $QT_END_LICENSE$
       
    39 **
       
    40 ****************************************************************************/
       
    41 
       
    42 #include <private/qdrawhelper_x86_p.h>
       
    43 
       
    44 #ifdef QT_HAVE_SSE2
       
    45 
       
    46 #include <private/qpaintengine_raster_p.h>
       
    47 
       
    48 #ifdef QT_LINUXBASE
       
    49 // this is an evil hack - the posix_memalign declaration in LSB
       
    50 // is wrong - see http://bugs.linuxbase.org/show_bug.cgi?id=2431
       
    51 #  define posix_memalign _lsb_hack_posix_memalign
       
    52 #  include <emmintrin.h>
       
    53 #  undef posix_memalign
       
    54 #else
       
    55 #  include <emmintrin.h>
       
    56 #endif
       
    57 
       
    58 QT_BEGIN_NAMESPACE
       
    59 
       
    60 void qt_memfill32_sse2(quint32 *dest, quint32 value, int count)
       
    61 {
       
    62     if (count < 7) {
       
    63         switch (count) {
       
    64         case 6: *dest++ = value;
       
    65         case 5: *dest++ = value;
       
    66         case 4: *dest++ = value;
       
    67         case 3: *dest++ = value;
       
    68         case 2: *dest++ = value;
       
    69         case 1: *dest   = value;
       
    70         }
       
    71         return;
       
    72     };
       
    73 
       
    74     const int align = (quintptr)(dest) & 0xf;
       
    75     switch (align) {
       
    76     case 4:  *dest++ = value; --count;
       
    77     case 8:  *dest++ = value; --count;
       
    78     case 12: *dest++ = value; --count;
       
    79     }
       
    80 
       
    81     int count128 = count / 4;
       
    82     __m128i *dst128 = reinterpret_cast<__m128i*>(dest);
       
    83     const __m128i value128 = _mm_set_epi32(value, value, value, value);
       
    84 
       
    85     int n = (count128 + 3) / 4;
       
    86     switch (count128 & 0x3) {
       
    87     case 0: do { _mm_store_si128(dst128++, value128);
       
    88     case 3:      _mm_store_si128(dst128++, value128);
       
    89     case 2:      _mm_store_si128(dst128++, value128);
       
    90     case 1:      _mm_store_si128(dst128++, value128);
       
    91     } while (--n > 0);
       
    92     }
       
    93 
       
    94     const int rest = count & 0x3;
       
    95     if (rest) {
       
    96         switch (rest) {
       
    97         case 3: dest[count - 3] = value;
       
    98         case 2: dest[count - 2] = value;
       
    99         case 1: dest[count - 1] = value;
       
   100         }
       
   101     }
       
   102 }
       
   103 
       
   104 void qt_memfill16_sse2(quint16 *dest, quint16 value, int count)
       
   105 {
       
   106     if (count < 3) {
       
   107         switch (count) {
       
   108         case 2: *dest++ = value;
       
   109         case 1: *dest = value;
       
   110         }
       
   111         return;
       
   112     }
       
   113 
       
   114     const int align = (quintptr)(dest) & 0x3;
       
   115     switch (align) {
       
   116     case 2: *dest++ = value; --count;
       
   117     }
       
   118 
       
   119     const quint32 value32 = (value << 16) | value;
       
   120     qt_memfill32_sse2(reinterpret_cast<quint32*>(dest), value32, count / 2);
       
   121 
       
   122     if (count & 0x1)
       
   123         dest[count - 1] = value;
       
   124 }
       
   125 
       
   126 void qt_bitmapblit32_sse2(QRasterBuffer *rasterBuffer, int x, int y,
       
   127                           quint32 color,
       
   128                           const uchar *src, int width, int height, int stride)
       
   129 {
       
   130     quint32 *dest = reinterpret_cast<quint32*>(rasterBuffer->scanLine(y)) + x;
       
   131     const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint32);
       
   132 
       
   133     const __m128i c128 = _mm_set1_epi32(color);
       
   134     const __m128i maskmask1 = _mm_set_epi32(0x10101010, 0x20202020,
       
   135                                             0x40404040, 0x80808080);
       
   136     const __m128i maskadd1 = _mm_set_epi32(0x70707070, 0x60606060,
       
   137                                            0x40404040, 0x00000000);
       
   138 
       
   139     if (width > 4) {
       
   140         const __m128i maskmask2 = _mm_set_epi32(0x01010101, 0x02020202,
       
   141                                                 0x04040404, 0x08080808);
       
   142         const __m128i maskadd2 = _mm_set_epi32(0x7f7f7f7f, 0x7e7e7e7e,
       
   143                                                0x7c7c7c7c, 0x78787878);
       
   144         while (height--) {
       
   145             for (int x = 0; x < width; x += 8) {
       
   146                 const quint8 s = src[x >> 3];
       
   147                 if (!s)
       
   148                     continue;
       
   149                 __m128i mask1 = _mm_set1_epi8(s);
       
   150                 __m128i mask2 = mask1;
       
   151 
       
   152                 mask1 = _mm_and_si128(mask1, maskmask1);
       
   153                 mask1 = _mm_add_epi8(mask1, maskadd1);
       
   154                 _mm_maskmoveu_si128(c128, mask1, (char*)(dest + x));
       
   155                 mask2 = _mm_and_si128(mask2, maskmask2);
       
   156                 mask2 = _mm_add_epi8(mask2, maskadd2);
       
   157                 _mm_maskmoveu_si128(c128, mask2, (char*)(dest + x + 4));
       
   158             }
       
   159             dest += destStride;
       
   160             src += stride;
       
   161         }
       
   162     } else {
       
   163         while (height--) {
       
   164             const quint8 s = *src;
       
   165             if (s) {
       
   166                 __m128i mask1 = _mm_set1_epi8(s);
       
   167                 mask1 = _mm_and_si128(mask1, maskmask1);
       
   168                 mask1 = _mm_add_epi8(mask1, maskadd1);
       
   169                 _mm_maskmoveu_si128(c128, mask1, (char*)(dest));
       
   170             }
       
   171             dest += destStride;
       
   172             src += stride;
       
   173         }
       
   174     }
       
   175 }
       
   176 
       
   177 void qt_bitmapblit16_sse2(QRasterBuffer *rasterBuffer, int x, int y,
       
   178                           quint32 color,
       
   179                           const uchar *src, int width, int height, int stride)
       
   180 {
       
   181     const quint16 c = qt_colorConvert<quint16, quint32>(color, 0);
       
   182     quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x;
       
   183     const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint16);
       
   184 
       
   185     const __m128i c128 = _mm_set1_epi16(c);
       
   186 #if defined(Q_CC_MSVC)
       
   187 #  pragma warning(disable: 4309) // truncation of constant value
       
   188 #endif
       
   189     const __m128i maskmask = _mm_set_epi16(0x0101, 0x0202, 0x0404, 0x0808,
       
   190                                            0x1010, 0x2020, 0x4040, 0x8080);
       
   191     const __m128i maskadd = _mm_set_epi16(0x7f7f, 0x7e7e, 0x7c7c, 0x7878,
       
   192                                           0x7070, 0x6060, 0x4040, 0x0000);
       
   193 
       
   194     while (height--) {
       
   195         for (int x = 0; x < width; x += 8) {
       
   196             const quint8 s = src[x >> 3];
       
   197             if (!s)
       
   198                 continue;
       
   199             __m128i mask = _mm_set1_epi8(s);
       
   200             mask = _mm_and_si128(mask, maskmask);
       
   201             mask = _mm_add_epi8(mask, maskadd);
       
   202             _mm_maskmoveu_si128(c128, mask, (char*)(dest + x));
       
   203         }
       
   204         dest += destStride;
       
   205         src += stride;
       
   206     }
       
   207 }
       
   208 
       
   209 QT_END_NAMESPACE
       
   210 
       
   211 #endif // QT_HAVE_SSE2