genericopenlibs/liboil/src/splat_sse.c
changeset 18 47c74d1534e1
equal deleted inserted replaced
0:e4d67989cc36 18:47c74d1534e1
       
     1 /*
       
     2  * Copyright (c) 2005
       
     3  *	Eric Anholt.  All rights reserved.
       
     4  *
       
     5  * Redistribution and use in source and binary forms, with or without
       
     6  * modification, are permitted provided that the following conditions
       
     7  * are met:
       
     8  * 1. Redistributions of source code must retain the above copyright
       
     9  *    notice, this list of conditions and the following disclaimer.
       
    10  * 2. Redistributions in binary form must reproduce the above copyright
       
    11  *    notice, this list of conditions and the following disclaimer in the
       
    12  *    documentation and/or other materials provided with the distribution.
       
    13  *
       
    14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
       
    15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       
    17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
       
    18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
       
    19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
       
    20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       
    21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
       
    22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
       
    23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
       
    24  * SUCH DAMAGE.
       
    25  */
       
    26 //Portions Copyright (c)  2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 
       
    27 
       
    28 #ifdef HAVE_CONFIG_H
       
    29 #include "config.h"
       
    30 #endif
       
    31 #include <liboil/liboilclasses.h>
       
    32 #include <liboil/liboilfunction.h>
       
    33 #include <emmintrin.h>
       
    34 
       
    35 #define SSE_FUNCTION __attribute__((force_align_arg_pointer))
       
    36 
       
    37 SSE_FUNCTION static void
       
    38 splat_u32_ns_sse (uint32_t *dest, const uint32_t *param, int n)
       
    39 {
       
    40   __m128i v;
       
    41 
       
    42   v = _mm_set1_epi32(*param);
       
    43 
       
    44   for (; ((long)dest & 15) && (n > 0); n--) {
       
    45     *dest++ = *param;
       
    46   }
       
    47   for (; n >= 4; n -= 4) {
       
    48     _mm_store_si128((__m128i *)dest, v);
       
    49     dest += 4;
       
    50   }
       
    51   for (; n > 0; n--) {
       
    52     *dest++ = *param;
       
    53   }
       
    54 }
       
    55 OIL_DEFINE_IMPL_FULL (splat_u32_ns_sse, splat_u32_ns, OIL_IMPL_FLAG_SSE2);
       
    56 
       
    57 SSE_FUNCTION static void
       
    58 splat_u32_ns_sse_unroll2 (uint32_t *dest, const uint32_t *param, int n)
       
    59 {
       
    60   __m128i v;
       
    61 
       
    62   v = _mm_set1_epi32(*param);
       
    63 
       
    64   for (; ((long)dest & 15) && (n > 0); n--) {
       
    65     *dest++ = *param;
       
    66   }
       
    67   for (; n >= 8; n -= 8) {
       
    68     _mm_store_si128((__m128i *)dest, v);
       
    69     _mm_store_si128((__m128i *)(dest + 4), v);
       
    70     dest += 8;
       
    71   }
       
    72   for (; n >= 4; n -= 4) {
       
    73     _mm_store_si128((__m128i *)dest, v);
       
    74     dest += 4;
       
    75   }
       
    76   for (; n > 0; n--) {
       
    77     *dest++ = *param;
       
    78   }
       
    79 }
       
    80 OIL_DEFINE_IMPL_FULL (splat_u32_ns_sse_unroll2, splat_u32_ns, OIL_IMPL_FLAG_SSE2);
       
    81 
       
    82 SSE_FUNCTION static void
       
    83 splat_u8_ns_sse (uint8_t *dest, const uint8_t *param, int n)
       
    84 {
       
    85   __m128i v;
       
    86 
       
    87   v = _mm_set1_epi8(*param);
       
    88 
       
    89   for (; ((long)dest & 15) && (n > 0); n--) {
       
    90     *dest++ = *param;
       
    91   }
       
    92   for (; n >= 16; n -= 16) {
       
    93     _mm_store_si128((__m128i *)dest, v);
       
    94     dest += 16;
       
    95   }
       
    96   for (; n > 0; n--) {
       
    97     *dest++ = *param;
       
    98   }
       
    99 }
       
   100 OIL_DEFINE_IMPL_FULL (splat_u8_ns_sse, splat_u8_ns, OIL_IMPL_FLAG_SSE2);
       
   101 
       
   102 SSE_FUNCTION static void
       
   103 splat_u8_ns_sse_unroll2 (uint8_t *dest, const uint8_t *param, int n)
       
   104 {
       
   105   __m128i v;
       
   106 
       
   107   v = _mm_set1_epi8(*param);
       
   108 
       
   109   for (; ((long)dest & 15) && (n > 0); n--) {
       
   110     *dest++ = *param;
       
   111   }
       
   112   for (; n >= 32; n -= 32) {
       
   113     _mm_store_si128((__m128i *)dest, v);
       
   114     _mm_store_si128((__m128i *)(dest + 16), v);
       
   115     dest += 32;
       
   116   }
       
   117   for (; n >= 16; n -= 16) {
       
   118     _mm_store_si128((__m128i *)dest, v);
       
   119     dest += 16;
       
   120   }
       
   121   for (; n > 0; n--) {
       
   122     *dest++ = *param;
       
   123   }
       
   124 }
       
   125 OIL_DEFINE_IMPL_FULL (splat_u8_ns_sse_unroll2, splat_u8_ns, OIL_IMPL_FLAG_SSE2);
       
   126 
       
   127 
       
   128 #ifdef	__SYMBIAN32__
       
   129  
       
   130 OilFunctionImpl* __oil_function_impl_splat_u32_ns_sse, splat_u32_ns() {
       
   131 		return &_oil_function_impl_splat_u32_ns_sse, splat_u32_ns;
       
   132 }
       
   133 #endif
       
   134 
       
   135 #ifdef	__SYMBIAN32__
       
   136  
       
   137 OilFunctionImpl* __oil_function_impl_splat_u32_ns_sse_unroll2, splat_u32_ns() {
       
   138 		return &_oil_function_impl_splat_u32_ns_sse_unroll2, splat_u32_ns;
       
   139 }
       
   140 #endif
       
   141 
       
   142 #ifdef	__SYMBIAN32__
       
   143  
       
   144 OilFunctionImpl* __oil_function_impl_splat_u8_ns_sse, splat_u8_ns() {
       
   145 		return &_oil_function_impl_splat_u8_ns_sse, splat_u8_ns;
       
   146 }
       
   147 #endif
       
   148 
       
   149 #ifdef	__SYMBIAN32__
       
   150  
       
   151 OilFunctionImpl* __oil_function_impl_splat_u8_ns_sse_unroll2, splat_u8_ns() {
       
   152 		return &_oil_function_impl_splat_u8_ns_sse_unroll2, splat_u8_ns;
       
   153 }
       
   154 #endif
       
   155