genericopenlibs/liboil/src/clamp_sse.c
author William Roberts <williamr@symbian.org>
Fri, 23 Jul 2010 16:09:54 +0100
branchGCC_SURGE
changeset 47 d7383dba13ba
parent 18 47c74d1534e1
permissions -rw-r--r--
Reapply fix for EXPORT_C problem in backend.dll, which got lost in the merge - bug 2971

/*
 * Copyright (c) 2005
 *	Eric Anholt.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
//Portions Copyright (c)  2008-2009 Nokia Corporation and/or its subsidiary(-ies). All rights reserved. 

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "liboil/liboilclasses.h"
#include "liboil/liboilfunction.h"
#include <emmintrin.h>
#include <stdint.h>
#include <xmmintrin.h>

/* TODO: If we have gcc 4.2 or above, do this. Otherwise, disable all SSE use */
/* Prefix applied to every SSE routine in this file.  It expands to GCC's
 * force_align_arg_pointer function attribute.
 * NOTE(review): presumably required so that __m128/__m128i locals are 16-byte
 * aligned even when the caller's stack is only 4-byte aligned -- confirm
 * against the GCC documentation for the targeted compiler version. */
#define SSE_FUNCTION __attribute__((force_align_arg_pointer))

/*
 * clamp_u8_sse:
 * @dest: output array, @n elements
 * @src1: input array, @n elements
 * @n: number of elements to process
 * @src2_1: pointer to the lower bound
 * @src3_1: pointer to the upper bound
 *
 * Copies @src1 to @dest, limiting every element to the range
 * [*src2_1, *src3_1].  Elements are processed one at a time until @dest is
 * 16-byte aligned, then 16 at a time (unaligned load, aligned store), and
 * the remaining elements are again processed one at a time.
 */
SSE_FUNCTION static void
clamp_u8_sse (uint8_t *dest, const uint8_t *src1, int n, const uint8_t *src2_1,
    const uint8_t *src3_1)
{
  __m128i xmm1, xmm2;
  uint8_t min = *src2_1;
  uint8_t max = *src3_1;

  /* Initial operations to align the destination pointer.  uintptr_t is used
   * because long is narrower than a pointer on some 64-bit ABIs. */
  for (; ((uintptr_t)dest & 15) && (n > 0); n--) {
    uint8_t x = *src1++;
    if (x < min)
      x = min;
    if (x > max)
      x = max;
    *dest++ = x;
  }
  xmm1 = _mm_set1_epi8(min);
  xmm2 = _mm_set1_epi8(max);
  for (; n >= 16; n -= 16) {
    __m128i xmm0;
    /* The source may still be unaligned, hence the unaligned load. */
    xmm0 = _mm_loadu_si128((const __m128i *)src1);
    xmm0 = _mm_max_epu8(xmm0, xmm1);
    xmm0 = _mm_min_epu8(xmm0, xmm2);
    _mm_store_si128((__m128i *)dest, xmm0);
    dest += 16;
    src1 += 16;
  }
  /* Fewer than 16 elements left: finish with scalar code. */
  for (; n > 0; n--) {
    uint8_t x = *src1++;
    if (x < min)
      x = min;
    if (x > max)
      x = max;
    *dest++ = x;
  }
}
/* Declares clamp_u8_sse as an implementation of the clamp_u8 class with the
 * OIL_IMPL_FLAG_SSE2 flag.  NOTE(review): the macro is defined in the liboil
 * headers, not here; presumably the flag limits selection to SSE2 CPUs. */
OIL_DEFINE_IMPL_FULL (clamp_u8_sse, clamp_u8, OIL_IMPL_FLAG_SSE2);

/*
 * clamp_s16_sse:
 * @dest: output array, @n elements
 * @src1: input array, @n elements
 * @n: number of elements to process
 * @src2_1: pointer to the lower bound
 * @src3_1: pointer to the upper bound
 *
 * Copies @src1 to @dest, limiting every signed 16-bit element to the range
 * [*src2_1, *src3_1].  Elements are processed one at a time until @dest is
 * 16-byte aligned, then 8 at a time (unaligned load, aligned store), and
 * the remaining elements are again processed one at a time.
 */
SSE_FUNCTION static void
clamp_s16_sse (int16_t *dest, const int16_t *src1, int n,
    const int16_t *src2_1, const int16_t *src3_1)
{
  __m128i xmm1, xmm2;
  int16_t min = *src2_1;
  int16_t max = *src3_1;

  /* Initial operations to align the destination pointer.  uintptr_t is used
   * because long is narrower than a pointer on some 64-bit ABIs. */
  for (; ((uintptr_t)dest & 15) && (n > 0); n--) {
    int16_t x = *src1++;
    if (x < min)
      x = min;
    if (x > max)
      x = max;
    *dest++ = x;
  }
  xmm1 = _mm_set1_epi16(min);
  xmm2 = _mm_set1_epi16(max);
  for (; n >= 8; n -= 8) {
    __m128i xmm0;
    /* The source may still be unaligned, hence the unaligned load. */
    xmm0 = _mm_loadu_si128((const __m128i *)src1);
    xmm0 = _mm_max_epi16(xmm0, xmm1);
    xmm0 = _mm_min_epi16(xmm0, xmm2);
    _mm_store_si128((__m128i *)dest, xmm0);
    dest += 8;
    src1 += 8;
  }
  /* Fewer than 8 elements left: finish with scalar code. */
  for (; n > 0; n--) {
    int16_t x = *src1++;
    if (x < min)
      x = min;
    if (x > max)
      x = max;
    *dest++ = x;
  }
}
/* Declares clamp_s16_sse as an implementation of the clamp_s16 class with the
 * OIL_IMPL_FLAG_SSE2 flag.  NOTE(review): the macro is defined in the liboil
 * headers, not here; presumably the flag limits selection to SSE2 CPUs. */
OIL_DEFINE_IMPL_FULL (clamp_s16_sse, clamp_s16, OIL_IMPL_FLAG_SSE2);

/*
 * clamp_f32_sse:
 * @dest: output array, @n elements
 * @src1: input array, @n elements
 * @n: number of elements to process
 * @src2_1: pointer to the lower bound
 * @src3_1: pointer to the upper bound
 *
 * Copies @src1 to @dest, limiting every element to the range
 * [*src2_1, *src3_1].  Elements are processed one at a time until @dest is
 * 16-byte aligned, then 4 at a time (unaligned load, aligned store), and
 * the remaining elements are again processed one at a time.
 *
 * A NaN input is passed through unchanged by both the scalar and the vector
 * code, so the result does not depend on where in the array it sits.
 */
SSE_FUNCTION static void
clamp_f32_sse (float *dest, const float *src1, int n, const float *src2_1,
    const float *src3_1)
{
  __m128 xmm1, xmm2;
  float min = *src2_1;
  float max = *src3_1;

  /* Initial operations to align the destination pointer.  uintptr_t is used
   * because long is narrower than a pointer on some 64-bit ABIs. */
  for (; ((uintptr_t)dest & 15) && (n > 0); n--) {
    float x = *src1++;
    if (x < min)
      x = min;
    if (x > max)
      x = max;
    *dest++ = x;
  }
  xmm1 = _mm_set_ps1(min);
  xmm2 = _mm_set_ps1(max);
  for (; n >= 4; n -= 4) {
    __m128 xmm0;
    xmm0 = _mm_loadu_ps(src1);
    /* MAXPS/MINPS return their second operand when either input is NaN or
     * both are zero.  Passing the bound first makes them compute
     * (min > x ? min : x) and (max < x ? max : x), exactly like the scalar
     * loops above and below. */
    xmm0 = _mm_max_ps(xmm1, xmm0);
    xmm0 = _mm_min_ps(xmm2, xmm0);
    _mm_store_ps(dest, xmm0);
    dest += 4;
    src1 += 4;
  }
  /* Fewer than 4 elements left: finish with scalar code. */
  for (; n > 0; n--) {
    float x = *src1++;
    if (x < min)
      x = min;
    if (x > max)
      x = max;
    *dest++ = x;
  }
}
/* Declares clamp_f32_sse as an implementation of the clamp_f32 class with the
 * OIL_IMPL_FLAG_SSE flag.  NOTE(review): the macro is defined in the liboil
 * headers, not here; presumably the flag limits selection to SSE CPUs. */
OIL_DEFINE_IMPL_FULL (clamp_f32_sse, clamp_f32, OIL_IMPL_FLAG_SSE);

/*
 * clamp_f64_sse:
 * @dest: output array, @n elements
 * @src1: input array, @n elements
 * @n: number of elements to process
 * @src2_1: pointer to the lower bound
 * @src3_1: pointer to the upper bound
 *
 * Copies @src1 to @dest, limiting every element to the range
 * [*src2_1, *src3_1].  Elements are processed one at a time until @dest is
 * 16-byte aligned, then 2 at a time (unaligned load, aligned store), and
 * a possible last element is again processed on its own.
 *
 * A NaN input is passed through unchanged by both the scalar and the vector
 * code, so the result does not depend on where in the array it sits.
 */
SSE_FUNCTION static void
clamp_f64_sse (double *dest, const double *src1, int n, const double *src2_1,
    const double *src3_1)
{
  __m128d xmm1, xmm2;
  double min = *src2_1;
  double max = *src3_1;

  /* Initial operations to align the destination pointer.  uintptr_t is used
   * because long is narrower than a pointer on some 64-bit ABIs. */
  for (; ((uintptr_t)dest & 15) && (n > 0); n--) {
    double x = *src1++;
    if (x < min)
      x = min;
    if (x > max)
      x = max;
    *dest++ = x;
  }
  xmm1 = _mm_set1_pd(min);
  xmm2 = _mm_set1_pd(max);
  for (; n >= 2; n -= 2) {
    __m128d xmm0;
    xmm0 = _mm_loadu_pd(src1);
    /* MAXPD/MINPD return their second operand when either input is NaN or
     * both are zero.  Passing the bound first makes them compute
     * (min > x ? min : x) and (max < x ? max : x), exactly like the scalar
     * loops above and below. */
    xmm0 = _mm_max_pd(xmm1, xmm0);
    xmm0 = _mm_min_pd(xmm2, xmm0);
    _mm_store_pd(dest, xmm0);
    dest += 2;
    src1 += 2;
  }
  /* At most one element left: finish with scalar code. */
  for (; n > 0; n--) {
    double x = *src1++;
    if (x < min)
      x = min;
    if (x > max)
      x = max;
    *dest++ = x;
  }
}
/* Declares clamp_f64_sse as an implementation of the clamp_f64 class with
 * both the SSE and SSE2 flags.  NOTE(review): the macro is defined in the
 * liboil headers, not here; presumably the flags limit selection to CPUs
 * that report both features. */
OIL_DEFINE_IMPL_FULL (clamp_f64_sse, clamp_f64,
    OIL_IMPL_FLAG_SSE | OIL_IMPL_FLAG_SSE2);

/*
 * clamplow_u8_sse:
 * @dest: output array, @n elements
 * @src1: input array, @n elements
 * @n: number of elements to process
 * @src2_1: pointer to the lower bound
 *
 * Copies @src1 to @dest, raising every element that is below *src2_1 to
 * that bound.  Elements are processed one at a time until @dest is 16-byte
 * aligned, then 16 at a time (unaligned load, aligned store), and the
 * remaining elements are again processed one at a time.
 */
SSE_FUNCTION static void
clamplow_u8_sse (uint8_t *dest, const uint8_t *src1, int n,
    const uint8_t *src2_1)
{
  __m128i xmm1;
  uint8_t min = *src2_1;

  /* Initial operations to align the destination pointer.  uintptr_t is used
   * because long is narrower than a pointer on some 64-bit ABIs. */
  for (; ((uintptr_t)dest & 15) && (n > 0); n--) {
    uint8_t x = *src1++;
    if (x < min)
      x = min;
    *dest++ = x;
  }
  xmm1 = _mm_set1_epi8(min);
  for (; n >= 16; n -= 16) {
    __m128i xmm0;
    /* The source may still be unaligned, hence the unaligned load. */
    xmm0 = _mm_loadu_si128((const __m128i *)src1);
    xmm0 = _mm_max_epu8(xmm0, xmm1);
    _mm_store_si128((__m128i *)dest, xmm0);
    dest += 16;
    src1 += 16;
  }
  /* Fewer than 16 elements left: finish with scalar code. */
  for (; n > 0; n--) {
    uint8_t x = *src1++;
    if (x < min)
      x = min;
    *dest++ = x;
  }
}
/* Declares clamplow_u8_sse as an implementation of the clamplow_u8 class with
 * the OIL_IMPL_FLAG_SSE2 flag.  NOTE(review): the macro is defined in the
 * liboil headers, not here; presumably the flag limits selection to SSE2 CPUs. */
OIL_DEFINE_IMPL_FULL (clamplow_u8_sse, clamplow_u8, OIL_IMPL_FLAG_SSE2);

/*
 * clamplow_s16_sse:
 * @dest: output array, @n elements
 * @src1: input array, @n elements
 * @n: number of elements to process
 * @src2_1: pointer to the lower bound
 *
 * Copies @src1 to @dest, raising every signed 16-bit element that is below
 * *src2_1 to that bound.  Elements are processed one at a time until @dest
 * is 16-byte aligned, then 8 at a time (unaligned load, aligned store), and
 * the remaining elements are again processed one at a time.
 */
SSE_FUNCTION static void
clamplow_s16_sse (int16_t *dest, const int16_t *src1, int n,
    const int16_t *src2_1)
{
  __m128i xmm1;
  int16_t min = *src2_1;

  /* Initial operations to align the destination pointer.  uintptr_t is used
   * because long is narrower than a pointer on some 64-bit ABIs. */
  for (; ((uintptr_t)dest & 15) && (n > 0); n--) {
    int16_t x = *src1++;
    if (x < min)
      x = min;
    *dest++ = x;
  }
  xmm1 = _mm_set1_epi16(min);
  for (; n >= 8; n -= 8) {
    __m128i xmm0;
    /* The source may still be unaligned, hence the unaligned load. */
    xmm0 = _mm_loadu_si128((const __m128i *)src1);
    xmm0 = _mm_max_epi16(xmm0, xmm1);
    _mm_store_si128((__m128i *)dest, xmm0);
    dest += 8;
    src1 += 8;
  }
  /* Fewer than 8 elements left: finish with scalar code. */
  for (; n > 0; n--) {
    int16_t x = *src1++;
    if (x < min)
      x = min;
    *dest++ = x;
  }
}
/* Declares clamplow_s16_sse as an implementation of the clamplow_s16 class
 * with the OIL_IMPL_FLAG_SSE2 flag.  NOTE(review): the macro is defined in the
 * liboil headers, not here; presumably the flag limits selection to SSE2 CPUs. */
OIL_DEFINE_IMPL_FULL (clamplow_s16_sse, clamplow_s16, OIL_IMPL_FLAG_SSE2);

/*
 * clamplow_f32_sse:
 * @dest: output array, @n elements
 * @src1: input array, @n elements
 * @n: number of elements to process
 * @src2_1: pointer to the lower bound
 *
 * Copies @src1 to @dest, raising every element that is below *src2_1 to
 * that bound.  Elements are processed one at a time until @dest is 16-byte
 * aligned, then 4 at a time (unaligned load, aligned store), and the
 * remaining elements are again processed one at a time.
 *
 * A NaN input is passed through unchanged by both the scalar and the vector
 * code, so the result does not depend on where in the array it sits.
 */
SSE_FUNCTION static void
clamplow_f32_sse (float *dest, const float *src1, int n, const float *src2_1)
{
  __m128 xmm1;
  float min = *src2_1;

  /* Initial operations to align the destination pointer.  uintptr_t is used
   * because long is narrower than a pointer on some 64-bit ABIs. */
  for (; ((uintptr_t)dest & 15) && (n > 0); n--) {
    float x = *src1++;
    if (x < min)
      x = min;
    *dest++ = x;
  }
  xmm1 = _mm_set_ps1(min);
  for (; n >= 4; n -= 4) {
    __m128 xmm0;
    xmm0 = _mm_loadu_ps(src1);
    /* MAXPS returns its second operand when either input is NaN or both are
     * zero.  Passing the bound first makes it compute (min > x ? min : x),
     * exactly like the scalar loops above and below. */
    xmm0 = _mm_max_ps(xmm1, xmm0);
    _mm_store_ps(dest, xmm0);
    dest += 4;
    src1 += 4;
  }
  /* Fewer than 4 elements left: finish with scalar code. */
  for (; n > 0; n--) {
    float x = *src1++;
    if (x < min)
      x = min;
    *dest++ = x;
  }
}
/* Declares clamplow_f32_sse as an implementation of the clamplow_f32 class
 * with the OIL_IMPL_FLAG_SSE flag.  NOTE(review): the macro is defined in the
 * liboil headers, not here; presumably the flag limits selection to SSE CPUs. */
OIL_DEFINE_IMPL_FULL (clamplow_f32_sse, clamplow_f32, OIL_IMPL_FLAG_SSE);

/*
 * clamplow_f64_sse:
 * @dest: output array, @n elements
 * @src1: input array, @n elements
 * @n: number of elements to process
 * @src2_1: pointer to the lower bound
 *
 * Copies @src1 to @dest, raising every element that is below *src2_1 to
 * that bound.  Elements are processed one at a time until @dest is 16-byte
 * aligned, then 2 at a time (unaligned load, aligned store), and a possible
 * last element is again processed on its own.
 *
 * A NaN input is passed through unchanged by both the scalar and the vector
 * code, so the result does not depend on where in the array it sits.
 */
SSE_FUNCTION static void
clamplow_f64_sse (double *dest, const double *src1, int n, const double *src2_1)
{
  __m128d xmm1;
  double min = *src2_1;

  /* Initial operations to align the destination pointer.  uintptr_t is used
   * because long is narrower than a pointer on some 64-bit ABIs. */
  for (; ((uintptr_t)dest & 15) && (n > 0); n--) {
    double x = *src1++;
    if (x < min)
      x = min;
    *dest++ = x;
  }
  xmm1 = _mm_set1_pd(min);
  for (; n >= 2; n -= 2) {
    __m128d xmm0;
    xmm0 = _mm_loadu_pd(src1);
    /* MAXPD returns its second operand when either input is NaN or both are
     * zero.  Passing the bound first makes it compute (min > x ? min : x),
     * exactly like the scalar loops above and below. */
    xmm0 = _mm_max_pd(xmm1, xmm0);
    _mm_store_pd(dest, xmm0);
    dest += 2;
    src1 += 2;
  }
  /* At most one element left: finish with scalar code. */
  for (; n > 0; n--) {
    double x = *src1++;
    if (x < min)
      x = min;
    *dest++ = x;
  }
}
/* Declares clamplow_f64_sse as an implementation of the clamplow_f64 class
 * with both the SSE and SSE2 flags.  NOTE(review): the macro is defined in
 * the liboil headers, not here; presumably the flags limit selection to CPUs
 * that report both features. */
OIL_DEFINE_IMPL_FULL (clamplow_f64_sse, clamplow_f64,
    OIL_IMPL_FLAG_SSE | OIL_IMPL_FLAG_SSE2);

/*
 * clamphigh_u8_sse:
 * @dest: output array, @n elements
 * @src1: input array, @n elements
 * @n: number of elements to process
 * @src2_1: pointer to the upper bound
 *
 * Copies @src1 to @dest, lowering every element that is above *src2_1 to
 * that bound.  Elements are processed one at a time until @dest is 16-byte
 * aligned, then 16 at a time (unaligned load, aligned store), and the
 * remaining elements are again processed one at a time.
 */
SSE_FUNCTION static void
clamphigh_u8_sse (uint8_t *dest, const uint8_t *src1, int n,
    const uint8_t *src2_1)
{
  __m128i xmm1;
  uint8_t max = *src2_1;

  /* Initial operations to align the destination pointer.  uintptr_t is used
   * because long is narrower than a pointer on some 64-bit ABIs. */
  for (; ((uintptr_t)dest & 15) && (n > 0); n--) {
    uint8_t x = *src1++;
    if (x > max)
      x = max;
    *dest++ = x;
  }
  xmm1 = _mm_set1_epi8(max);
  for (; n >= 16; n -= 16) {
    __m128i xmm0;
    /* The source may still be unaligned, hence the unaligned load. */
    xmm0 = _mm_loadu_si128((const __m128i *)src1);
    xmm0 = _mm_min_epu8(xmm0, xmm1);
    _mm_store_si128((__m128i *)dest, xmm0);
    dest += 16;
    src1 += 16;
  }
  /* Fewer than 16 elements left: finish with scalar code. */
  for (; n > 0; n--) {
    uint8_t x = *src1++;
    if (x > max)
      x = max;
    *dest++ = x;
  }
}
/* Declares clamphigh_u8_sse as an implementation of the clamphigh_u8 class
 * with the OIL_IMPL_FLAG_SSE2 flag.  NOTE(review): the macro is defined in the
 * liboil headers, not here; presumably the flag limits selection to SSE2 CPUs. */
OIL_DEFINE_IMPL_FULL (clamphigh_u8_sse, clamphigh_u8, OIL_IMPL_FLAG_SSE2);

/*
 * clamphigh_s16_sse:
 * @dest: output array, @n elements
 * @src1: input array, @n elements
 * @n: number of elements to process
 * @src2_1: pointer to the upper bound
 *
 * Copies @src1 to @dest, lowering every signed 16-bit element that is above
 * *src2_1 to that bound.  Elements are processed one at a time until @dest
 * is 16-byte aligned, then 8 at a time (unaligned load, aligned store), and
 * the remaining elements are again processed one at a time.
 */
SSE_FUNCTION static void
clamphigh_s16_sse (int16_t *dest, const int16_t *src1, int n,
    const int16_t *src2_1)
{
  __m128i xmm1;
  int16_t max = *src2_1;

  /* Initial operations to align the destination pointer.  uintptr_t is used
   * because long is narrower than a pointer on some 64-bit ABIs. */
  for (; ((uintptr_t)dest & 15) && (n > 0); n--) {
    int16_t x = *src1++;
    if (x > max)
      x = max;
    *dest++ = x;
  }
  xmm1 = _mm_set1_epi16(max);
  for (; n >= 8; n -= 8) {
    __m128i xmm0;
    /* The source may still be unaligned, hence the unaligned load. */
    xmm0 = _mm_loadu_si128((const __m128i *)src1);
    xmm0 = _mm_min_epi16(xmm0, xmm1);
    _mm_store_si128((__m128i *)dest, xmm0);
    dest += 8;
    src1 += 8;
  }
  /* Fewer than 8 elements left: finish with scalar code. */
  for (; n > 0; n--) {
    int16_t x = *src1++;
    if (x > max)
      x = max;
    *dest++ = x;
  }
}
/* Declares clamphigh_s16_sse as an implementation of the clamphigh_s16 class
 * with the OIL_IMPL_FLAG_SSE2 flag.  NOTE(review): the macro is defined in the
 * liboil headers, not here; presumably the flag limits selection to SSE2 CPUs. */
OIL_DEFINE_IMPL_FULL (clamphigh_s16_sse, clamphigh_s16, OIL_IMPL_FLAG_SSE2);

/*
 * clamphigh_f32_sse:
 * @dest: output array, @n elements
 * @src1: input array, @n elements
 * @n: number of elements to process
 * @src2_1: pointer to the upper bound
 *
 * Copies @src1 to @dest, lowering every element that is above *src2_1 to
 * that bound.  Elements are processed one at a time until @dest is 16-byte
 * aligned, then 4 at a time (unaligned load, aligned store), and the
 * remaining elements are again processed one at a time.
 *
 * A NaN input is passed through unchanged by both the scalar and the vector
 * code, so the result does not depend on where in the array it sits.
 */
SSE_FUNCTION static void
clamphigh_f32_sse (float *dest, const float *src1, int n, const float *src2_1)
{
  __m128 xmm1;
  float max = *src2_1;

  /* Initial operations to align the destination pointer.  uintptr_t is used
   * because long is narrower than a pointer on some 64-bit ABIs. */
  for (; ((uintptr_t)dest & 15) && (n > 0); n--) {
    float x = *src1++;
    if (x > max)
      x = max;
    *dest++ = x;
  }
  xmm1 = _mm_set_ps1(max);
  for (; n >= 4; n -= 4) {
    __m128 xmm0;
    xmm0 = _mm_loadu_ps(src1);
    /* MINPS returns its second operand when either input is NaN or both are
     * zero.  Passing the bound first makes it compute (max < x ? max : x),
     * exactly like the scalar loops above and below. */
    xmm0 = _mm_min_ps(xmm1, xmm0);
    _mm_store_ps(dest, xmm0);
    dest += 4;
    src1 += 4;
  }
  /* Fewer than 4 elements left: finish with scalar code. */
  for (; n > 0; n--) {
    float x = *src1++;
    if (x > max)
      x = max;
    *dest++ = x;
  }
}
/* Declares clamphigh_f32_sse as an implementation of the clamphigh_f32 class
 * with the OIL_IMPL_FLAG_SSE flag.  NOTE(review): the macro is defined in the
 * liboil headers, not here; presumably the flag limits selection to SSE CPUs. */
OIL_DEFINE_IMPL_FULL (clamphigh_f32_sse, clamphigh_f32, OIL_IMPL_FLAG_SSE);

/*
 * clamphigh_f64_sse:
 * @dest: output array, @n elements
 * @src1: input array, @n elements
 * @n: number of elements to process
 * @src2_1: pointer to the upper bound
 *
 * Copies @src1 to @dest, lowering every element that is above *src2_1 to
 * that bound.  Elements are processed one at a time until @dest is 16-byte
 * aligned, then 2 at a time (unaligned load, aligned store), and a possible
 * last element is again processed on its own.
 *
 * A NaN input is passed through unchanged by both the scalar and the vector
 * code, so the result does not depend on where in the array it sits.
 */
SSE_FUNCTION static void
clamphigh_f64_sse (double *dest, const double *src1, int n, const double *src2_1)
{
  __m128d xmm1;
  double max = *src2_1;

  /* Initial operations to align the destination pointer.  uintptr_t is used
   * because long is narrower than a pointer on some 64-bit ABIs. */
  for (; ((uintptr_t)dest & 15) && (n > 0); n--) {
    double x = *src1++;
    if (x > max)
      x = max;
    *dest++ = x;
  }
  xmm1 = _mm_set1_pd(max);
  for (; n >= 2; n -= 2) {
    __m128d xmm0;
    xmm0 = _mm_loadu_pd(src1);
    /* MINPD returns its second operand when either input is NaN or both are
     * zero.  Passing the bound first makes it compute (max < x ? max : x),
     * exactly like the scalar loops above and below. */
    xmm0 = _mm_min_pd(xmm1, xmm0);
    _mm_store_pd(dest, xmm0);
    dest += 2;
    src1 += 2;
  }
  /* At most one element left: finish with scalar code. */
  for (; n > 0; n--) {
    double x = *src1++;
    if (x > max)
      x = max;
    *dest++ = x;
  }
}
/* Declares clamphigh_f64_sse as an implementation of the clamphigh_f64 class
 * with both the SSE and SSE2 flags.  NOTE(review): the macro is defined in
 * the liboil headers, not here; presumably the flags limit selection to CPUs
 * that report both features. */
OIL_DEFINE_IMPL_FULL (clamphigh_f64_sse, clamphigh_f64,
    OIL_IMPL_FLAG_SSE | OIL_IMPL_FLAG_SSE2);


#ifdef	__SYMBIAN32__
 
/* Symbian-only accessor returning the address of the implementation record
 * for clamp_u8_sse (presumably the object emitted by the
 * OIL_DEFINE_IMPL_FULL (clamp_u8_sse, ...) line in this file). */
OilFunctionImpl* __oil_function_impl_clamp_u8_sse() {
		return &_oil_function_impl_clamp_u8_sse;
}
#endif

#ifdef	__SYMBIAN32__
 
/* Symbian-only accessor returning the address of the implementation record
 * for clamp_s16_sse (presumably the object emitted by the
 * OIL_DEFINE_IMPL_FULL (clamp_s16_sse, ...) line in this file). */
OilFunctionImpl* __oil_function_impl_clamp_s16_sse() {
		return &_oil_function_impl_clamp_s16_sse;
}
#endif

#ifdef	__SYMBIAN32__
 
/* Symbian-only accessor returning the address of the implementation record
 * for clamp_f32_sse (presumably the object emitted by the
 * OIL_DEFINE_IMPL_FULL (clamp_f32_sse, ...) line in this file). */
OilFunctionImpl* __oil_function_impl_clamp_f32_sse() {
		return &_oil_function_impl_clamp_f32_sse;
}
#endif

#ifdef	__SYMBIAN32__
 
/* Symbian-only accessor returning the address of the implementation record
 * for clamp_f64_sse (presumably the object emitted by the
 * OIL_DEFINE_IMPL_FULL (clamp_f64_sse, ...) line in this file). */
OilFunctionImpl* __oil_function_impl_clamp_f64_sse() {
		return &_oil_function_impl_clamp_f64_sse;
}
#endif

#ifdef	__SYMBIAN32__
 
/* Symbian-only accessor returning the address of the implementation record
 * for clamplow_u8_sse (presumably the object emitted by the
 * OIL_DEFINE_IMPL_FULL (clamplow_u8_sse, ...) line in this file). */
OilFunctionImpl* __oil_function_impl_clamplow_u8_sse() {
		return &_oil_function_impl_clamplow_u8_sse;
}
#endif

#ifdef	__SYMBIAN32__
 
/* Symbian-only accessor returning the address of the implementation record
 * for clamplow_s16_sse (presumably the object emitted by the
 * OIL_DEFINE_IMPL_FULL (clamplow_s16_sse, ...) line in this file). */
OilFunctionImpl* __oil_function_impl_clamplow_s16_sse() {
		return &_oil_function_impl_clamplow_s16_sse;
}
#endif

#ifdef	__SYMBIAN32__
 
/* Symbian-only accessor returning the address of the implementation record
 * for clamplow_f32_sse (presumably the object emitted by the
 * OIL_DEFINE_IMPL_FULL (clamplow_f32_sse, ...) line in this file). */
OilFunctionImpl* __oil_function_impl_clamplow_f32_sse() {
		return &_oil_function_impl_clamplow_f32_sse;
}
#endif

#ifdef	__SYMBIAN32__
 
/* Symbian-only accessor returning the address of the implementation record
 * for clamplow_f64_sse (presumably the object emitted by the
 * OIL_DEFINE_IMPL_FULL (clamplow_f64_sse, ...) line in this file). */
OilFunctionImpl* __oil_function_impl_clamplow_f64_sse() {
		return &_oil_function_impl_clamplow_f64_sse;
}
#endif

#ifdef	__SYMBIAN32__
 
/* Symbian-only accessor returning the address of the implementation record
 * for clamphigh_u8_sse (presumably the object emitted by the
 * OIL_DEFINE_IMPL_FULL (clamphigh_u8_sse, ...) line in this file). */
OilFunctionImpl* __oil_function_impl_clamphigh_u8_sse() {
		return &_oil_function_impl_clamphigh_u8_sse;
}
#endif

#ifdef	__SYMBIAN32__
 
/* Symbian-only accessor returning the address of the implementation record
 * for clamphigh_s16_sse (presumably the object emitted by the
 * OIL_DEFINE_IMPL_FULL (clamphigh_s16_sse, ...) line in this file). */
OilFunctionImpl* __oil_function_impl_clamphigh_s16_sse() {
		return &_oil_function_impl_clamphigh_s16_sse;
}
#endif

#ifdef	__SYMBIAN32__
 
/* Symbian-only accessor returning the address of the implementation record
 * for clamphigh_f32_sse (presumably the object emitted by the
 * OIL_DEFINE_IMPL_FULL (clamphigh_f32_sse, ...) line in this file). */
OilFunctionImpl* __oil_function_impl_clamphigh_f32_sse() {
		return &_oil_function_impl_clamphigh_f32_sse;
}
#endif

#ifdef	__SYMBIAN32__
 
/* Symbian-only accessor returning the address of the implementation record
 * for clamphigh_f64_sse (presumably the object emitted by the
 * OIL_DEFINE_IMPL_FULL (clamphigh_f64_sse, ...) line in this file). */
OilFunctionImpl* __oil_function_impl_clamphigh_f64_sse() {
		return &_oil_function_impl_clamphigh_f64_sse;
}
#endif