genericopenlibs/liboil/src/math/generate_math.pl
author hgs
Tue, 02 Nov 2010 19:23:22 +0530
changeset 79 564bc7b7ad27
parent 18 47c74d1534e1
permissions -rw-r--r--
201043

#!/usr/bin/perl
#



# Emit the fixed preamble of the generated C file: the "autogenerated"
# banner, the liboil license text, and the #include lines.
#
# The here-doc terminator is single-quoted so the text is copied verbatim.
# With an interpolating here-doc Perl treats "@schleef" in the e-mail
# address as an array variable and, without strict, substitutes an empty
# string, so the generated file would read "<ds.org>".
print <<'EOF';
/* This file is autogenerated.  Do not edit. */
/*
 * LIBOIL - Library of Optimized Inner Loops
 * Copyright (c) 2005 David A. Schleef <ds@schleef.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <math.h>

#include <liboil/liboil.h>
#include <liboil/liboilclasses.h>

EOF


# Print the C source of the "pointer" variant of a binary kernel.
#
# Arguments (positional):
#   kernel    - kernel name, e.g. "add"
#   precision - type suffix, e.g. "f32"; the C element type is
#               oil_type_<precision>
#   operator  - C binary operator placed between *src1 and *src2
#
# The emitted function is named <kernel>_<precision>_pointer. It loops
# while n is non-zero, storing one result through dest and advancing all
# three pointers on each pass. An OIL_DEFINE_IMPL line naming the function
# and <kernel>_<precision> follows it. Output goes to the currently
# selected file handle.
sub binary_pointer {
	my ($kernel, $precision, $operator) = @_;

	my $type  = 'oil_type_' . $precision;
	my $class = $kernel . '_' . $precision;
	my $impl  = $class . '_pointer';

	print <<"EOF";
static void
${impl} (${type} *dest, ${type} *src1, ${type} *src2, int n)
{
  while (n) {
    *dest = *src1 ${operator} *src2;
    dest++;
    src1++;
    src2++;
    n--;
  }
}
OIL_DEFINE_IMPL (${impl}, ${class});

EOF
}

# Print the C source of the two-way unrolled variant of a binary kernel.
#
# Arguments (positional):
#   kernel    - kernel name, e.g. "subtract"
#   precision - type suffix, e.g. "f64"; the C element type is
#               oil_type_<precision>
#   operator  - C binary operator applied to each src1/src2 element pair
#
# The emitted function is named <kernel>_<precision>_unroll2. When n is
# odd it first handles a single element and steps the pointers past it,
# then processes the rest two elements per loop pass. An OIL_DEFINE_IMPL
# line naming the function and <kernel>_<precision> follows it. Output
# goes to the currently selected file handle.
sub binary_unroll2 {
	my ($kernel, $precision, $operator) = @_;

	my $type  = 'oil_type_' . $precision;
	my $class = $kernel . '_' . $precision;
	my $impl  = $class . '_unroll2';

	print <<"EOF";
static void
${impl} (${type} *dest, ${type} *src1, ${type} *src2, int n)
{
  int i;

  if (n & 1) {
    dest[0] = src1[0] ${operator} src2[0];
    dest++;
    src1++;
    src2++;
    n--;
  }
  for(i=0;i<n;i+=2){
    dest[i] = src1[i] ${operator} src2[i];
    dest[i+1] = src1[i+1] ${operator} src2[i+1];
  }
}
OIL_DEFINE_IMPL (${impl}, ${class});

EOF
}

# Print the C source of the "unroll4a" variant of a binary kernel.
#
# Arguments (positional):
#   kernel    - kernel name, e.g. "multiply"
#   precision - type suffix, e.g. "f32"; the C element type is
#               oil_type_<precision>
#   operator  - C binary operator applied to each src1/src2 element pair
#
# The emitted function is named <kernel>_<precision>_unroll4a. It handles
# elements one at a time, stepping the pointers, while the low two bits of
# n are non-zero, then processes the rest four elements per loop pass. An
# OIL_DEFINE_IMPL line naming the function and <kernel>_<precision>
# follows it. Output goes to the currently selected file handle.
sub binary_unroll4a {
	my ($kernel, $precision, $operator) = @_;

	my $type  = 'oil_type_' . $precision;
	my $class = $kernel . '_' . $precision;
	my $impl  = $class . '_unroll4a';

	print <<"EOF";
static void
${impl} (${type} *dest, ${type} *src1, ${type} *src2, int n)
{
  int i;

  while (n & 3) {
    dest[0] = src1[0] ${operator} src2[0];
    dest++;
    src1++;
    src2++;
    n--;
  }
  for(i=0;i<n;i+=4){
    dest[i] = src1[i] ${operator} src2[i];
    dest[i+1] = src1[i+1] ${operator} src2[i+1];
    dest[i+2] = src1[i+2] ${operator} src2[i+2];
    dest[i+3] = src1[i+3] ${operator} src2[i+3];
  }
}
OIL_DEFINE_IMPL (${impl}, ${class});

EOF
}

# Print the C source of the "unroll4b" variant of a binary kernel.
#
# Arguments (positional):
#   kernel    - kernel name, e.g. "divide"
#   precision - type suffix, e.g. "f64"; the C element type is
#               oil_type_<precision>
#   operator  - C binary operator applied to each src1/src2 element pair
#
# The emitted function is named <kernel>_<precision>_unroll4b. Its first
# loop runs four indexed elements per pass up to n with the low two bits
# cleared; a second loop finishes the remaining indices one at a time. An
# OIL_DEFINE_IMPL line naming the function and <kernel>_<precision>
# follows it. Output goes to the currently selected file handle.
sub binary_unroll4b {
	my ($kernel, $precision, $operator) = @_;

	my $type  = 'oil_type_' . $precision;
	my $class = $kernel . '_' . $precision;
	my $impl  = $class . '_unroll4b';

	print <<"EOF";
static void
${impl} (${type} *dest, ${type} *src1, ${type} *src2, int n)
{
  int i;

  for(i=0;i<(n&(~0x3));i+=4){
    dest[i+0] = src1[i+0] ${operator} src2[i+0];
    dest[i+1] = src1[i+1] ${operator} src2[i+1];
    dest[i+2] = src1[i+2] ${operator} src2[i+2];
    dest[i+3] = src1[i+3] ${operator} src2[i+3];
  }
  for(;i<n;i++){
    dest[i] = src1[i] ${operator} src2[i];
  }
}
OIL_DEFINE_IMPL (${impl}, ${class});

EOF
}

# Print the C source of the "unroll4c" variant of a binary kernel.
#
# Arguments (positional):
#   kernel    - kernel name, e.g. "add"
#   precision - type suffix, e.g. "f64"; the C element type is
#               oil_type_<precision>
#   operator  - C binary operator applied to each src1/src2 element pair
#
# The emitted function is named <kernel>_<precision>_unroll4c. It has the
# same two-loop shape as the unroll4b variant, but each statement uses
# post-incremented pointers (*dest++ = *src1++ op *src2++) instead of
# array indexing. An OIL_DEFINE_IMPL line naming the function and
# <kernel>_<precision> follows it. Output goes to the currently selected
# file handle.
sub binary_unroll4c {
	my ($kernel, $precision, $operator) = @_;

	my $type  = 'oil_type_' . $precision;
	my $class = $kernel . '_' . $precision;
	my $impl  = $class . '_unroll4c';

	print <<"EOF";
static void
${impl} (${type} *dest, ${type} *src1, ${type} *src2, int n)
{
  int i;

  for(i=0;i<(n&(~0x3));i+=4){
    *dest++ = *src1++ ${operator} *src2++;
    *dest++ = *src1++ ${operator} *src2++;
    *dest++ = *src1++ ${operator} *src2++;
    *dest++ = *src1++ ${operator} *src2++;
  }
  for(;i<n;i++){
    *dest++ = *src1++ ${operator} *src2++;
  }
}
OIL_DEFINE_IMPL (${impl}, ${class});

EOF
}

# Kernel name => C binary operator used in the generated loop bodies.
my %binary_operators = (
 "add" => "+",
 "subtract" => "-",
 "multiply" => "*",
 "divide" => "/"
);

# Element-type suffixes; each one becomes oil_type_<suffix> in the
# generated C.
my @types = ( "f32", "f64" );

# Walk the kernels in sorted name order. Perl does not define the order in
# which `each`/`keys` visit a hash, so iterating the hash directly could
# emit the functions in a different order on every run and make the
# generated file non-reproducible. The loop variables are lexical so they
# no longer create package globals.
foreach my $name (sort keys %binary_operators) {
  my $op = $binary_operators{$name};
  foreach my $prec (@types) {
    binary_pointer($name, $prec, $op);
    binary_unroll2($name, $prec, $op);
    binary_unroll4a($name, $prec, $op);
    binary_unroll4b($name, $prec, $op);
    binary_unroll4c($name, $prec, $op);
  }
}

# Generation is complete; statements after this point never execute.
exit 0;

# NOTE: unreachable. The script calls `exit 0;` unconditionally before this
# point, so none of the calls below ever run. They spell out, one call per
# line, the same kernel/precision/operator combinations that the loop over
# %binary_operators and @types already generates.
binary_pointer("subtract", "f32", "-");
binary_unroll2("subtract", "f32", "-");
binary_unroll4a("subtract", "f32", "-");
binary_unroll4b("subtract", "f32", "-");
binary_unroll4c("subtract", "f32", "-");

binary_pointer("add", "f32", "+");
binary_unroll2("add", "f32", "+");
binary_unroll4a("add", "f32", "+");
binary_unroll4b("add", "f32", "+");
binary_unroll4c("add", "f32", "+");

binary_pointer("multiply", "f32", "*");
binary_unroll2("multiply", "f32", "*");
binary_unroll4a("multiply", "f32", "*");
binary_unroll4b("multiply", "f32", "*");
binary_unroll4c("multiply", "f32", "*");

binary_pointer("divide", "f32", "/");
binary_unroll2("divide", "f32", "/");
binary_unroll4a("divide", "f32", "/");
binary_unroll4b("divide", "f32", "/");
binary_unroll4c("divide", "f32", "/");

binary_pointer("subtract", "f64", "-");
binary_unroll2("subtract", "f64", "-");
binary_unroll4a("subtract", "f64", "-");
binary_unroll4b("subtract", "f64", "-");
binary_unroll4c("subtract", "f64", "-");

binary_pointer("add", "f64", "+");
binary_unroll2("add", "f64", "+");
binary_unroll4a("add", "f64", "+");
binary_unroll4b("add", "f64", "+");
binary_unroll4c("add", "f64", "+");

binary_pointer("multiply", "f64", "*");
binary_unroll2("multiply", "f64", "*");
binary_unroll4a("multiply", "f64", "*");
binary_unroll4b("multiply", "f64", "*");
binary_unroll4c("multiply", "f64", "*");

binary_pointer("divide", "f64", "/");
binary_unroll2("divide", "f64", "/");
binary_unroll4a("divide", "f64", "/");
binary_unroll4b("divide", "f64", "/");
binary_unroll4c("divide", "f64", "/");

# NOTE: unreachable and unused. This assignment comes after the
# unconditional `exit 0;`, so it never runs, and $blah is not read anywhere
# in the visible file. The string holds C source text for plain-loop f32
# "_ref" functions: subtract, multiply, divide, minimum, maximum, negative,
# inverse, sign, floor, scalaradd_ns and scalarmultiply_ns. None of it is
# printed.
#
# NOTE(review): inside the text, subtract_f32_ref and multiply_f32_ref use
# OIL_DEFINE_IMPL while every other function uses OIL_DEFINE_IMPL_REF, and
# sign_f32_ref as written yields -x for negative x and x otherwise (i.e.
# the magnitude). Confirm whether either is intended before reviving this
# text.
$blah = "
static void
subtract_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] - src2[i];
  }
}
OIL_DEFINE_IMPL (subtract_f32_ref, subtract_f32);

static void
multiply_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] * src2[i];
  }
}
OIL_DEFINE_IMPL (multiply_f32_ref, multiply_f32);

static void
divide_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] / src2[i];
  }
}
OIL_DEFINE_IMPL_REF (divide_f32_ref, divide_f32);

static void
minimum_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = (src1[i] < src2[i]) ? src1[i] : src2[i];
  }
}
OIL_DEFINE_IMPL_REF (minimum_f32_ref, minimum_f32);

static void
maximum_f32_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = (src1[i] > src2[i]) ? src1[i] : src2[i];
  }
}
OIL_DEFINE_IMPL_REF (maximum_f32_ref, maximum_f32);

static void
negative_f32_ref (float *dest, float *src1, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = -src1[i];
  }
}
OIL_DEFINE_IMPL_REF (negative_f32_ref, negative_f32);

static void
inverse_f32_ref (float *dest, float *src1, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = 1.0/src1[i];
  }
}
OIL_DEFINE_IMPL_REF (inverse_f32_ref, inverse_f32);

static void
sign_f32_ref (float *dest, float *src1, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = (src1[i] < 0) ? -src1[i] : src1[i];
  }
}
OIL_DEFINE_IMPL_REF (sign_f32_ref, sign_f32);

static void
floor_f32_ref (float *dest, float *src1, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = floor(src1[i]);
  }
}
OIL_DEFINE_IMPL_REF (floor_f32_ref, floor_f32);



static void
scalaradd_f32_ns_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] + src2[0];
  }
}
OIL_DEFINE_IMPL_REF (scalaradd_f32_ns_ref, scalaradd_f32_ns);

static void
scalarmultiply_f32_ns_ref (float *dest, float *src1, float *src2, int n)
{
  int i;

  for(i=0;i<n;i++){
    dest[i] = src1[i] * src2[0];
  }
}
OIL_DEFINE_IMPL_REF (scalarmultiply_f32_ns_ref, scalarmultiply_f32_ns);


";