genericopenlibs/liboil/src/math/generate_math.pl
branchRCL_3
changeset 56 acd3cd4aaceb
equal deleted inserted replaced
54:4332f0f7be53 56:acd3cd4aaceb
       
     1 #!/usr/bin/perl
       
     2 #
       
     3 
       
     4 
       
     5 
       
     6 print <<EOF
       
     7 /* This file is autogenerated.  Do not edit. */
       
     8 /*
       
     9  * LIBOIL - Library of Optimized Inner Loops
       
    10  * Copyright (c) 2005 David A. Schleef <ds@schleef.org>
       
    11  * All rights reserved.
       
    12  *
       
    13  * Redistribution and use in source and binary forms, with or without
       
    14  * modification, are permitted provided that the following conditions
       
    15  * are met:
       
    16  * 1. Redistributions of source code must retain the above copyright
       
    17  *    notice, this list of conditions and the following disclaimer.
       
    18  * 2. Redistributions in binary form must reproduce the above copyright
       
    19  *    notice, this list of conditions and the following disclaimer in the
       
    20  *    documentation and/or other materials provided with the distribution.
       
    21  * 
       
    22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
       
    23  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
       
    24  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       
    25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
       
    26  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
       
    27  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
       
    28  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       
    29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
       
    30  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
       
    31  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    32  * POSSIBILITY OF SUCH DAMAGE.
       
    33  */
       
    34 
       
    35 #ifdef HAVE_CONFIG_H
       
    36 #include "config.h"
       
    37 #endif
       
    38 
       
    39 #include <math.h>
       
    40 
       
    41 #include <liboil/liboil.h>
       
    42 #include <liboil/liboilclasses.h>
       
    43 
       
    44 EOF
       
    45 ;
       
    46 
       
    47 
       
    48 sub binary_pointer
       
    49 {
       
    50 	my $kernel = shift;
       
    51 	my $precision = shift;
       
    52 	my $type = "oil_type_$precision";
       
    53 	my $operator = shift;
       
    54 
       
    55 	print <<EOF
       
    56 static void
       
    57 ${kernel}_${precision}_pointer (${type} *dest, ${type} *src1, ${type} *src2, int n)
       
    58 {
       
    59   while (n) {
       
    60     *dest = *src1 ${operator} *src2;
       
    61     dest++;
       
    62     src1++;
       
    63     src2++;
       
    64     n--;
       
    65   }
       
    66 }
       
    67 OIL_DEFINE_IMPL (${kernel}_${precision}_pointer, ${kernel}_${precision});
       
    68 
       
    69 EOF
       
    70 ;
       
    71 }
       
    72 
       
    73 sub binary_unroll2
       
    74 {
       
    75 	my $kernel = shift;
       
    76 	my $precision = shift;
       
    77 	my $type = "oil_type_$precision";
       
    78 	my $operator = shift;
       
    79 
       
    80 	print <<EOF
       
    81 static void
       
    82 ${kernel}_${precision}_unroll2 (${type} *dest, ${type} *src1, ${type} *src2, int n)
       
    83 {
       
    84   int i;
       
    85 
       
    86   if (n & 1) {
       
    87     dest[0] = src1[0] ${operator} src2[0];
       
    88     dest++;
       
    89     src1++;
       
    90     src2++;
       
    91     n--;
       
    92   }
       
    93   for(i=0;i<n;i+=2){
       
    94     dest[i] = src1[i] ${operator} src2[i];
       
    95     dest[i+1] = src1[i+1] ${operator} src2[i+1];
       
    96   }
       
    97 }
       
    98 OIL_DEFINE_IMPL (${kernel}_${precision}_unroll2, ${kernel}_${precision});
       
    99 
       
   100 EOF
       
   101 ;
       
   102 }
       
   103 
       
   104 sub binary_unroll4a
       
   105 {
       
   106 	my $kernel = shift;
       
   107 	my $precision = shift;
       
   108 	my $type = "oil_type_$precision";
       
   109 	my $operator = shift;
       
   110 
       
   111 	print <<EOF
       
   112 static void
       
   113 ${kernel}_${precision}_unroll4a (${type} *dest, ${type} *src1, ${type} *src2, int n)
       
   114 {
       
   115   int i;
       
   116 
       
   117   while (n & 3) {
       
   118     dest[0] = src1[0] ${operator} src2[0];
       
   119     dest++;
       
   120     src1++;
       
   121     src2++;
       
   122     n--;
       
   123   }
       
   124   for(i=0;i<n;i+=4){
       
   125     dest[i] = src1[i] ${operator} src2[i];
       
   126     dest[i+1] = src1[i+1] ${operator} src2[i+1];
       
   127     dest[i+2] = src1[i+2] ${operator} src2[i+2];
       
   128     dest[i+3] = src1[i+3] ${operator} src2[i+3];
       
   129   }
       
   130 }
       
   131 OIL_DEFINE_IMPL (${kernel}_${precision}_unroll4a, ${kernel}_${precision});
       
   132 
       
   133 EOF
       
   134 ;
       
   135 }
       
   136 
       
   137 sub binary_unroll4b
       
   138 {
       
   139 	my $kernel = shift;
       
   140 	my $precision = shift;
       
   141 	my $type = "oil_type_$precision";
       
   142 	my $operator = shift;
       
   143 
       
   144 	print <<EOF
       
   145 static void
       
   146 ${kernel}_${precision}_unroll4b (${type} *dest, ${type} *src1, ${type} *src2, int n)
       
   147 {
       
   148   int i;
       
   149 
       
   150   for(i=0;i<(n&(~0x3));i+=4){
       
   151     dest[i+0] = src1[i+0] ${operator} src2[i+0];
       
   152     dest[i+1] = src1[i+1] ${operator} src2[i+1];
       
   153     dest[i+2] = src1[i+2] ${operator} src2[i+2];
       
   154     dest[i+3] = src1[i+3] ${operator} src2[i+3];
       
   155   }
       
   156   for(;i<n;i++){
       
   157     dest[i] = src1[i] ${operator} src2[i];
       
   158   }
       
   159 }
       
   160 OIL_DEFINE_IMPL (${kernel}_${precision}_unroll4b, ${kernel}_${precision});
       
   161 
       
   162 EOF
       
   163 ;
       
   164 }
       
   165 
       
   166 sub binary_unroll4c
       
   167 {
       
   168 	my $kernel = shift;
       
   169 	my $precision = shift;
       
   170 	my $type = "oil_type_$precision";
       
   171 	my $operator = shift;
       
   172 
       
   173 	print <<EOF
       
   174 static void
       
   175 ${kernel}_${precision}_unroll4c (${type} *dest, ${type} *src1, ${type} *src2, int n)
       
   176 {
       
   177   int i;
       
   178 
       
   179   for(i=0;i<(n&(~0x3));i+=4){
       
   180     *dest++ = *src1++ ${operator} *src2++;
       
   181     *dest++ = *src1++ ${operator} *src2++;
       
   182     *dest++ = *src1++ ${operator} *src2++;
       
   183     *dest++ = *src1++ ${operator} *src2++;
       
   184   }
       
   185   for(;i<n;i++){
       
   186     *dest++ = *src1++ ${operator} *src2++;
       
   187   }
       
   188 }
       
   189 OIL_DEFINE_IMPL (${kernel}_${precision}_unroll4c, ${kernel}_${precision});
       
   190 
       
   191 EOF
       
   192 ;
       
   193 }
       
   194 
       
   195 my %binary_operators = (
       
   196  "add" => "+",
       
   197  "subtract" => "-",
       
   198  "multiply" => "*",
       
   199  "divide" => "/"
       
   200 );
       
   201 
       
   202 my @types = ( "f32", "f64" );
       
   203 
       
   204 while ( ($name, $op) = each %binary_operators ) {
       
   205   foreach $prec (@types) {
       
   206     binary_pointer($name, $prec, $op);
       
   207     binary_unroll2($name, $prec, $op);
       
   208     binary_unroll4a($name, $prec, $op);
       
   209     binary_unroll4b($name, $prec, $op);
       
   210     binary_unroll4c($name, $prec, $op);
       
   211   }
       
   212 }
       
   213 
       
   214 exit 0;
       
   215 
       
   216 binary_pointer("subtract", "f32", "-");
       
   217 binary_unroll2("subtract", "f32", "-");
       
   218 binary_unroll4a("subtract", "f32", "-");
       
   219 binary_unroll4b("subtract", "f32", "-");
       
   220 binary_unroll4c("subtract", "f32", "-");
       
   221 
       
   222 binary_pointer("add", "f32", "+");
       
   223 binary_unroll2("add", "f32", "+");
       
   224 binary_unroll4a("add", "f32", "+");
       
   225 binary_unroll4b("add", "f32", "+");
       
   226 binary_unroll4c("add", "f32", "+");
       
   227 
       
   228 binary_pointer("multiply", "f32", "*");
       
   229 binary_unroll2("multiply", "f32", "*");
       
   230 binary_unroll4a("multiply", "f32", "*");
       
   231 binary_unroll4b("multiply", "f32", "*");
       
   232 binary_unroll4c("multiply", "f32", "*");
       
   233 
       
   234 binary_pointer("divide", "f32", "/");
       
   235 binary_unroll2("divide", "f32", "/");
       
   236 binary_unroll4a("divide", "f32", "/");
       
   237 binary_unroll4b("divide", "f32", "/");
       
   238 binary_unroll4c("divide", "f32", "/");
       
   239 
       
   240 binary_pointer("subtract", "f64", "-");
       
   241 binary_unroll2("subtract", "f64", "-");
       
   242 binary_unroll4a("subtract", "f64", "-");
       
   243 binary_unroll4b("subtract", "f64", "-");
       
   244 binary_unroll4c("subtract", "f64", "-");
       
   245 
       
   246 binary_pointer("add", "f64", "+");
       
   247 binary_unroll2("add", "f64", "+");
       
   248 binary_unroll4a("add", "f64", "+");
       
   249 binary_unroll4b("add", "f64", "+");
       
   250 binary_unroll4c("add", "f64", "+");
       
   251 
       
   252 binary_pointer("multiply", "f64", "*");
       
   253 binary_unroll2("multiply", "f64", "*");
       
   254 binary_unroll4a("multiply", "f64", "*");
       
   255 binary_unroll4b("multiply", "f64", "*");
       
   256 binary_unroll4c("multiply", "f64", "*");
       
   257 
       
   258 binary_pointer("divide", "f64", "/");
       
   259 binary_unroll2("divide", "f64", "/");
       
   260 binary_unroll4a("divide", "f64", "/");
       
   261 binary_unroll4b("divide", "f64", "/");
       
   262 binary_unroll4c("divide", "f64", "/");
       
   263 
       
   264 $blah = "
       
   265 static void
       
   266 subtract_f32_ref (float *dest, float *src1, float *src2, int n)
       
   267 {
       
   268   int i;
       
   269 
       
   270   for(i=0;i<n;i++){
       
   271     dest[i] = src1[i] - src2[i];
       
   272   }
       
   273 }
       
   274 OIL_DEFINE_IMPL (subtract_f32_ref, subtract_f32);
       
   275 
       
   276 static void
       
   277 multiply_f32_ref (float *dest, float *src1, float *src2, int n)
       
   278 {
       
   279   int i;
       
   280 
       
   281   for(i=0;i<n;i++){
       
   282     dest[i] = src1[i] * src2[i];
       
   283   }
       
   284 }
       
   285 OIL_DEFINE_IMPL (multiply_f32_ref, multiply_f32);
       
   286 
       
   287 static void
       
   288 divide_f32_ref (float *dest, float *src1, float *src2, int n)
       
   289 {
       
   290   int i;
       
   291 
       
   292   for(i=0;i<n;i++){
       
   293     dest[i] = src1[i] / src2[i];
       
   294   }
       
   295 }
       
   296 OIL_DEFINE_IMPL_REF (divide_f32_ref, divide_f32);
       
   297 
       
   298 static void
       
   299 minimum_f32_ref (float *dest, float *src1, float *src2, int n)
       
   300 {
       
   301   int i;
       
   302 
       
   303   for(i=0;i<n;i++){
       
   304     dest[i] = (src1[i] < src2[i]) ? src1[i] : src2[i];
       
   305   }
       
   306 }
       
   307 OIL_DEFINE_IMPL_REF (minimum_f32_ref, minimum_f32);
       
   308 
       
   309 static void
       
   310 maximum_f32_ref (float *dest, float *src1, float *src2, int n)
       
   311 {
       
   312   int i;
       
   313 
       
   314   for(i=0;i<n;i++){
       
   315     dest[i] = (src1[i] > src2[i]) ? src1[i] : src2[i];
       
   316   }
       
   317 }
       
   318 OIL_DEFINE_IMPL_REF (maximum_f32_ref, maximum_f32);
       
   319 
       
   320 static void
       
   321 negative_f32_ref (float *dest, float *src1, int n)
       
   322 {
       
   323   int i;
       
   324 
       
   325   for(i=0;i<n;i++){
       
   326     dest[i] = -src1[i];
       
   327   }
       
   328 }
       
   329 OIL_DEFINE_IMPL_REF (negative_f32_ref, negative_f32);
       
   330 
       
   331 static void
       
   332 inverse_f32_ref (float *dest, float *src1, int n)
       
   333 {
       
   334   int i;
       
   335 
       
   336   for(i=0;i<n;i++){
       
   337     dest[i] = 1.0/src1[i];
       
   338   }
       
   339 }
       
   340 OIL_DEFINE_IMPL_REF (inverse_f32_ref, inverse_f32);
       
   341 
       
   342 static void
       
   343 sign_f32_ref (float *dest, float *src1, int n)
       
   344 {
       
   345   int i;
       
   346 
       
   347   for(i=0;i<n;i++){
       
   348     dest[i] = (src1[i] < 0) ? -src1[i] : src1[i];
       
   349   }
       
   350 }
       
   351 OIL_DEFINE_IMPL_REF (sign_f32_ref, sign_f32);
       
   352 
       
   353 static void
       
   354 floor_f32_ref (float *dest, float *src1, int n)
       
   355 {
       
   356   int i;
       
   357 
       
   358   for(i=0;i<n;i++){
       
   359     dest[i] = floor(src1[i]);
       
   360   }
       
   361 }
       
   362 OIL_DEFINE_IMPL_REF (floor_f32_ref, floor_f32);
       
   363 
       
   364 
       
   365 
       
   366 static void
       
   367 scalaradd_f32_ns_ref (float *dest, float *src1, float *src2, int n)
       
   368 {
       
   369   int i;
       
   370 
       
   371   for(i=0;i<n;i++){
       
   372     dest[i] = src1[i] + src2[0];
       
   373   }
       
   374 }
       
   375 OIL_DEFINE_IMPL_REF (scalaradd_f32_ns_ref, scalaradd_f32_ns);
       
   376 
       
   377 static void
       
   378 scalarmultiply_f32_ns_ref (float *dest, float *src1, float *src2, int n)
       
   379 {
       
   380   int i;
       
   381 
       
   382   for(i=0;i<n;i++){
       
   383     dest[i] = src1[i] * src2[0];
       
   384   }
       
   385 }
       
   386 OIL_DEFINE_IMPL_REF (scalarmultiply_f32_ns_ref, scalarmultiply_f32_ns);
       
   387 
       
   388 
       
   389 ";