genericopenlibs/liboil/src/liboilcpu-x86.c
changeset 18 47c74d1534e1
equal deleted inserted replaced
0:e4d67989cc36 18:47c74d1534e1
       
     1 /*
       
     2  * LIBOIL - Library of Optimized Inner Loops
       
     3  * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
       
     4  * All rights reserved.
       
     5  *
       
     6  * Redistribution and use in source and binary forms, with or without
       
     7  * modification, are permitted provided that the following conditions
       
     8  * are met:
       
     9  * 1. Redistributions of source code must retain the above copyright
       
    10  *    notice, this list of conditions and the following disclaimer.
       
    11  * 2. Redistributions in binary form must reproduce the above copyright
       
    12  *    notice, this list of conditions and the following disclaimer in the
       
    13  *    documentation and/or other materials provided with the distribution.
       
    14  * 
       
    15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
       
    16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
       
    17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       
    18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
       
    19  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
       
    20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
       
    21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
       
    22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
       
    23  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
       
    24  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    25  * POSSIBILITY OF SUCH DAMAGE.
       
    26  */
       
    27 
       
    28 #ifdef HAVE_CONFIG_H
       
    29 #include "config.h"
       
    30 #endif
       
    31 #include <liboil/liboilfunction.h>
       
    32 #include <liboil/liboildebug.h>
       
    33 #include <liboil/liboilcpu.h>
       
    34 #include <liboil/liboilfault.h>
       
    35 #include <liboil/liboilutils.h>
       
    36 
       
    37 #ifdef HAVE_UNISTD_H
       
    38 #include <unistd.h>
       
    39 #endif
       
    40 #include <fcntl.h>
       
    41 #include <stdlib.h>
       
    42 #include <string.h>
       
    43 #include <stdio.h>
       
    44 #include <setjmp.h>
       
    45 #include <signal.h>
       
    46 #ifdef HAVE_SYS_TIME_H
       
    47 #include <sys/time.h>
       
    48 #endif
       
    49 #include <time.h>
       
    50 
       
    51 #if defined(__FreeBSD__) || defined(__APPLE__)
       
    52 #include <sys/types.h>
       
    53 #include <sys/sysctl.h>
       
    54 #endif
       
    55 
       
    56 #ifdef __sun
       
    57 #include <sys/auxv.h>
       
    58 #endif
       
    59 
       
    60 /***** i386, amd64 *****/
       
    61 
       
    62 #if defined(__sun)
       
    63 #define USE_I386_GETISAX
       
    64 #else
       
    65 #define USE_I386_CPUID
       
    66 #endif
       
    67 
       
    68 
       
    69 #ifdef USE_I386_CPUINFO
       
    70 static void
       
    71 oil_cpu_i386_getflags_cpuinfo (char *cpuinfo)
       
    72 {
       
    73   char *cpuinfo_flags;
       
    74   char **flags;
       
    75   char **f;
       
    76 
       
    77   cpuinfo_flags = get_tag_value (cpuinfo, "flags");
       
    78   if (cpuinfo_flags == NULL) {
       
    79     free (cpuinfo);
       
    80     return;
       
    81   }
       
    82 
       
    83   flags = strsplit(cpuinfo_flags);
       
    84   for (f = flags; *f; f++) {
       
    85     if (strcmp (*f, "cmov") == 0) {
       
    86       OIL_DEBUG ("cpu flag %s", *f);
       
    87       oil_cpu_flags |= OIL_IMPL_FLAG_CMOV;
       
    88     }
       
    89     if (strcmp (*f, "mmx") == 0) {
       
    90       OIL_DEBUG ("cpu flag %s", *f);
       
    91       oil_cpu_flags |= OIL_IMPL_FLAG_MMX;
       
    92     }
       
    93     if (strcmp (*f, "sse") == 0) {
       
    94       OIL_DEBUG ("cpu flag %s", *f);
       
    95       oil_cpu_flags |= OIL_IMPL_FLAG_SSE;
       
    96     }
       
    97     if (strcmp (*f, "mmxext") == 0) {
       
    98       OIL_DEBUG ("cpu flag %s", *f);
       
    99       oil_cpu_flags |= OIL_IMPL_FLAG_MMXEXT;
       
   100     }
       
   101     if (strcmp (*f, "sse2") == 0) {
       
   102       OIL_DEBUG ("cpu flag %s", *f);
       
   103       oil_cpu_flags |= OIL_IMPL_FLAG_SSE2;
       
   104       oil_cpu_flags |= OIL_IMPL_FLAG_MMXEXT;
       
   105     }
       
   106     if (strcmp (*f, "3dnow") == 0) {
       
   107       OIL_DEBUG ("cpu flag %s", *f);
       
   108       oil_cpu_flags |= OIL_IMPL_FLAG_3DNOW;
       
   109     }
       
   110     if (strcmp (*f, "3dnowext") == 0) {
       
   111       OIL_DEBUG ("cpu flag %s", *f);
       
   112       oil_cpu_flags |= OIL_IMPL_FLAG_3DNOWEXT;
       
   113     }
       
   114     if (strcmp (*f, "sse3") == 0) {
       
   115       OIL_DEBUG ("cpu flag %s", *f);
       
   116       oil_cpu_flags |= OIL_IMPL_FLAG_SSE3;
       
   117       oil_cpu_flags |= OIL_IMPL_FLAG_SSE2;
       
   118       oil_cpu_flags |= OIL_IMPL_FLAG_MMXEXT;
       
   119     }
       
   120     if (strcmp (*f, "ssse3") == 0) {
       
   121       OIL_DEBUG ("cpu flag %s", *f);
       
   122       oil_cpu_flags |= OIL_IMPL_FLAG_SSSE3;
       
   123       oil_cpu_flags |= OIL_IMPL_FLAG_SSE3;
       
   124       oil_cpu_flags |= OIL_IMPL_FLAG_SSE2;
       
   125       oil_cpu_flags |= OIL_IMPL_FLAG_MMXEXT;
       
   126     }
       
   127 
       
   128     free (*f);
       
   129   }
       
   130   free (flags);
       
   131   free (cpuinfo);
       
   132   free (cpuinfo_flags);
       
   133 }
       
   134 #endif
       
   135 
       
   136 #ifdef HAVE_GCC_ASM      
       
   137 static unsigned long
       
   138 oil_profile_stamp_rdtsc(void)
       
   139 {
       
   140 	unsigned long ts;
       
   141 	__asm__ __volatile__("rdtsc\n" : "=a" (ts) : : "edx");
       
   142 	return ts;
       
   143 }
       
   144 #endif
       
   145 
       
   146 #ifdef USE_I386_CPUID
       
   147 #ifdef __i386__
       
   148 static void
       
   149 get_cpuid (uint32_t op, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
       
   150 {
       
   151 /*               
       
   152   __asm__ (
       
   153       "  pushl %%ebx\n"
       
   154       "  cpuid\n"
       
   155       "  mov %%ebx, %%esi\n"
       
   156       "  popl %%ebx\n"
       
   157       : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d)
       
   158       : "0" (op));
       
   159 */  
       
   160 }
       
   161 #endif
       
   162 
       
   163 #ifdef __amd64__
       
   164 static void
       
   165 get_cpuid (uint32_t op, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
       
   166 {
       
   167   __asm__ (
       
   168       "  pushq %%rbx\n"
       
   169       "  cpuid\n"
       
   170       "  mov %%ebx, %%esi\n"
       
   171       "  popq %%rbx\n"
       
   172       : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d)
       
   173       : "0" (op));
       
   174 }
       
   175 #endif
       
   176 
       
   177 static void
       
   178 test_cpuid (void *ignored)
       
   179 {
       
   180   uint32_t eax, ebx, ecx, edx;
       
   181 
       
   182   get_cpuid (0x00000000, &eax, &ebx, &ecx, &edx);
       
   183 }
       
   184 
       
   185 static void
       
   186 oil_cpu_detect_cpuid (void)
       
   187 {
       
   188   uint32_t eax, ebx, ecx, edx;
       
   189   uint32_t level;
       
   190   char vendor[13] = { 0 };
       
   191   int ret;
       
   192 
       
   193   oil_fault_check_enable ();
       
   194   ret = oil_fault_check_try(test_cpuid, NULL);
       
   195   oil_fault_check_disable ();
       
   196   if (!ret) {
       
   197     /* CPU thinks cpuid is an illegal instruction. */
       
   198     return;
       
   199   }
       
   200 
       
   201   get_cpuid (0x00000000, &level, (uint32_t *)(vendor+0),
       
   202       (uint32_t *)(vendor+8), (uint32_t *)(vendor+4));
       
   203 
       
   204   OIL_DEBUG("cpuid %d %s", level, vendor);
       
   205 
       
   206   if (level < 1) {
       
   207     return;
       
   208   }
       
   209 
       
   210   get_cpuid (0x00000001, &eax, &ebx, &ecx, &edx);
       
   211 
       
   212 #ifdef HAVE_GCC_ASM
       
   213   if (edx & (1<<4)) {
       
   214     _oil_profile_stamp = oil_profile_stamp_rdtsc;
       
   215   }
       
   216 #endif
       
   217 
       
   218   /* Intel flags */
       
   219   if (edx & (1<<15)) {
       
   220     oil_cpu_flags |= OIL_IMPL_FLAG_CMOV;
       
   221   }
       
   222   if (edx & (1<<23)) {
       
   223     oil_cpu_flags |= OIL_IMPL_FLAG_MMX;
       
   224   }
       
   225   if (edx & (1<<25)) {
       
   226     oil_cpu_flags |= OIL_IMPL_FLAG_SSE;
       
   227   }
       
   228   if (edx & (1<<26)) {
       
   229     oil_cpu_flags |= OIL_IMPL_FLAG_SSE2;
       
   230     oil_cpu_flags |= OIL_IMPL_FLAG_MMXEXT;
       
   231   }
       
   232   if (ecx & (1<<0)) {
       
   233     oil_cpu_flags |= OIL_IMPL_FLAG_SSE3;
       
   234   }
       
   235   
       
   236   if (memcmp (vendor, "AuthenticAMD", 12) == 0) {
       
   237     get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx);
       
   238 
       
   239     /* AMD flags */
       
   240     if (edx & (1<<22)) {
       
   241       oil_cpu_flags |= OIL_IMPL_FLAG_MMXEXT;
       
   242     }
       
   243     if (edx & (1<<31)) {
       
   244       oil_cpu_flags |= OIL_IMPL_FLAG_3DNOW;
       
   245     }
       
   246     if (edx & (1<<30)) {
       
   247       oil_cpu_flags |= OIL_IMPL_FLAG_3DNOWEXT;
       
   248     }
       
   249 
       
   250     get_cpuid (0x80000005, &eax, &ebx, &ecx, &edx);
       
   251 
       
   252     OIL_INFO("L1 D-cache: %d kbytes, %d-way, %d lines/tag, %d line size",
       
   253         (ecx>>24)&0xff, (ecx>>16)&0xff, (ecx>>8)&0xff, ecx&0xff);
       
   254     OIL_INFO("L1 I-cache: %d kbytes, %d-way, %d lines/tag, %d line size",
       
   255         (edx>>24)&0xff, (edx>>16)&0xff, (edx>>8)&0xff, edx&0xff);
       
   256 
       
   257     get_cpuid (0x80000006, &eax, &ebx, &ecx, &edx);
       
   258     OIL_INFO("L2 cache: %d kbytes, %d assoc, %d lines/tag, %d line size",
       
   259         (ecx>>16)&0xffff, (ecx>>12)&0xf, (ecx>>8)&0xf, ecx&0xff);
       
   260   }
       
   261 }
       
   262 #endif
       
   263 
       
   264 #ifdef USE_I386_GETISAX
       
   265 static void
       
   266 oil_cpu_detect_getisax (void)
       
   267 {
       
   268   uint_t ui;
       
   269 
       
   270   getisax (&ui, 1);
       
   271 
       
   272   if (ui & AV_386_CMOV) {
       
   273      oil_cpu_flags |= OIL_IMPL_FLAG_CMOV;
       
   274   }
       
   275   if (ui & AV_386_MMX) {
       
   276      oil_cpu_flags |= OIL_IMPL_FLAG_MMX;
       
   277   }
       
   278   if (ui & AV_386_SSE) {
       
   279      oil_cpu_flags |= OIL_IMPL_FLAG_SSE;
       
   280   }
       
   281   if (ui & AV_386_SSE2) {
       
   282      oil_cpu_flags |= OIL_IMPL_FLAG_SSE2;
       
   283      oil_cpu_flags |= OIL_IMPL_FLAG_MMXEXT;
       
   284   }
       
   285   if (ui & AV_386_SSE3) {
       
   286      oil_cpu_flags |= OIL_IMPL_FLAG_SSE3;
       
   287   }
       
   288   if (ui & AV_386_AMD_3DNow) {
       
   289     oil_cpu_flags |= OIL_IMPL_FLAG_3DNOW;
       
   290   }
       
   291   if (ui & AV_386_AMD_3DNowx) {
       
   292     oil_cpu_flags |= OIL_IMPL_FLAG_3DNOWEXT;
       
   293   }
       
   294   if (ui & AV_386_AMD_MMX) {
       
   295     oil_cpu_flags |= OIL_IMPL_FLAG_MMXEXT;
       
   296   }
       
   297 }
       
   298 #endif
       
   299 
       
   300 /* Reduce the set of CPU capabilities detected by whatever detection mechanism
       
   301  * was chosen, according to kernel limitations.  SSE requires kernel support for
       
   302  * use.
       
   303  */
       
   304 static void
       
   305 oil_cpu_detect_kernel_support (void)
       
   306 {
       
   307 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__APPLE__)
       
   308   int ret, enabled;
       
   309   size_t len;
       
   310 
       
   311   len = sizeof(enabled);
       
   312   ret = sysctlbyname("hw.instruction_sse", &enabled, &len, NULL, 0);
       
   313   if (ret || !enabled) {
       
   314     oil_cpu_flags &= ~(OIL_IMPL_FLAG_SSE | OIL_IMPL_FLAG_SSE2 |
       
   315 		       OIL_IMPL_FLAG_MMXEXT | OIL_IMPL_FLAG_SSE3);
       
   316   }
       
   317 #elif defined(__linux__)
       
   318   /*
       
   319    * Might also want to grow a check for the old RedHat + Linux 2.2
       
   320    * unmasked SSE FPU exception bug.  Other than that, if /proc/cpuinfo
       
   321    * reported SSE, then it's safe.
       
   322    */
       
   323 #elif defined(__sun)
       
   324   /* Solaris is OK */
       
   325 #elif defined(__NetBSD__)
       
   326   /* NetBSD is OK */
       
   327 #else
       
   328    
       
   329   OIL_WARNING("Operating system is not known to support SSE.  "
       
   330       "Assuming it does, which might cause problems");
       
   331 #if 0
       
   332   oil_cpu_flags &= ~(OIL_IMPL_FLAG_SSE | OIL_IMPL_FLAG_SSE2 |
       
   333 		     OIL_IMPL_FLAG_MMXEXT | OIL_IMPL_FLAG_SSE3);
       
   334 #endif
       
   335 #endif
       
   336 }
       
   337 
       
   338 void
       
   339 oil_cpu_detect_arch(void)
       
   340 {
       
   341 #ifdef USE_I386_CPUID
       
   342   oil_cpu_detect_cpuid ();
       
   343 #endif
       
   344 #ifdef USE_I386_GETISAX
       
   345   oil_cpu_detect_getisax ();
       
   346 #endif
       
   347 #ifdef USE_I386_CPUINFO
       
   348   oil_cpu_detect_cpuinfo ();
       
   349 #endif
       
   350 
       
   351   oil_cpu_detect_kernel_support ();
       
   352 }
       
   353 
       
   354