src/corelib/tools/qsimd.cpp
changeset 37 758a864f9613
parent 33 3e2da88830cd
equal deleted inserted replaced
36:ef0373b55136 37:758a864f9613
    39 **
    39 **
    40 ****************************************************************************/
    40 ****************************************************************************/
    41 
    41 
    42 #include "qsimd_p.h"
    42 #include "qsimd_p.h"
    43 #include <QByteArray>
    43 #include <QByteArray>
       
    44 #include <stdio.h>
    44 
    45 
    45 #if defined(Q_OS_WINCE)
    46 #if defined(Q_OS_WINCE)
    46 #include <windows.h>
    47 #include <windows.h>
    47 #endif
    48 #endif
    48 
    49 
       
    50 #if defined(Q_OS_WIN64) && !defined(Q_CC_GNU)
       
    51 #include <intrin.h>
       
    52 #endif
       
    53 
       
    54 #if defined(Q_OS_LINUX) && defined(__arm__)
       
    55 #include "private/qcore_unix_p.h"
       
    56 
       
    57 // the kernel header definitions for HWCAP_*
       
    58 // (the ones we need/may need anyway)
       
    59 
       
    60 // copied from <asm/hwcap.h> (ARM)
       
    61 #define HWCAP_IWMMXT    512
       
    62 #define HWCAP_CRUNCH    1024
       
    63 #define HWCAP_THUMBEE   2048
       
    64 #define HWCAP_NEON      4096
       
    65 #define HWCAP_VFPv3     8192
       
    66 #define HWCAP_VFPv3D16  16384
       
    67 
       
    68 // copied from <linux/auxvec.h>
       
    69 #define AT_HWCAP  16    /* arch dependent hints at CPU capabilities */
       
    70 
       
    71 #endif
       
    72 
    49 QT_BEGIN_NAMESPACE
    73 QT_BEGIN_NAMESPACE
    50 
    74 
    51 uint qDetectCPUFeatures()
       
    52 {
       
    53     static uint features = 0xffffffff;
       
    54     if (features != 0xffffffff)
       
    55         return features;
       
    56 
       
    57 #if defined (Q_OS_WINCE)
    75 #if defined (Q_OS_WINCE)
       
    76 static inline uint detectProcessorFeatures()
       
    77 {
       
    78     uint features = 0;
       
    79 
    58 #if defined (ARM)
    80 #if defined (ARM)
    59     if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX)) {
    81     if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX)) {
    60         features = IWMMXT;
    82         features = IWMMXT;
    61         return features;
    83         return features;
    62     }
    84     }
    72 #endif
    94 #endif
    73     return features;
    95     return features;
    74 #endif
    96 #endif
    75     features = 0;
    97     features = 0;
    76     return features;
    98     return features;
    77 #elif defined(QT_HAVE_IWMMXT)
    99 }
       
   100 
       
   101 #elif defined(__arm__) || defined(__arm) || defined(QT_HAVE_IWMMXT) || defined(QT_HAVE_NEON)
       
   102 static inline uint detectProcessorFeatures()
       
   103 {
       
   104     uint features = 0;
       
   105 
       
   106 #if defined(Q_OS_LINUX)
       
   107     int auxv = ::qt_safe_open("/proc/self/auxv", O_RDONLY);
       
   108     if (auxv != -1) {
       
   109         unsigned long vector[64];
       
   110         int nread;
       
   111         while (features == 0) {
       
   112             nread = ::qt_safe_read(auxv, (char *)vector, sizeof vector);
       
   113             if (nread <= 0) {
       
   114                 // EOF or error
       
   115                 break;
       
   116             }
       
   117 
       
   118             int max = nread / (sizeof vector[0]);
       
   119             for (int i = 0; i < max; i += 2)
       
   120                 if (vector[i] == AT_HWCAP) {
       
   121                     if (vector[i+1] & HWCAP_IWMMXT)
       
   122                         features |= IWMMXT;
       
   123                     if (vector[i+1] & HWCAP_NEON)
       
   124                         features |= NEON;
       
   125                     break;
       
   126                 }
       
   127         }
       
   128 
       
   129         ::qt_safe_close(auxv);
       
   130         return features;
       
   131     }
       
   132     // fall back if /proc/self/auxv wasn't found
       
   133 #endif
       
   134 
       
   135 #if defined(QT_HAVE_IWMMXT)
    78     // runtime detection only available when running as a previlegied process
   136     // runtime detection only available when running as a previlegied process
    79     static const bool doIWMMXT = !qgetenv("QT_NO_IWMMXT").toInt();
   137     features = IWMMXT;
    80     features = doIWMMXT ? IWMMXT : 0;
       
    81     return features;
       
    82 #elif defined(QT_HAVE_NEON)
   138 #elif defined(QT_HAVE_NEON)
    83     static const bool doNEON = !qgetenv("QT_NO_NEON").toInt();
   139     features = NEON;
    84     features = doNEON ? NEON : 0;
   140 #endif
    85     return features;
   141 
    86 #else
   142     return features;
    87     features = 0;
   143 }
    88 #if defined(__x86_64__) || defined(Q_OS_WIN64)
   144 
    89     features = MMX|SSE|SSE2|CMOV;
       
    90 #elif defined(__ia64__)
       
    91     features = MMX|SSE|SSE2;
       
    92 #elif defined(__i386__) || defined(_M_IX86)
   145 #elif defined(__i386__) || defined(_M_IX86)
       
   146 static inline uint detectProcessorFeatures()
       
   147 {
       
   148     uint features = 0;
       
   149 
    93     unsigned int extended_result = 0;
   150     unsigned int extended_result = 0;
       
   151     unsigned int feature_result = 0;
    94     uint result = 0;
   152     uint result = 0;
    95     /* see p. 118 of amd64 instruction set manual Vol3 */
   153     /* see p. 118 of amd64 instruction set manual Vol3 */
    96 #if defined(Q_CC_GNU)
   154 #if defined(Q_CC_GNU)
    97     asm ("push %%ebx\n"
   155     long cpuid_supported, tmp1;
    98          "pushf\n"
   156     asm ("pushf\n"
    99          "pop %%eax\n"
   157          "pop %0\n"
   100          "mov %%eax, %%ebx\n"
   158          "mov %0, %1\n"
   101          "xor $0x00200000, %%eax\n"
   159          "xor $0x00200000, %0\n"
   102          "push %%eax\n"
   160          "push %0\n"
   103          "popf\n"
   161          "popf\n"
   104          "pushf\n"
   162          "pushf\n"
   105          "pop %%eax\n"
   163          "pop %0\n"
   106          "xor %%edx, %%edx\n"
   164          "xor %1, %0\n" // %eax is now 0 if CPUID is not supported
   107          "xor %%ebx, %%eax\n"
   165          : "=a" (cpuid_supported), "=r" (tmp1)
   108          "jz 1f\n"
   166          );
   109 
   167     if (cpuid_supported) {
   110          "mov $0x00000001, %%eax\n"
   168         asm ("xchg %%ebx, %2\n"
   111          "cpuid\n"
   169              "cpuid\n"
   112          "1:\n"
   170              "xchg %%ebx, %2\n"
   113          "pop %%ebx\n"
   171             : "=c" (feature_result), "=d" (result), "=&r" (tmp1)
   114          "mov %%edx, %0\n"
   172             : "a" (1));
   115         : "=r" (result)
   173 
   116         :
   174         asm ("xchg %%ebx, %1\n"
   117         : "%eax", "%ecx", "%edx"
   175              "cpuid\n"
   118         );
   176              "cmp $0x80000000, %%eax\n"
   119 
   177              "jnbe 1f\n"
   120     asm ("push %%ebx\n"
   178              "xor %0, %0\n"
   121          "pushf\n"
   179              "jmp 2f\n"
   122          "pop %%eax\n"
   180              "1:\n"
   123          "mov %%eax, %%ebx\n"
   181              "mov $0x80000001, %%eax\n"
   124          "xor $0x00200000, %%eax\n"
   182              "cpuid\n"
   125          "push %%eax\n"
   183              "2:\n"
   126          "popf\n"
   184              "xchg %%ebx, %1\n"
   127          "pushf\n"
   185             : "=d" (extended_result), "=&r" (tmp1)
   128          "pop %%eax\n"
   186             : "a" (0x80000000)
   129          "xor %%edx, %%edx\n"
   187             : "%ecx"
   130          "xor %%ebx, %%eax\n"
   188             );
   131          "jz 2f\n"
   189     }
   132 
   190 
   133          "mov $0x80000000, %%eax\n"
       
   134          "cpuid\n"
       
   135          "cmp $0x80000000, %%eax\n"
       
   136          "jbe 2f\n"
       
   137          "mov $0x80000001, %%eax\n"
       
   138          "cpuid\n"
       
   139          "2:\n"
       
   140          "pop %%ebx\n"
       
   141          "mov %%edx, %0\n"
       
   142         : "=r" (extended_result)
       
   143         :
       
   144         : "%eax", "%ecx", "%edx"
       
   145         );
       
   146 #elif defined (Q_OS_WIN)
   191 #elif defined (Q_OS_WIN)
   147     _asm {
   192     _asm {
   148         push eax
   193         push eax
   149         push ebx
   194         push ebx
   150         push ecx
   195         push ecx
   162         jz skip
   207         jz skip
   163 
   208 
   164         mov eax, 1
   209         mov eax, 1
   165         cpuid
   210         cpuid
   166         mov result, edx
   211         mov result, edx
       
   212         mov feature_result, ecx
   167     skip:
   213     skip:
   168         pop edx
   214         pop edx
   169         pop ecx
   215         pop ecx
   170         pop ebx
   216         pop ebx
   171         pop eax
   217         pop eax
   200         pop ecx
   246         pop ecx
   201         pop ebx
   247         pop ebx
   202         pop eax
   248         pop eax
   203     }
   249     }
   204 #endif
   250 #endif
       
   251 
   205 
   252 
   206     // result now contains the standard feature bits
   253     // result now contains the standard feature bits
   207     if (result & (1u << 15))
   254     if (result & (1u << 15))
   208         features |= CMOV;
   255         features |= CMOV;
   209     if (result & (1u << 23))
   256     if (result & (1u << 23))
   216         features |= MMX3DNOWEXT;
   263         features |= MMX3DNOWEXT;
   217     if (result & (1u << 25))
   264     if (result & (1u << 25))
   218         features |= SSE;
   265         features |= SSE;
   219     if (result & (1u << 26))
   266     if (result & (1u << 26))
   220         features |= SSE2;
   267         features |= SSE2;
   221     if (extended_result & (1u))
   268     if (feature_result & (1u))
   222         features |= SSE3;
   269         features |= SSE3;
   223     if (extended_result & (1u << 9))
   270     if (feature_result & (1u << 9))
   224         features |= SSSE3;
   271         features |= SSSE3;
   225     if (extended_result & (1u << 19))
   272     if (feature_result & (1u << 19))
   226         features |= SSE4_1;
   273         features |= SSE4_1;
   227     if (extended_result & (1u << 20))
   274     if (feature_result & (1u << 20))
   228         features |= SSE4_2;
   275         features |= SSE4_2;
   229     if (extended_result & (1u << 28))
   276     if (feature_result & (1u << 28))
   230         features |= AVX;
   277         features |= AVX;
   231 
   278 
   232 #endif // i386
   279     return features;
   233 
   280 }
   234 #if defined(QT_HAVE_MMX)
   281 
   235     if (qgetenv("QT_NO_MMX").toInt())
   282 #elif defined(__x86_64) || defined(Q_OS_WIN64)
   236         features ^= MMX;
   283 static inline uint detectProcessorFeatures()
   237 #endif
   284 {
   238     if (qgetenv("QT_NO_MMXEXT").toInt())
   285     uint features = MMX|SSE|SSE2|CMOV;
   239         features ^= MMXEXT;
   286     uint feature_result = 0;
   240 
   287 
   241 #if defined(QT_HAVE_3DNOW)
   288 #if defined(Q_CC_GNU)
   242     if (qgetenv("QT_NO_3DNOW").toInt())
   289     asm ("cpuid"
   243         features ^= MMX3DNOW;
   290         : "=c" (feature_result)
   244 #endif
   291         : "a" (1)
   245     if (qgetenv("QT_NO_3DNOWEXT").toInt())
   292         : "%ebx", "%edx"
   246         features ^= MMX3DNOWEXT;
   293         );
   247 
   294 #elif defined (Q_OS_WIN64)
   248 #if defined(QT_HAVE_SSE)
   295     {
   249     if (qgetenv("QT_NO_SSE").toInt())
   296        int info[4];
   250         features ^= SSE;
   297        __cpuid(info, 1);
   251 #endif
   298        feature_result = info[2];
   252 #if defined(QT_HAVE_SSE2)
   299     }
   253     if (qgetenv("QT_NO_SSE2").toInt())
   300 #endif
   254         features ^= SSE2;
   301 
   255 #endif
   302     if (feature_result & (1u))
   256 
   303         features |= SSE3;
   257     return features;
   304     if (feature_result & (1u << 9))
   258 #endif
   305         features |= SSSE3;
       
   306     if (feature_result & (1u << 19))
       
   307         features |= SSE4_1;
       
   308     if (feature_result & (1u << 20))
       
   309         features |= SSE4_2;
       
   310     if (feature_result & (1u << 28))
       
   311         features |= AVX;
       
   312 
       
   313     return features;
       
   314 }
       
   315 
       
   316 #elif defined(__ia64__)
       
   317 static inline uint detectProcessorFeatures()
       
   318 {
       
   319     return MMX|SSE|SSE2;
       
   320 }
       
   321 
       
   322 #else
       
   323 static inline uint detectProcessorFeatures()
       
   324 {
       
   325     return 0;
       
   326 }
       
   327 #endif
       
   328 
       
   329 /*
       
   330  * Use kdesdk/scripts/generate_string_table.pl to update the table below.
       
   331  * Here's the data (don't forget the ONE leading space):
       
   332  mmx
       
   333  mmxext
       
   334  mmx3dnow
       
   335  mmx3dnowext
       
   336  sse
       
   337  sse2
       
   338  cmov
       
   339  iwmmxt
       
   340  neon
       
   341  sse3
       
   342  ssse3
       
   343  sse4.1
       
   344  sse4.2
       
   345  avx
       
   346   */
       
   347 
       
   // begin generated
   static const char features_string[] =
       " mmx\0"
       " mmxext\0"
       " mmx3dnow\0"
       " mmx3dnowext\0"
       " sse\0"
       " sse2\0"
       " cmov\0"
       " iwmmxt\0"
       " neon\0"
       " sse3\0"
       " ssse3\0"
       " sse4.1\0"
       " sse4.2\0"
       " avx\0"
       "\0";

   static const int features_indices[] = {
          0,    5,   13,   23,   36,   41,   47,   53,
         61,   67,   73,   80,   88,   96,   -1
   };
   // end generated

   // Number of feature names in the table: every element of features_indices
   // except the trailing -1 terminator. Entry i is the offset into
   // features_string of the name (with its leading space) for feature bit i.
   const int features_count = sizeof(features_indices) / sizeof(features_indices[0]) - 1;
       
   373 
       
   374 uint qDetectCPUFeatures()
       
   375 {
       
   376     static QBasicAtomicInt features = Q_BASIC_ATOMIC_INITIALIZER(-1);
       
   377     if (features != -1)
       
   378         return features;
       
   379 
       
   380     uint f = detectProcessorFeatures();
       
   381     QByteArray disable = qgetenv("QT_NO_CPU_FEATURE");
       
   382     if (!disable.isEmpty()) {
       
   383         disable.prepend(' ');
       
   384         for (int i = 0; i < features_count; ++i) {
       
   385             if (disable.contains(features_string + features_indices[i]))
       
   386                 f &= ~(1 << i);
       
   387         }
       
   388     }
       
   389 
       
   390     features = f;
       
   391     return features;
       
   392 }
       
   393 
       
   394 void qDumpCPUFeatures()
       
   395 {
       
   396     uint features = qDetectCPUFeatures();
       
   397     printf("Processor features: ");
       
   398     for (int i = 0; i < features_count; ++i) {
       
   399         if (features & (1 << i))
       
   400             printf("%s", features_string + features_indices[i]);
       
   401     }
       
   402     puts("");
   259 }
   403 }
   260 
   404 
   261 QT_END_NAMESPACE
   405 QT_END_NAMESPACE