diff -r ef0373b55136 -r 758a864f9613 src/corelib/tools/qsimd.cpp --- a/src/corelib/tools/qsimd.cpp Fri Sep 17 08:34:18 2010 +0300 +++ b/src/corelib/tools/qsimd.cpp Mon Oct 04 01:19:32 2010 +0300 @@ -41,20 +41,42 @@ #include "qsimd_p.h" #include +#include #if defined(Q_OS_WINCE) #include #endif +#if defined(Q_OS_WIN64) && !defined(Q_CC_GNU) +#include +#endif + +#if defined(Q_OS_LINUX) && defined(__arm__) +#include "private/qcore_unix_p.h" + +// the kernel header definitions for HWCAP_* +// (the ones we need/may need anyway) + +// copied from (ARM) +#define HWCAP_IWMMXT 512 +#define HWCAP_CRUNCH 1024 +#define HWCAP_THUMBEE 2048 +#define HWCAP_NEON 4096 +#define HWCAP_VFPv3 8192 +#define HWCAP_VFPv3D16 16384 + +// copied from +#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ + +#endif + QT_BEGIN_NAMESPACE -uint qDetectCPUFeatures() +#if defined (Q_OS_WINCE) +static inline uint detectProcessorFeatures() { - static uint features = 0xffffffff; - if (features != 0xffffffff) - return features; + uint features = 0; -#if defined (Q_OS_WINCE) #if defined (ARM) if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX)) { features = IWMMXT; @@ -74,75 +96,98 @@ #endif features = 0; return features; -#elif defined(QT_HAVE_IWMMXT) +} + +#elif defined(__arm__) || defined(__arm) || defined(QT_HAVE_IWMMXT) || defined(QT_HAVE_NEON) +static inline uint detectProcessorFeatures() +{ + uint features = 0; + +#if defined(Q_OS_LINUX) + int auxv = ::qt_safe_open("/proc/self/auxv", O_RDONLY); + if (auxv != -1) { + unsigned long vector[64]; + int nread; + while (features == 0) { + nread = ::qt_safe_read(auxv, (char *)vector, sizeof vector); + if (nread <= 0) { + // EOF or error + break; + } + + int max = nread / (sizeof vector[0]); + for (int i = 0; i < max; i += 2) + if (vector[i] == AT_HWCAP) { + if (vector[i+1] & HWCAP_IWMMXT) + features |= IWMMXT; + if (vector[i+1] & HWCAP_NEON) + features |= NEON; + break; + } + } + + ::qt_safe_close(auxv); + return features; + } + // fall back if /proc/self/auxv wasn't found +#endif + +#if defined(QT_HAVE_IWMMXT) // runtime detection only available when running as a previlegied process - static const bool doIWMMXT = !qgetenv("QT_NO_IWMMXT").toInt(); - features = doIWMMXT ? IWMMXT : 0; - return features; + features = IWMMXT; #elif defined(QT_HAVE_NEON) - static const bool doNEON = !qgetenv("QT_NO_NEON").toInt(); - features = doNEON ? NEON : 0; + features = NEON; +#endif + return features; -#else - features = 0; -#if defined(__x86_64__) || defined(Q_OS_WIN64) - features = MMX|SSE|SSE2|CMOV; -#elif defined(__ia64__) - features = MMX|SSE|SSE2; +} + #elif defined(__i386__) || defined(_M_IX86) +static inline uint detectProcessorFeatures() +{ + uint features = 0; + unsigned int extended_result = 0; + unsigned int feature_result = 0; uint result = 0; /* see p. 118 of amd64 instruction set manual Vol3 */ #if defined(Q_CC_GNU) - asm ("push %%ebx\n" - "pushf\n" - "pop %%eax\n" - "mov %%eax, %%ebx\n" - "xor $0x00200000, %%eax\n" - "push %%eax\n" + long cpuid_supported, tmp1; + asm ("pushf\n" + "pop %0\n" + "mov %0, %1\n" + "xor $0x00200000, %0\n" + "push %0\n" "popf\n" "pushf\n" - "pop %%eax\n" - "xor %%edx, %%edx\n" - "xor %%ebx, %%eax\n" - "jz 1f\n" - - "mov $0x00000001, %%eax\n" - "cpuid\n" - "1:\n" - "pop %%ebx\n" - "mov %%edx, %0\n" - : "=r" (result) - : - : "%eax", "%ecx", "%edx" - ); + "pop %0\n" + "xor %1, %0\n" // %eax is now 0 if CPUID is not supported + : "=a" (cpuid_supported), "=r" (tmp1) + ); + if (cpuid_supported) { + asm ("xchg %%ebx, %2\n" + "cpuid\n" + "xchg %%ebx, %2\n" + : "=c" (feature_result), "=d" (result), "=&r" (tmp1) + : "a" (1)); - asm ("push %%ebx\n" - "pushf\n" - "pop %%eax\n" - "mov %%eax, %%ebx\n" - "xor $0x00200000, %%eax\n" - "push %%eax\n" - "popf\n" - "pushf\n" - "pop %%eax\n" - "xor %%edx, %%edx\n" - "xor %%ebx, %%eax\n" - "jz 2f\n" + asm ("xchg %%ebx, %1\n" + "cpuid\n" + "cmp $0x80000000, %%eax\n" + "jnbe 1f\n" + "xor %0, %0\n" + "jmp 2f\n" + "1:\n" + "mov $0x80000001, %%eax\n" + "cpuid\n" + "2:\n" + "xchg %%ebx, %1\n" + : "=d" (extended_result), "=&r" (tmp1) + : "a" (0x80000000) + : "%ecx" + ); + } - "mov $0x80000000, %%eax\n" - "cpuid\n" - "cmp $0x80000000, %%eax\n" - "jbe 2f\n" - "mov $0x80000001, %%eax\n" - "cpuid\n" - "2:\n" - "pop %%ebx\n" - "mov %%edx, %0\n" - : "=r" (extended_result) - : - : "%eax", "%ecx", "%edx" - ); #elif defined (Q_OS_WIN) _asm { push eax @@ -164,6 +209,7 @@ mov eax, 1 cpuid mov result, edx + mov feature_result, ecx skip: pop edx pop ecx @@ -203,6 +249,7 @@ } #endif + // result now contains the standard feature bits if (result & (1u << 15)) features |= CMOV; @@ -218,44 +265,141 @@ features |= SSE; if (result & (1u << 26)) features |= SSE2; - if (extended_result & (1u)) + if (feature_result & (1u)) features |= SSE3; - if (extended_result & (1u << 9)) + if (feature_result & (1u << 9)) features |= SSSE3; - if (extended_result & (1u << 19)) + if (feature_result & (1u << 19)) features |= SSE4_1; - if (extended_result & (1u << 20)) + if (feature_result & (1u << 20)) features |= SSE4_2; - if (extended_result & (1u << 28)) + if (feature_result & (1u << 28)) + features |= AVX; + + return features; +} + +#elif defined(__x86_64) || defined(Q_OS_WIN64) +static inline uint detectProcessorFeatures() +{ + uint features = MMX|SSE|SSE2|CMOV; + uint feature_result = 0; + +#if defined(Q_CC_GNU) + asm ("cpuid" + : "=c" (feature_result) + : "a" (1) + : "%ebx", "%edx" + ); +#elif defined (Q_OS_WIN64) + { + int info[4]; + __cpuid(info, 1); + feature_result = info[2]; + } +#endif + + if (feature_result & (1u)) + features |= SSE3; + if (feature_result & (1u << 9)) + features |= SSSE3; + if (feature_result & (1u << 19)) + features |= SSE4_1; + if (feature_result & (1u << 20)) + features |= SSE4_2; + if (feature_result & (1u << 28)) features |= AVX; -#endif // i386 - -#if defined(QT_HAVE_MMX) - if (qgetenv("QT_NO_MMX").toInt()) - features ^= MMX; -#endif - if (qgetenv("QT_NO_MMXEXT").toInt()) - features ^= MMXEXT; + return features; +} -#if defined(QT_HAVE_3DNOW) - if (qgetenv("QT_NO_3DNOW").toInt()) - features ^= MMX3DNOW; -#endif - if (qgetenv("QT_NO_3DNOWEXT").toInt()) - features ^= MMX3DNOWEXT; +#elif defined(__ia64__) +static inline uint detectProcessorFeatures() +{ + return MMX|SSE|SSE2; +} -#if defined(QT_HAVE_SSE) - if (qgetenv("QT_NO_SSE").toInt()) - features ^= SSE; -#endif -#if defined(QT_HAVE_SSE2) - if (qgetenv("QT_NO_SSE2").toInt()) - features ^= SSE2; +#else +static inline uint detectProcessorFeatures() +{ + return 0; +} #endif +/* + * Use kdesdk/scripts/generate_string_table.pl to update the table below. + * Here's the data (don't forget the ONE leading space): + mmx + mmxext + mmx3dnow + mmx3dnowext + sse + sse2 + cmov + iwmmxt + neon + sse3 + ssse3 + sse4.1 + sse4.2 + avx + */ + +// begin generated +static const char features_string[] = + " mmx\0" + " mmxext\0" + " mmx3dnow\0" + " mmx3dnowext\0" + " sse\0" + " sse2\0" + " cmov\0" + " iwmmxt\0" + " neon\0" + " sse3\0" + " ssse3\0" + " sse4.1\0" + " sse4.2\0" + " avx\0" + "\0"; + +static const int features_indices[] = { + 0, 5, 13, 23, 36, 41, 47, 53, + 61, 67, 73, 80, 88, 96, -1 +}; +// end generated + +const int features_count = (sizeof features_indices - 1) / (sizeof features_indices[0]); + +uint qDetectCPUFeatures() +{ + static QBasicAtomicInt features = Q_BASIC_ATOMIC_INITIALIZER(-1); + if (features != -1) + return features; + + uint f = detectProcessorFeatures(); + QByteArray disable = qgetenv("QT_NO_CPU_FEATURE"); + if (!disable.isEmpty()) { + disable.prepend(' '); + for (int i = 0; i < features_count; ++i) { + if (disable.contains(features_string + features_indices[i])) + f &= ~(1 << i); + } + } + + features = f; return features; -#endif +} + +void qDumpCPUFeatures() +{ + uint features = qDetectCPUFeatures(); + printf("Processor features: "); + for (int i = 0; i < features_count; ++i) { + if (features & (1 << i)) + printf("%s", features_string + features_indices[i]); + } + puts(""); } QT_END_NAMESPACE