39 ** |
39 ** |
40 ****************************************************************************/ |
40 ****************************************************************************/ |
41 |
41 |
42 #include "qsimd_p.h" |
42 #include "qsimd_p.h" |
43 #include <QByteArray> |
43 #include <QByteArray> |
|
44 #include <stdio.h> |
44 |
45 |
45 #if defined(Q_OS_WINCE) |
46 #if defined(Q_OS_WINCE) |
46 #include <windows.h> |
47 #include <windows.h> |
47 #endif |
48 #endif |
48 |
49 |
|
50 #if defined(Q_OS_WIN64) && !defined(Q_CC_GNU) |
|
51 #include <intrin.h> |
|
52 #endif |
|
53 |
|
54 #if defined(Q_OS_LINUX) && defined(__arm__) |
|
55 #include "private/qcore_unix_p.h" |
|
56 |
|
57 // the kernel header definitions for HWCAP_* |
|
58 // (the ones we need/may need anyway) |
|
59 |
|
60 // copied from <asm/hwcap.h> (ARM) |
|
61 #define HWCAP_IWMMXT 512 |
|
62 #define HWCAP_CRUNCH 1024 |
|
63 #define HWCAP_THUMBEE 2048 |
|
64 #define HWCAP_NEON 4096 |
|
65 #define HWCAP_VFPv3 8192 |
|
66 #define HWCAP_VFPv3D16 16384 |
|
67 |
|
68 // copied from <linux/auxvec.h> |
|
69 #define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ |
|
70 |
|
71 #endif |
|
72 |
49 QT_BEGIN_NAMESPACE |
73 QT_BEGIN_NAMESPACE |
50 |
74 |
51 uint qDetectCPUFeatures() |
|
52 { |
|
53 static uint features = 0xffffffff; |
|
54 if (features != 0xffffffff) |
|
55 return features; |
|
56 |
|
57 #if defined (Q_OS_WINCE) |
75 #if defined (Q_OS_WINCE) |
|
76 static inline uint detectProcessorFeatures() |
|
77 { |
|
78 uint features = 0; |
|
79 |
58 #if defined (ARM) |
80 #if defined (ARM) |
59 if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX)) { |
81 if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX)) { |
60 features = IWMMXT; |
82 features = IWMMXT; |
61 return features; |
83 return features; |
62 } |
84 } |
72 #endif |
94 #endif |
73 return features; |
95 return features; |
74 #endif |
96 #endif |
75 features = 0; |
97 features = 0; |
76 return features; |
98 return features; |
77 #elif defined(QT_HAVE_IWMMXT) |
99 } |
|
100 |
|
101 #elif defined(__arm__) || defined(__arm) || defined(QT_HAVE_IWMMXT) || defined(QT_HAVE_NEON) |
|
102 static inline uint detectProcessorFeatures() |
|
103 { |
|
104 uint features = 0; |
|
105 |
|
106 #if defined(Q_OS_LINUX) |
|
107 int auxv = ::qt_safe_open("/proc/self/auxv", O_RDONLY); |
|
108 if (auxv != -1) { |
|
109 unsigned long vector[64]; |
|
110 int nread; |
|
111 while (features == 0) { |
|
112 nread = ::qt_safe_read(auxv, (char *)vector, sizeof vector); |
|
113 if (nread <= 0) { |
|
114 // EOF or error |
|
115 break; |
|
116 } |
|
117 |
|
118 int max = nread / (sizeof vector[0]); |
|
119 for (int i = 0; i < max; i += 2) |
|
120 if (vector[i] == AT_HWCAP) { |
|
121 if (vector[i+1] & HWCAP_IWMMXT) |
|
122 features |= IWMMXT; |
|
123 if (vector[i+1] & HWCAP_NEON) |
|
124 features |= NEON; |
|
125 break; |
|
126 } |
|
127 } |
|
128 |
|
129 ::qt_safe_close(auxv); |
|
130 return features; |
|
131 } |
|
132 // fall back if /proc/self/auxv wasn't found |
|
133 #endif |
|
134 |
|
135 #if defined(QT_HAVE_IWMMXT) |
78 // runtime detection only available when running as a privileged process |
136 // runtime detection only available when running as a privileged process |
79 static const bool doIWMMXT = !qgetenv("QT_NO_IWMMXT").toInt(); |
137 features = IWMMXT; |
80 features = doIWMMXT ? IWMMXT : 0; |
|
81 return features; |
|
82 #elif defined(QT_HAVE_NEON) |
138 #elif defined(QT_HAVE_NEON) |
83 static const bool doNEON = !qgetenv("QT_NO_NEON").toInt(); |
139 features = NEON; |
84 features = doNEON ? NEON : 0; |
140 #endif |
85 return features; |
141 |
86 #else |
142 return features; |
87 features = 0; |
143 } |
88 #if defined(__x86_64__) || defined(Q_OS_WIN64) |
144 |
89 features = MMX|SSE|SSE2|CMOV; |
|
90 #elif defined(__ia64__) |
|
91 features = MMX|SSE|SSE2; |
|
92 #elif defined(__i386__) || defined(_M_IX86) |
145 #elif defined(__i386__) || defined(_M_IX86) |
|
146 static inline uint detectProcessorFeatures() |
|
147 { |
|
148 uint features = 0; |
|
149 |
93 unsigned int extended_result = 0; |
150 unsigned int extended_result = 0; |
|
151 unsigned int feature_result = 0; |
94 uint result = 0; |
152 uint result = 0; |
95 /* see p. 118 of amd64 instruction set manual Vol3 */ |
153 /* see p. 118 of amd64 instruction set manual Vol3 */ |
96 #if defined(Q_CC_GNU) |
154 #if defined(Q_CC_GNU) |
97 asm ("push %%ebx\n" |
155 long cpuid_supported, tmp1; |
98 "pushf\n" |
156 asm ("pushf\n" |
99 "pop %%eax\n" |
157 "pop %0\n" |
100 "mov %%eax, %%ebx\n" |
158 "mov %0, %1\n" |
101 "xor $0x00200000, %%eax\n" |
159 "xor $0x00200000, %0\n" |
102 "push %%eax\n" |
160 "push %0\n" |
103 "popf\n" |
161 "popf\n" |
104 "pushf\n" |
162 "pushf\n" |
105 "pop %%eax\n" |
163 "pop %0\n" |
106 "xor %%edx, %%edx\n" |
164 "xor %1, %0\n" // %eax is now 0 if CPUID is not supported |
107 "xor %%ebx, %%eax\n" |
165 : "=a" (cpuid_supported), "=r" (tmp1) |
108 "jz 1f\n" |
166 ); |
109 |
167 if (cpuid_supported) { |
110 "mov $0x00000001, %%eax\n" |
168 asm ("xchg %%ebx, %2\n" |
111 "cpuid\n" |
169 "cpuid\n" |
112 "1:\n" |
170 "xchg %%ebx, %2\n" |
113 "pop %%ebx\n" |
171 : "=c" (feature_result), "=d" (result), "=&r" (tmp1) |
114 "mov %%edx, %0\n" |
172 : "a" (1)); |
115 : "=r" (result) |
173 |
116 : |
174 asm ("xchg %%ebx, %1\n" |
117 : "%eax", "%ecx", "%edx" |
175 "cpuid\n" |
118 ); |
176 "cmp $0x80000000, %%eax\n" |
119 |
177 "jnbe 1f\n" |
120 asm ("push %%ebx\n" |
178 "xor %0, %0\n" |
121 "pushf\n" |
179 "jmp 2f\n" |
122 "pop %%eax\n" |
180 "1:\n" |
123 "mov %%eax, %%ebx\n" |
181 "mov $0x80000001, %%eax\n" |
124 "xor $0x00200000, %%eax\n" |
182 "cpuid\n" |
125 "push %%eax\n" |
183 "2:\n" |
126 "popf\n" |
184 "xchg %%ebx, %1\n" |
127 "pushf\n" |
185 : "=d" (extended_result), "=&r" (tmp1) |
128 "pop %%eax\n" |
186 : "a" (0x80000000) |
129 "xor %%edx, %%edx\n" |
187 : "%ecx" |
130 "xor %%ebx, %%eax\n" |
188 ); |
131 "jz 2f\n" |
189 } |
132 |
190 |
133 "mov $0x80000000, %%eax\n" |
|
134 "cpuid\n" |
|
135 "cmp $0x80000000, %%eax\n" |
|
136 "jbe 2f\n" |
|
137 "mov $0x80000001, %%eax\n" |
|
138 "cpuid\n" |
|
139 "2:\n" |
|
140 "pop %%ebx\n" |
|
141 "mov %%edx, %0\n" |
|
142 : "=r" (extended_result) |
|
143 : |
|
144 : "%eax", "%ecx", "%edx" |
|
145 ); |
|
146 #elif defined (Q_OS_WIN) |
191 #elif defined (Q_OS_WIN) |
147 _asm { |
192 _asm { |
148 push eax |
193 push eax |
149 push ebx |
194 push ebx |
150 push ecx |
195 push ecx |
216 features |= MMX3DNOWEXT; |
263 features |= MMX3DNOWEXT; |
217 if (result & (1u << 25)) |
264 if (result & (1u << 25)) |
218 features |= SSE; |
265 features |= SSE; |
219 if (result & (1u << 26)) |
266 if (result & (1u << 26)) |
220 features |= SSE2; |
267 features |= SSE2; |
221 if (extended_result & (1u)) |
268 if (feature_result & (1u)) |
222 features |= SSE3; |
269 features |= SSE3; |
223 if (extended_result & (1u << 9)) |
270 if (feature_result & (1u << 9)) |
224 features |= SSSE3; |
271 features |= SSSE3; |
225 if (extended_result & (1u << 19)) |
272 if (feature_result & (1u << 19)) |
226 features |= SSE4_1; |
273 features |= SSE4_1; |
227 if (extended_result & (1u << 20)) |
274 if (feature_result & (1u << 20)) |
228 features |= SSE4_2; |
275 features |= SSE4_2; |
229 if (extended_result & (1u << 28)) |
276 if (feature_result & (1u << 28)) |
230 features |= AVX; |
277 features |= AVX; |
231 |
278 |
232 #endif // i386 |
279 return features; |
233 |
280 } |
234 #if defined(QT_HAVE_MMX) |
281 |
235 if (qgetenv("QT_NO_MMX").toInt()) |
282 #elif defined(__x86_64) || defined(Q_OS_WIN64) |
236 features ^= MMX; |
283 static inline uint detectProcessorFeatures() |
237 #endif |
284 { |
238 if (qgetenv("QT_NO_MMXEXT").toInt()) |
285 uint features = MMX|SSE|SSE2|CMOV; |
239 features ^= MMXEXT; |
286 uint feature_result = 0; |
240 |
287 |
241 #if defined(QT_HAVE_3DNOW) |
288 #if defined(Q_CC_GNU) |
242 if (qgetenv("QT_NO_3DNOW").toInt()) |
289 asm ("cpuid" |
243 features ^= MMX3DNOW; |
290 : "=c" (feature_result) |
244 #endif |
291 : "a" (1) |
245 if (qgetenv("QT_NO_3DNOWEXT").toInt()) |
292 : "%ebx", "%edx" |
246 features ^= MMX3DNOWEXT; |
293 ); |
247 |
294 #elif defined (Q_OS_WIN64) |
248 #if defined(QT_HAVE_SSE) |
295 { |
249 if (qgetenv("QT_NO_SSE").toInt()) |
296 int info[4]; |
250 features ^= SSE; |
297 __cpuid(info, 1); |
251 #endif |
298 feature_result = info[2]; |
252 #if defined(QT_HAVE_SSE2) |
299 } |
253 if (qgetenv("QT_NO_SSE2").toInt()) |
300 #endif |
254 features ^= SSE2; |
301 |
255 #endif |
302 if (feature_result & (1u)) |
256 |
303 features |= SSE3; |
257 return features; |
304 if (feature_result & (1u << 9)) |
258 #endif |
305 features |= SSSE3; |
|
306 if (feature_result & (1u << 19)) |
|
307 features |= SSE4_1; |
|
308 if (feature_result & (1u << 20)) |
|
309 features |= SSE4_2; |
|
310 if (feature_result & (1u << 28)) |
|
311 features |= AVX; |
|
312 |
|
313 return features; |
|
314 } |
|
315 |
|
316 #elif defined(__ia64__) |
|
317 static inline uint detectProcessorFeatures() |
|
318 { |
|
319 return MMX|SSE|SSE2; |
|
320 } |
|
321 |
|
322 #else |
|
323 static inline uint detectProcessorFeatures() |
|
324 { |
|
325 return 0; |
|
326 } |
|
327 #endif |
|
328 |
|
329 /* |
|
330 * Use kdesdk/scripts/generate_string_table.pl to update the table below. |
|
331 * Here's the data (don't forget the ONE leading space): |
|
332 mmx |
|
333 mmxext |
|
334 mmx3dnow |
|
335 mmx3dnowext |
|
336 sse |
|
337 sse2 |
|
338 cmov |
|
339 iwmmxt |
|
340 neon |
|
341 sse3 |
|
342 ssse3 |
|
343 sse4.1 |
|
344 sse4.2 |
|
345 avx |
|
346 */ |
|
347 |
|
// begin generated
// Feature names packed into one buffer. Each name is stored with a single
// leading space and its own NUL terminator, so it can be handed straight to
// C string functions via features_string + features_indices[i].
static const char features_string[] =
    " mmx\0"
    " mmxext\0"
    " mmx3dnow\0"
    " mmx3dnowext\0"
    " sse\0"
    " sse2\0"
    " cmov\0"
    " iwmmxt\0"
    " neon\0"
    " sse3\0"
    " ssse3\0"
    " sse4.1\0"
    " sse4.2\0"
    " avx\0"
    "\0";

// Byte offset of each name inside features_string; the list is terminated
// by a -1 sentinel.
static const int features_indices[] = {
    0,    5,   13,   23,   36,   41,   47,   53,
    61,   67,   73,   80,   88,   96,   -1
};
// end generated

// Number of named features: every entry of features_indices except the
// trailing -1 sentinel. The sentinel is subtracted from the element count
// (not from the byte size), so the result does not depend on integer
// division truncating.
const int features_count =
    static_cast<int>(sizeof features_indices / sizeof features_indices[0]) - 1;
|
373 |
|
374 uint qDetectCPUFeatures() |
|
375 { |
|
376 static QBasicAtomicInt features = Q_BASIC_ATOMIC_INITIALIZER(-1); |
|
377 if (features != -1) |
|
378 return features; |
|
379 |
|
380 uint f = detectProcessorFeatures(); |
|
381 QByteArray disable = qgetenv("QT_NO_CPU_FEATURE"); |
|
382 if (!disable.isEmpty()) { |
|
383 disable.prepend(' '); |
|
384 for (int i = 0; i < features_count; ++i) { |
|
385 if (disable.contains(features_string + features_indices[i])) |
|
386 f &= ~(1 << i); |
|
387 } |
|
388 } |
|
389 |
|
390 features = f; |
|
391 return features; |
|
392 } |
|
393 |
|
394 void qDumpCPUFeatures() |
|
395 { |
|
396 uint features = qDetectCPUFeatures(); |
|
397 printf("Processor features: "); |
|
398 for (int i = 0; i < features_count; ++i) { |
|
399 if (features & (1 << i)) |
|
400 printf("%s", features_string + features_indices[i]); |
|
401 } |
|
402 puts(""); |
259 } |
403 } |
260 |
404 |
261 QT_END_NAMESPACE |
405 QT_END_NAMESPACE |