|
1 /* |
|
2 * Copyright (c) 2003, 2006 Matteo Frigo |
|
3 * Copyright (c) 2003, 2006 Massachusetts Institute of Technology |
|
4 * |
|
5 * Permission is hereby granted, free of charge, to any person obtaining |
|
6 * a copy of this software and associated documentation files (the |
|
7 * "Software"), to deal in the Software without restriction, including |
|
8 * without limitation the rights to use, copy, modify, merge, publish, |
|
9 * distribute, sublicense, and/or sell copies of the Software, and to |
|
10 * permit persons to whom the Software is furnished to do so, subject to |
|
11 * the following conditions: |
|
12 * |
|
13 * The above copyright notice and this permission notice shall be |
|
14 * included in all copies or substantial portions of the Software. |
|
15 * |
|
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
|
19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
|
20 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
|
21 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
|
22 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
|
23 * |
|
24 */ |
|
25 |
|
26 /* $Id: cycle.h,v 1.52 2006-02-08 02:36:47 athena Exp $ */ |
|
27 |
|
28 /* machine-dependent cycle counters code. Needs to be inlined. */ |
|
29 |
|
30 /***************************************************************************/ |
|
31 /* To use the cycle counters in your code, simply #include "cycle.h" (this |
|
32 file), and then use the functions/macros: |
|
33 |
|
34 CycleCounterTicks getticks(void); |
|
35 |
|
36 CycleCounterTicks is an opaque typedef defined below, representing the current time. |
|
37 You extract the elapsed time between two calls to getticks() via: |
|
38 |
|
39 double elapsed(CycleCounterTicks t1, CycleCounterTicks t0); |
|
40 |
|
41 which returns a double-precision variable in arbitrary units. You |
|
42 are not expected to convert this into human units like seconds; it |
|
43 is intended only for *comparisons* of time intervals. |
|
44 |
|
45 (In order to use some of the OS-dependent timer routines like |
|
46 Solaris' gethrtime, you need to paste the autoconf snippet below |
|
47 into your configure.ac file and #include "config.h" before cycle.h, |
|
48 or define the relevant macros manually if you are not using autoconf.) |
|
49 */ |
|
50 |
|
51 /***************************************************************************/ |
|
52 /* This file uses macros like HAVE_GETHRTIME that are assumed to be |
|
53 defined according to whether the corresponding function/type/header |
|
54 is available on your system. The necessary macros are most |
|
55 conveniently defined if you are using GNU autoconf, via the tests: |
|
56 |
|
57 dnl --------------------------------------------------------------------- |
|
58 |
|
59 AC_C_INLINE |
|
60 AC_HEADER_TIME |
|
61 AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h]) |
|
62 |
|
63 AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is defined in <sys/time.h>])],,[#if HAVE_SYS_TIME_H |
|
64 #include <sys/time.h> |
|
65 #endif]) |
|
66 |
|
67 AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime mach_absolute_time]) |
|
68 |
|
69 dnl Cray UNICOS _rtc() (real-time clock) intrinsic |
|
70 AC_MSG_CHECKING([for _rtc intrinsic]) |
|
71 rtc_ok=yes |
|
72 AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H |
|
73 #include <intrinsics.h> |
|
74 #endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])], [rtc_ok=no]) |
|
75 AC_MSG_RESULT($rtc_ok) |
|
76 |
|
77 dnl --------------------------------------------------------------------- |
|
78 */ |
|
79 |
|
80 /***************************************************************************/ |
|
81 |
|
82 #ifndef QBENCHLIB_CYCLE_H |
|
83 #define QBENCHLIB_CYCLE_H |
|
84 |
|
85 #if TIME_WITH_SYS_TIME |
|
86 # include <sys/time.h> |
|
87 # include <time.h> |
|
88 #else |
|
89 # if HAVE_SYS_TIME_H |
|
90 # include <sys/time.h> |
|
91 # else |
|
92 # include <time.h> |
|
93 # endif |
|
94 #endif |
|
95 |
|
/* Expands to the default elapsed() definition: the difference t1 - t0 cast
   to double. INL is pasted in as the inline specifier, so each platform
   section passes the spelling its own compiler accepts. The macro applies
   binary '-' to the tick values, so it is only usable in sections where
   CycleCounterTicks is an arithmetic type. */
96 #define INLINE_ELAPSED(INL) static INL double elapsed(CycleCounterTicks t1, CycleCounterTicks t0) \ |
|
97 { \ |
|
98 return (double)(t1 - t0); \ |
|
99 } |
|
100 |
|
101 /*----------------------------------------------------------------*/ |
|
102 /* Solaris */ |
|
103 #if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T) && !defined(HAVE_TICK_COUNTER) |
|
104 typedef hrtime_t CycleCounterTicks; |
|
105 |
|
/* No wrapper function here: getticks is a plain alias for gethrtime. */
106 #define getticks gethrtime |
|
107 |
|
/* elapsed() is the difference of two hrtime_t readings, cast to double. */
108 INLINE_ELAPSED(inline) |
|
109 |
|
/* Marks a counter as chosen; every later section tests
   !defined(HAVE_TICK_COUNTER) and is therefore skipped. */
110 #define HAVE_TICK_COUNTER |
|
111 #endif |
|
112 |
|
113 /*----------------------------------------------------------------*/ |
|
114 /* AIX v. 4+ routines to read the real-time clock or time-base register */ |
|
115 #if defined(HAVE_READ_REAL_TIME) && defined(HAVE_TIME_BASE_TO_TIME) && !defined(HAVE_TICK_COUNTER) |
|
116 typedef timebasestruct_t CycleCounterTicks; |
|
117 |
|
/* Returns the current reading exactly as read_real_time() fills it in.
   The structure is returned unconverted; the return value of
   read_real_time() is not inspected. */
118 static inline CycleCounterTicks getticks(void) |
|
119 { |
|
120 CycleCounterTicks t; |
|
121 read_real_time(&t, TIMEBASE_SZ); |
|
122 return t; |
|
123 } |
|
124 |
|
125 static inline double elapsed(CycleCounterTicks t1, CycleCounterTicks t0) /* time in nanoseconds */ |
|
126 { |
|
127 time_base_to_time(&t1, TIMEBASE_SZ); |
|
128 time_base_to_time(&t0, TIMEBASE_SZ); |
|
129 return ((t1.tb_high - t0.tb_high) * 1e9 + (t1.tb_low - t0.tb_low)); |
|
130 } |
|
131 |
|
132 #define HAVE_TICK_COUNTER |
|
133 #endif |
|
134 |
|
135 /*----------------------------------------------------------------*/ |
|
136 /* |
|
137 * PowerPC ``cycle'' counter using the time base register. |
|
138 */ |
|
139 #if ((defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))) || (defined(__MWERKS__) && defined(macintosh))) && !defined(HAVE_TICK_COUNTER) |
|
140 typedef unsigned long long CycleCounterTicks; |
|
141 |
|
/* Builds a 64-bit tick value from two 32-bit reads. The upper word is read
   before and after the lower word; the loop repeats until both upper reads
   agree, so the lower word is never paired with an upper word that changed
   in between. The result is (upper << 32) | lower. */
142 static __inline__ CycleCounterTicks getticks(void) |
|
143 { |
|
144 unsigned int tbl, tbu0, tbu1; |
|
145 |
|
146 do { |
|
147 __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0)); |
|
148 __asm__ __volatile__ ("mftb %0" : "=r"(tbl)); |
|
149 __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1)); |
|
150 } while (tbu0 != tbu1); |
|
151 |
|
152 return (((unsigned long long)tbu0) << 32) | tbl; |
|
153 } |
|
154 |
|
155 INLINE_ELAPSED(__inline__) |
|
156 |
|
157 #define HAVE_TICK_COUNTER |
|
158 #endif |
|
159 |
|
160 /* MacOS/Mach (Darwin) time-base register interface (unlike UpTime, |
|
161 from Carbon, requires no additional libraries to be linked). */ |
|
162 #if defined(HAVE_MACH_ABSOLUTE_TIME) && defined(HAVE_MACH_MACH_TIME_H) && !defined(HAVE_TICK_COUNTER) |
|
163 #include <mach/mach_time.h> |
|
164 typedef uint64_t CycleCounterTicks; |
|
/* getticks is a plain alias for mach_absolute_time; no wrapper is defined. */
165 #define getticks mach_absolute_time |
|
/* elapsed() is the difference of two readings, cast to double. */
166 INLINE_ELAPSED(__inline__) |
|
167 #define HAVE_TICK_COUNTER |
|
168 #endif |
|
169 |
|
170 /*----------------------------------------------------------------*/ |
|
171 /* |
|
172 * Pentium cycle counter |
|
173 */ |
|
174 #if (defined(__GNUC__) || defined(__ICC)) && defined(__i386__) && !defined(HAVE_TICK_COUNTER) |
|
175 typedef unsigned long long CycleCounterTicks; |
|
176 |
|
/* Executes rdtsc and returns its result as one 64-bit value. The whole
   value is bound through the single "=A" output operand rather than being
   assembled from two halves in C.
   NOTE(review): "=A" meaning the edx:eax pair is specific to 32-bit x86;
   this section is guarded by __i386__ accordingly. */
177 static __inline__ CycleCounterTicks getticks(void) |
|
178 { |
|
179 CycleCounterTicks ret; |
|
180 |
|
181 __asm__ __volatile__("rdtsc": "=A" (ret)); |
|
182 /* no input, nothing else clobbered */ |
|
183 return ret; |
|
184 } |
|
185 |
|
186 INLINE_ELAPSED(__inline__) |
|
187 |
|
188 #define HAVE_TICK_COUNTER |
|
189 #define TIME_MIN 5000.0 /* unreliable pentium IV cycle counter */ |
|
190 #endif |
|
191 |
|
192 /* Visual C++ -- thanks to Morten Nissov for his help with this */ |
|
193 #if defined(_MSC_VER) |
|
194 #if _MSC_VER >= 1200 && (_M_IX86 >= 500 || (defined(_WIN32_WCE) && defined(_X86_))) && !defined(HAVE_TICK_COUNTER) |
|
195 #include <windows.h> |
|
196 typedef LARGE_INTEGER CycleCounterTicks; |
|
197 #define RDTSC __asm __emit 0fh __asm __emit 031h /* hack for VC++ 5.0 */ |
|
198 |
|
/* Executes the RDTSC macro (the instruction emitted as raw bytes) inside an
   __asm block, then copies edx into retval.HighPart and eax into
   retval.LowPart and returns the resulting LARGE_INTEGER. */
199 static __inline CycleCounterTicks getticks(void) |
|
200 { |
|
201 CycleCounterTicks retval; |
|
202 |
|
203 __asm { |
|
204 RDTSC |
|
205 mov retval.HighPart, edx |
|
206 mov retval.LowPart, eax |
|
207 } |
|
208 return retval; |
|
209 } |
|
210 |
|
/* Tick difference as a double. CycleCounterTicks is a LARGE_INTEGER in this
   section, so the subtraction is done on the QuadPart members instead of
   through INLINE_ELAPSED. */
211 static __inline double elapsed(CycleCounterTicks t1, CycleCounterTicks t0) |
|
212 { |
|
213 return (double)(t1.QuadPart - t0.QuadPart); |
|
214 } |
|
215 |
|
216 #define HAVE_TICK_COUNTER |
|
217 #define TIME_MIN 5000.0 /* unreliable pentium IV cycle counter */ |
|
218 #endif |
|
219 #endif |
|
220 |
|
221 #if _MSC_VER >= 1400 && defined(_WIN32_WCE) && !defined(HAVE_TICK_COUNTER) |
|
222 #include <windows.h> |
|
223 typedef DWORD CycleCounterTicks; |
|
224 |
|
/* Fallback counter for this section: simply returns GetTickCount(), stored
   in a DWORD. No cycle-counter instruction is involved here. */
225 static __inline CycleCounterTicks getticks(void) |
|
226 { |
|
227 return GetTickCount(); |
|
228 } |
|
229 |
|
/* Difference of two GetTickCount() readings, cast to double. The
   subtraction is performed in the DWORD type before the conversion. */
230 static __inline double elapsed(CycleCounterTicks t1, CycleCounterTicks t0) |
|
231 { |
|
232 return (double)(t1 - t0); |
|
233 } |
|
234 |
|
235 #define HAVE_TICK_COUNTER |
|
236 #define TIME_MIN 5000.0 |
|
237 #endif |
|
238 |
|
239 /*----------------------------------------------------------------*/ |
|
240 /* |
|
241 * X86-64 cycle counter |
|
242 */ |
|
243 #if (defined(__GNUC__) || defined(__ICC)) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER) |
|
typedef unsigned long long CycleCounterTicks;

/*
 * Read the x86-64 time-stamp counter.
 *
 * rdtsc returns its result split across two registers: the "=a" operand
 * receives the low 32 bits and the "=d" operand the high 32 bits.  The two
 * halves are widened and merged into a single 64-bit tick value.
 *
 * The __asm__ / __volatile__ spellings are used instead of the bare asm
 * keyword, matching the other GCC sections of this header: GCC does not
 * recognise plain `asm` in strict ISO modes (-ansi, -std=c99, -std=c11),
 * so the previous spelling made the header fail to compile there.
 */
static __inline__ CycleCounterTicks getticks(void)
{
    unsigned int lo, hi;

    __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
    return ((CycleCounterTicks)lo) | (((CycleCounterTicks)hi) << 32);
}
|
252 |
|
253 INLINE_ELAPSED(__inline__) |
|
254 |
|
255 #define HAVE_TICK_COUNTER |
|
256 #endif |
|
257 |
|
258 /* PGI compiler, courtesy Cristiano Calonaci, Andrea Tarsi, & Roberto Gori. |
|
259 NOTE: this code will fail to link unless you use the -Masmkeyword compiler |
|
260 option (grrr). */ |
|
261 #if defined(__PGI) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER) |
|
262 typedef unsigned long long CycleCounterTicks; |
|
/* Runs rdtsc, shifts rdx left by 32, zero-extends eax via "mov %eax,%eax",
   and ORs the two into rax.
   NOTE(review): the function has no return statement; it presumably relies
   on rax being the return register and on the compiler leaving it untouched
   after the asm -- confirm for the PGI versions in use. */
263 static CycleCounterTicks getticks(void) |
|
264 { |
|
265 asm(" rdtsc; shl $0x20,%rdx; mov %eax,%eax; or %rdx,%rax; "); |
|
266 } |
|
267 INLINE_ELAPSED(__inline__) |
|
268 #define HAVE_TICK_COUNTER |
|
269 #endif |
|
270 |
|
271 /* Visual C++ */ |
|
272 #if _MSC_VER >= 1400 && (defined(_M_AMD64) || defined(_M_X64)) && !defined(HAVE_TICK_COUNTER) |
|
273 #include <intrin.h> |
|
274 |
|
275 typedef unsigned __int64 CycleCounterTicks; |
|
276 |
|
/* getticks is a plain alias for the __rdtsc intrinsic; no wrapper function
   and no inline assembly are used in this section. */
277 #define getticks __rdtsc |
|
278 |
|
/* elapsed() is the difference of two readings, cast to double. */
279 INLINE_ELAPSED(__inline) |
|
280 |
|
281 #define HAVE_TICK_COUNTER |
|
282 #endif |
|
283 |
|
284 /*----------------------------------------------------------------*/ |
|
285 /* |
|
286 * IA64 cycle counter |
|
287 */ |
|
288 |
|
289 /* intel's icc/ecc compiler */ |
|
290 #if (defined(__EDG_VERSION) || defined(__ECC)) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER) |
|
291 typedef unsigned long CycleCounterTicks; |
|
292 #include <ia64intrin.h> |
|
293 |
|
/* Returns the value of the register selected by _IA64_REG_AR_ITC, read
   through the compiler's __getReg intrinsic (no inline assembly). */
294 static __inline__ CycleCounterTicks getticks(void) |
|
295 { |
|
296 return __getReg(_IA64_REG_AR_ITC); |
|
297 } |
|
298 |
|
299 INLINE_ELAPSED(__inline__) |
|
300 |
|
301 #define HAVE_TICK_COUNTER |
|
302 #endif |
|
303 |
|
304 /* gcc */ |
|
305 #if defined(__GNUC__) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER) |
|
306 typedef unsigned long CycleCounterTicks; |
|
307 |
|
/* Reads ar.itc into a general register with a single mov and returns it.
   The asm is marked volatile and has one output operand and no inputs. */
308 static __inline__ CycleCounterTicks getticks(void) |
|
309 { |
|
310 CycleCounterTicks ret; |
|
311 |
|
312 __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret)); |
|
313 return ret; |
|
314 } |
|
315 |
|
316 INLINE_ELAPSED(__inline__) |
|
317 |
|
318 #define HAVE_TICK_COUNTER |
|
319 #endif |
|
320 |
|
321 /* HP/UX IA64 compiler, courtesy Teresa L. Johnson: */ |
|
322 #if defined(__hpux) && defined(__ia64) && !defined(HAVE_TICK_COUNTER) |
|
323 #include <machine/sys/inline.h> |
|
324 typedef unsigned long CycleCounterTicks; |
|
325 |
|
/* Returns the register selected by _AREG_ITC, read through the
   _Asm_mov_from_ar() compiler intrinsic. */
326 static inline CycleCounterTicks getticks(void) |
|
327 { |
|
328 CycleCounterTicks ret; |
|
329 |
|
330 ret = _Asm_mov_from_ar (_AREG_ITC); |
|
331 return ret; |
|
332 } |
|
333 |
|
334 INLINE_ELAPSED(inline) |
|
335 |
|
336 #define HAVE_TICK_COUNTER |
|
337 #endif |
|
338 |
|
339 /* Microsoft Visual C++ */ |
|
#if defined(_MSC_VER) && defined(_M_IA64) && !defined(HAVE_TICK_COUNTER)
typedef unsigned __int64 CycleCounterTicks;

/*
 * Prototype for the __getReg compiler intrinsic.  It must be declared with
 * the tick typedef of this header (CycleCounterTicks); the stale type name
 * "ticks" is not declared anywhere in this file and would not compile.
 */
#  ifdef __cplusplus
extern "C"
#  endif
CycleCounterTicks __getReg(int whichReg);
#pragma intrinsic(__getReg)

/*
 * Returns the value of application register 3116 as read through __getReg.
 * The value goes through a volatile temporary before being returned, as in
 * the original code.
 */
static __inline CycleCounterTicks getticks(void)
{
    volatile CycleCounterTicks temp;
    temp = __getReg(3116);
    return temp;
}

/* elapsed() was missing from this section although the header's usage notes
   promise it for every counter; provide the default tick difference. */
INLINE_ELAPSED(__inline)

#define HAVE_TICK_COUNTER
#endif
|
358 |
|
359 /*----------------------------------------------------------------*/ |
|
360 /* |
|
361 * PA-RISC cycle counter |
|
362 */ |
|
363 #if (defined(__hppa__) || defined(__hppa)) && !defined(HAVE_TICK_COUNTER) |
|
364 typedef unsigned long CycleCounterTicks; |
|
365 |
|
366 # ifdef __GNUC__ |
|
/* Reads control register 16 with mfctl into a general register and returns
   it. The asm is volatile with one output operand and no inputs. */
367 static __inline__ CycleCounterTicks getticks(void) |
|
368 { |
|
369 CycleCounterTicks ret; |
|
370 |
|
371 __asm__ __volatile__("mfctl 16, %0": "=r" (ret)); |
|
372 /* no input, nothing else clobbered */ |
|
373 return ret; |
|
374 } |
|
375 |
|
376 INLINE_ELAPSED(inline) |
|
377 |
|
378 #define HAVE_TICK_COUNTER |
|
379 |
|
380 # elif 0 // Doesn't compile |
|
381 # include <machine/inline.h> |
|
/* Non-GCC variant that would read control register 16 via the _MFCTL macro.
   It sits under "# elif 0", so it is never compiled. Note that it neither
   defines elapsed() nor sets HAVE_TICK_COUNTER, so it would need both
   before it could be enabled. */
382 static inline unsigned long getticks(void) |
|
383 { |
|
384 register CycleCounterTicks ret; |
|
385 _MFCTL(16, ret); |
|
386 return ret; |
|
387 } |
|
388 # endif |
|
389 |
|
390 #endif |
|
391 |
|
392 /*----------------------------------------------------------------*/ |
|
393 /* S390, courtesy of James Treacy */ |
|
394 #if defined(__GNUC__) && defined(__s390__) && !defined(HAVE_TICK_COUNTER) |
|
395 typedef unsigned long long CycleCounterTicks; |
|
396 |
|
/* Executes stck with the address of the local `cycles` as its operand and
   returns that local. `cycles` is passed only as an address input (there is
   no output operand), so the "memory" clobber is what tells the compiler
   the asm may have written it; "cc" is listed as clobbered as well. */
397 static __inline__ CycleCounterTicks getticks(void) |
|
398 { |
|
399 CycleCounterTicks cycles; |
|
400 __asm__("stck 0(%0)" : : "a" (&(cycles)) : "memory", "cc"); |
|
401 return cycles; |
|
402 } |
|
403 |
|
404 INLINE_ELAPSED(__inline__) |
|
405 |
|
406 #define HAVE_TICK_COUNTER |
|
407 #endif |
|
408 /*----------------------------------------------------------------*/ |
|
409 #if defined(__GNUC__) && defined(__alpha__) && !defined(HAVE_TICK_COUNTER) |
|
410 /* |
|
411 * The 32-bit cycle counter on alpha overflows pretty quickly, |
|
412 * unfortunately. A 1GHz machine overflows in 4 seconds. |
|
413 */ |
|
414 typedef unsigned int CycleCounterTicks; |
|
415 |
|
/* Executes rpcc into an unsigned long and returns only its low 32 bits;
   the upper bits of the register value are masked off. */
416 static __inline__ CycleCounterTicks getticks(void) |
|
417 { |
|
418 unsigned long cc; |
|
419 __asm__ __volatile__ ("rpcc %0" : "=r"(cc)); |
|
420 return (cc & 0xFFFFFFFF); |
|
421 } |
|
422 |
|
423 INLINE_ELAPSED(__inline__) |
|
424 |
|
425 #define HAVE_TICK_COUNTER |
|
426 #endif |
|
427 |
|
428 /*----------------------------------------------------------------*/ |
|
429 #if defined(__GNUC__) && defined(__sparc_v9__) && !defined(HAVE_TICK_COUNTER) |
|
430 typedef unsigned long CycleCounterTicks; |
|
431 |
|
/* Reads the %tick register with a single rd instruction and returns it.
   The doubled "%%" is the escape for a literal '%' in the asm template. */
432 static __inline__ CycleCounterTicks getticks(void) |
|
433 { |
|
434 CycleCounterTicks ret; |
|
435 __asm__ __volatile__("rd %%tick, %0" : "=r" (ret)); |
|
436 return ret; |
|
437 } |
|
438 |
|
439 INLINE_ELAPSED(__inline__) |
|
440 |
|
441 #define HAVE_TICK_COUNTER |
|
442 #endif |
|
443 |
|
444 /*----------------------------------------------------------------*/ |
|
445 #if (defined(__DECC) || defined(__DECCXX)) && defined(__alpha) && defined(HAVE_C_ASM_H) && !defined(HAVE_TICK_COUNTER) |
|
446 # include <c_asm.h> |
|
447 typedef unsigned int CycleCounterTicks; |
|
448 |
|
/* Same scheme as the GCC Alpha variant, but using the asm() facility from
   <c_asm.h>: executes rpcc, takes the result as an unsigned long, and
   returns only its low 32 bits. */
449 static __inline CycleCounterTicks getticks(void) |
|
450 { |
|
451 unsigned long cc; |
|
452 cc = asm("rpcc %v0"); |
|
453 return (cc & 0xFFFFFFFF); |
|
454 } |
|
455 |
|
456 INLINE_ELAPSED(__inline) |
|
457 |
|
458 #define HAVE_TICK_COUNTER |
|
459 #endif |
|
460 /*----------------------------------------------------------------*/ |
|
461 /* SGI/Irix */ |
|
462 #if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE) && !defined(HAVE_TICK_COUNTER) |
|
463 typedef struct timespec CycleCounterTicks; |
|
464 |
|
/* Returns the struct timespec filled in by clock_gettime() for the
   CLOCK_SGI_CYCLE clock. The return value of clock_gettime() is not
   checked. */
465 static inline CycleCounterTicks getticks(void) |
|
466 { |
|
467 struct timespec t; |
|
468 clock_gettime(CLOCK_SGI_CYCLE, &t); |
|
469 return t; |
|
470 } |
|
471 |
|
/* Difference of two timespec readings expressed in nanoseconds: the
   seconds difference scaled by 1e9 plus the nanoseconds difference. Each
   field difference is taken first and then converted to double. */
472 static inline double elapsed(CycleCounterTicks t1, CycleCounterTicks t0) |
|
473 { |
|
474 return (double)(t1.tv_sec - t0.tv_sec) * 1.0E9 + |
|
475 (double)(t1.tv_nsec - t0.tv_nsec); |
|
476 } |
|
477 #define HAVE_TICK_COUNTER |
|
478 #endif |
|
479 |
|
480 /*----------------------------------------------------------------*/ |
|
481 /* Cray UNICOS _rtc() intrinsic function */ |
|
482 #if defined(HAVE__RTC) && !defined(HAVE_TICK_COUNTER) |
|
483 #ifdef HAVE_INTRINSICS_H |
|
484 # include <intrinsics.h> |
|
485 #endif |
|
486 |
|
487 typedef long long CycleCounterTicks; |
|
488 |
|
/* getticks is a plain alias for the _rtc intrinsic; no wrapper is defined. */
489 #define getticks _rtc |
|
490 |
|
/* elapsed() is the difference of two readings, cast to double. */
491 INLINE_ELAPSED(inline) |
|
492 |
|
493 #define HAVE_TICK_COUNTER |
|
494 #endif |
|
495 |
|
496 /*----------------------------------------------------------------*/ |
|
497 /* Symbian */ |
|
498 #if defined(__SYMBIAN32__) && !defined(HAVE_TICK_COUNTER) |
|
499 #include <e32std.h> |
|
500 |
|
501 typedef TUint32 CycleCounterTicks; |
|
502 |
|
/* Returns User::FastCounter() as a TUint32. The scoped call is C++ syntax,
   so this section only compiles when the header is included from C++. */
503 static inline CycleCounterTicks getticks(void) |
|
504 { |
|
505 return User::FastCounter(); |
|
506 } |
|
507 |
|
508 INLINE_ELAPSED(inline) |
|
509 |
|
510 #define HAVE_TICK_COUNTER |
|
511 #endif |
|
512 |
|
513 #endif // QBENCHLIB_CYCLE_H |