![]() |
NFFT
3.3.1
|
/*
 * Copyright (c) 2003, 2007-14 Matteo Frigo
 * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */


/* machine-dependent cycle counters code. Needs to be inlined. */

/***************************************************************************/
/* To use the cycle counters in your code, simply #include "cycle.h" (this
   file), and then use the functions/macros:

                 ticks getticks(void);

   ticks is an opaque typedef defined below, representing the current time.
   You extract the elapsed time between two calls to getticks() via:

                 double elapsed(ticks t1, ticks t0);

   which returns a double-precision variable in arbitrary units.  You
   are not expected to convert this into human units like seconds; it
   is intended only for *comparisons* of time intervals.

   (In order to use some of the OS-dependent timer routines like
   Solaris' gethrtime, you need to paste the autoconf snippet below
   into your configure.ac file and #include "config.h" before cycle.h,
   or define the relevant macros manually if you are not using autoconf.)

   Exactly one of the platform sections below is compiled: each one is
   guarded by !defined(HAVE_TICK_COUNTER) and defines HAVE_TICK_COUNTER
   once it has provided ticks/getticks/elapsed.  If no section matches,
   HAVE_TICK_COUNTER stays undefined and none of the three names exist.
*/

/***************************************************************************/
/* This file uses macros like HAVE_GETHRTIME that are assumed to be
   defined according to whether the corresponding function/type/header
   is available on your system.  The necessary macros are most
   conveniently defined if you are using GNU autoconf, via the tests:

   dnl ---------------------------------------------------------------------

   AC_C_INLINE
   AC_HEADER_TIME
   AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h])

   AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is defined in <sys/time.h>])],,[#if HAVE_SYS_TIME_H
#include <sys/time.h>
#endif])

   AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime mach_absolute_time])

   dnl Cray UNICOS _rtc() (real-time clock) intrinsic
   AC_MSG_CHECKING([for _rtc intrinsic])
   rtc_ok=yes
   AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H
#include <intrinsics.h>
#endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])], [rtc_ok=no])
   AC_MSG_RESULT($rtc_ok)

   dnl ---------------------------------------------------------------------
*/

/***************************************************************************/

#if TIME_WITH_SYS_TIME
# include <sys/time.h>
# include <time.h>
#else
# if HAVE_SYS_TIME_H
#  include <sys/time.h>
# else
#  include <time.h>
# endif
#endif

/* Defines elapsed() for every platform whose ticks type is a plain
   arithmetic type: both arguments are converted to double and
   subtracted.  INL is the spelling of "inline" that the selected
   compiler accepts (inline, __inline or __inline__). */
#define INLINE_ELAPSED(INL) static INL double elapsed(ticks t1, ticks t0) \
{                                                                         \
    return (double)t1 - (double)t0;                                       \
}

/*----------------------------------------------------------------*/
/* Solaris */
#if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T) && !defined(HAVE_TICK_COUNTER)
typedef hrtime_t ticks;

#define getticks gethrtime

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
/* AIX v. 4+ routines to read the real-time clock or time-base register */
#if defined(HAVE_READ_REAL_TIME) && defined(HAVE_TIME_BASE_TO_TIME) && !defined(HAVE_TICK_COUNTER)
typedef timebasestruct_t ticks;

/* Reads the current time base via read_real_time(). */
static __inline ticks getticks(void)
{
    ticks t;
    read_real_time(&t, TIMEBASE_SZ);
    return t;
}

/* The arguments are passed by value, so the in-place conversion done by
   time_base_to_time() only touches the local copies. */
static __inline double elapsed(ticks t1, ticks t0) /* time in nanoseconds */
{
    time_base_to_time(&t1, TIMEBASE_SZ);
    time_base_to_time(&t0, TIMEBASE_SZ);
    return (((double)t1.tb_high - (double)t0.tb_high) * 1.0e9 +
            ((double)t1.tb_low - (double)t0.tb_low));
}

#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
/*
 * PowerPC ``cycle'' counter using the time base register.
 */
#if ((((defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))) || (defined(__MWERKS__) && defined(macintosh)))) || (defined(__IBM_GCC_ASM) && (defined(__powerpc__) || defined(__ppc__)))) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

/* Reads the upper half, the lower half, then the upper half again, and
   retries until both upper reads agree, so that a carry between the two
   32-bit reads cannot produce a torn 64-bit value. */
static __inline__ ticks getticks(void)
{
    unsigned int tbl, tbu0, tbu1;

    do {
        __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0));
        __asm__ __volatile__ ("mftb %0" : "=r"(tbl));
        __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1));
    } while (tbu0 != tbu1);

    return (((unsigned long long)tbu0) << 32) | tbl;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif

/* MacOS/Mach (Darwin) time-base register interface (unlike UpTime,
   from Carbon, requires no additional libraries to be linked). */
#if defined(HAVE_MACH_ABSOLUTE_TIME) && defined(HAVE_MACH_MACH_TIME_H) && !defined(HAVE_TICK_COUNTER)
#include <mach/mach_time.h>
typedef uint64_t ticks;
#define getticks mach_absolute_time
INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
/*
 * Pentium cycle counter
 */
#if (defined(__GNUC__) || defined(__ICC)) && defined(__i386__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

/* Executes rdtsc and takes the 64-bit result through the "=A"
   constraint. */
static __inline__ ticks getticks(void)
{
    ticks ret;

    __asm__ __volatile__("rdtsc": "=A" (ret));
    /* no input, nothing else clobbered */
    return ret;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0   /* unreliable pentium IV cycle counter */
#endif

/* Visual C++ -- thanks to Morten Nissov for his help with this */
#if defined(_MSC_VER) && _MSC_VER >= 1200 && _M_IX86 >= 500 && !defined(HAVE_TICK_COUNTER)
#include <windows.h>
typedef LARGE_INTEGER ticks;
#define RDTSC __asm __emit 0fh __asm __emit 031h /* hack for VC++ 5.0 */

/* Emits the rdtsc opcode bytes and stores edx/eax into the high/low
   halves of the LARGE_INTEGER. */
static __inline ticks getticks(void)
{
    ticks retval;

    __asm {
        RDTSC
        mov retval.HighPart, edx
        mov retval.LowPart, eax
    }
    return retval;
}

/* ticks is a struct here, so INLINE_ELAPSED cannot be used. */
static __inline double elapsed(ticks t1, ticks t0)
{
    return (double)t1.QuadPart - (double)t0.QuadPart;
}

#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0   /* unreliable pentium IV cycle counter */
#endif

/*----------------------------------------------------------------*/
/*
 * X86-64 cycle counter
 */
#if (defined(__GNUC__) || defined(__ICC) || defined(__SUNPRO_C)) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

/* Executes rdtsc and combines the two 32-bit outputs (eax = low half,
   edx = high half) into one 64-bit value.  The __asm__/__volatile__
   spellings are used, as in the other GNU-style sections of this file,
   because the plain `asm' keyword is not recognized when compiling in
   strict ISO mode (e.g. -std=c99 or -std=c11). */
static __inline__ ticks getticks(void)
{
    unsigned a, d;
    __asm__ __volatile__("rdtsc" : "=a" (a), "=d" (d));
    return ((ticks)a) | (((ticks)d) << 32);
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0
#endif

/* PGI compiler, courtesy Cristiano Calonaci, Andrea Tarsi, & Roberto Gori.
   NOTE: this code will fail to link unless you use the -Masmkeyword compiler
   option (grrr). */
#if defined(__PGI) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;
/* NOTE(review): there is deliberately no C return statement here; the
   asm sequence leaves the combined counter in %rax and the function
   presumably relies on that register being the return-value register.
   Left as is because it cannot be verified without the PGI toolchain. */
static ticks getticks(void)
{
    asm(" rdtsc; shl    $0x20,%rdx; mov    %eax,%eax; or     %rdx,%rax;    ");
}
INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0
#endif

/* Visual C++, courtesy of Dirk Michaelis */
#if defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(_M_AMD64) || defined(_M_X64)) && !defined(HAVE_TICK_COUNTER)

#include <intrin.h>
#pragma intrinsic(__rdtsc)
typedef unsigned __int64 ticks;
#define getticks __rdtsc
INLINE_ELAPSED(__inline)

#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0
#endif

/*----------------------------------------------------------------*/
/*
 * IA64 cycle counter
 */

/* intel's icc/ecc compiler */
#if (defined(__EDG_VERSION) || defined(__ECC)) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long ticks;
#include <ia64intrin.h>

static __inline__ ticks getticks(void)
{
    return __getReg(_IA64_REG_AR_ITC);
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif

/* gcc */
#if defined(__GNUC__) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long ticks;

static __inline__ ticks getticks(void)
{
    ticks ret;

    __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret));
    return ret;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif

/* HP/UX IA64 compiler, courtesy Teresa L. Johnson: */
#if defined(__hpux) && defined(__ia64) && !defined(HAVE_TICK_COUNTER)
#include <machine/sys/inline.h>
typedef unsigned long ticks;

static inline ticks getticks(void)
{
    ticks ret;

    ret = _Asm_mov_from_ar (_AREG_ITC);
    return ret;
}

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#endif

/* Microsoft Visual C++ */
#if defined(_MSC_VER) && defined(_M_IA64) && !defined(HAVE_TICK_COUNTER)
typedef unsigned __int64 ticks;

#  ifdef __cplusplus
extern "C"
#  endif
ticks __getReg(int whichReg);
#pragma intrinsic(__getReg)

static __inline ticks getticks(void)
{
    volatile ticks temp;
    temp = __getReg(3116);
    return temp;
}

/* Uses the same __inline spelling as getticks() above, so that both
   functions in this section are accepted by the same set of compilers. */
INLINE_ELAPSED(__inline)

#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
/*
 * PA-RISC cycle counter
 */
#if (defined(__hppa__) || defined(__hppa)) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long ticks;

#  ifdef __GNUC__
static __inline__ ticks getticks(void)
{
    ticks ret;

    __asm__ __volatile__("mfctl 16, %0": "=r" (ret));
    /* no input, nothing else clobbered */
    return ret;
}
#  else
#  include <machine/inline.h>
static inline ticks getticks(void)
{
    register ticks ret;
    _MFCTL(16, ret);
    return ret;
}
#  endif

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
/* S390, courtesy of James Treacy */
#if defined(__GNUC__) && defined(__s390__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

/* stck stores the clock value to the address given as operand, hence
   the "memory" clobber instead of an output operand. */
static __inline__ ticks getticks(void)
{
    ticks cycles;
    __asm__("stck 0(%0)" : : "a" (&(cycles)) : "memory", "cc");
    return cycles;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif
/*----------------------------------------------------------------*/
#if defined(__GNUC__) && defined(__alpha__) && !defined(HAVE_TICK_COUNTER)
/*
 * The 32-bit cycle counter on alpha overflows pretty quickly,
 * unfortunately.  A 1GHz machine overflows in 4 seconds.
 */
typedef unsigned int ticks;

/* Only the low 32 bits of the rpcc result are kept. */
static __inline__ ticks getticks(void)
{
    unsigned long cc;
    __asm__ __volatile__ ("rpcc %0" : "=r"(cc));
    return (cc & 0xFFFFFFFF);
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
#if defined(__GNUC__) && defined(__sparc_v9__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long ticks;

static __inline__ ticks getticks(void)
{
    ticks ret;
    __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
    return ret;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
#if (defined(__DECC) || defined(__DECCXX)) && defined(__alpha) && defined(HAVE_C_ASM_H) && !defined(HAVE_TICK_COUNTER)
#  include <c_asm.h>
typedef unsigned int ticks;

/* Only the low 32 bits of the rpcc result are kept. */
static __inline ticks getticks(void)
{
    unsigned long cc;
    cc = asm("rpcc %v0");
    return (cc & 0xFFFFFFFF);
}

INLINE_ELAPSED(__inline)

#define HAVE_TICK_COUNTER
#endif
/*----------------------------------------------------------------*/
/* SGI/Irix */
#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE) && !defined(HAVE_TICK_COUNTER)
typedef struct timespec ticks;

static inline ticks getticks(void)
{
    struct timespec t;
    clock_gettime(CLOCK_SGI_CYCLE, &t);
    return t;
}

/* ticks is a struct timespec here; the difference is returned in
   nanoseconds (seconds scaled by 1e9 plus the tv_nsec difference). */
static inline double elapsed(ticks t1, ticks t0)
{
    return ((double)t1.tv_sec - (double)t0.tv_sec) * 1.0E9 +
           ((double)t1.tv_nsec - (double)t0.tv_nsec);
}
#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
/* Cray UNICOS _rtc() intrinsic function */
#if defined(HAVE__RTC) && !defined(HAVE_TICK_COUNTER)
#ifdef HAVE_INTRINSICS_H
#  include <intrinsics.h>
#endif

typedef long long ticks;

#define getticks _rtc

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
/* MIPS ZBus */
#if defined(HAVE_MIPS_ZBUS_TIMER) && HAVE_MIPS_ZBUS_TIMER
#if defined(__mips__) && !defined(HAVE_TICK_COUNTER)
#include <stdint.h>   /* uint32_t, uint64_t */
#include <stdio.h>    /* perror */
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>

typedef uint64_t ticks;

/* Returns the 64-bit value read from physical address 0x10030000.  On
   the first call one page of /dev/mem at that address is mapped
   read-only, and the mapping is cached for later calls.
   NOTE(review): presumably this address is the ZBus timer register;
   confirm against the hardware documentation.

   If /dev/mem cannot be opened or mapped, an error is reported with
   perror() and 0 is returned.  A failed mapping is never cached, so the
   next call retries instead of dereferencing an invalid pointer. */
static inline ticks getticks(void)
{
    /* volatile: the pointee is a device register mapped from /dev/mem,
       so every call must perform a real load. */
    static volatile uint64_t *addr = NULL;

    if (addr == NULL) {
        uint32_t rq_addr = 0x10030000;
        void *mapping;
        int fd;
        int pgsize;

        pgsize = getpagesize();
        fd = open("/dev/mem", O_RDONLY | O_SYNC, 0);
        if (fd < 0) {
            perror("open");
            return 0;
        }
        mapping = mmap(0, pgsize, PROT_READ, MAP_SHARED, fd, rq_addr);
        if (mapping == MAP_FAILED) {
            /* report before close() so that errno still describes the
               mmap failure */
            perror("mmap");
            close(fd);
            return 0;
        }
        /* the mapping stays valid after the descriptor is closed */
        close(fd);
        addr = (volatile uint64_t *)mapping;
    }

    return *addr;
}

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#endif
#endif /* HAVE_MIPS_ZBUS_TIMER */