NFFT  3.3.1
cycle.h
00001 /*
00002  * Copyright (c) 2003, 2007-14 Matteo Frigo
00003  * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology
00004  *
00005  * Permission is hereby granted, free of charge, to any person obtaining
00006  * a copy of this software and associated documentation files (the
00007  * "Software"), to deal in the Software without restriction, including
00008  * without limitation the rights to use, copy, modify, merge, publish,
00009  * distribute, sublicense, and/or sell copies of the Software, and to
00010  * permit persons to whom the Software is furnished to do so, subject to
00011  * the following conditions:
00012  *
00013  * The above copyright notice and this permission notice shall be
00014  * included in all copies or substantial portions of the Software.
00015  *
00016  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00017  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00018  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
00019  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
00020  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
00021  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
00022  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00023  *
00024  */
00025 
00026 
00027 /* machine-dependent cycle counters code. Needs to be inlined. */
00028 
00029 /***************************************************************************/
00030 /* To use the cycle counters in your code, simply #include "cycle.h" (this
00031    file), and then use the functions/macros:
00032 
00033                  ticks getticks(void);
00034 
00035    ticks is an opaque typedef defined below, representing the current time.
00036    You extract the elapsed time between two calls to getticks() via:
00037 
00038                  double elapsed(ticks t1, ticks t0);
00039 
00040    which returns a double-precision variable in arbitrary units.  You
00041    are not expected to convert this into human units like seconds; it
00042    is intended only for *comparisons* of time intervals.
00043 
00044    (In order to use some of the OS-dependent timer routines like
00045    Solaris' gethrtime, you need to paste the autoconf snippet below
00046    into your configure.ac file and #include "config.h" before cycle.h,
00047    or define the relevant macros manually if you are not using autoconf.)
00048 */
00049 
00050 /***************************************************************************/
00051 /* This file uses macros like HAVE_GETHRTIME that are assumed to be
00052    defined according to whether the corresponding function/type/header
00053    is available on your system.  The necessary macros are most
00054    conveniently defined if you are using GNU autoconf, via the tests:
00055 
00056    dnl ---------------------------------------------------------------------
00057 
00058    AC_C_INLINE
00059    AC_HEADER_TIME
00060    AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h])
00061 
00062    AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is defined in <sys/time.h>])],,[#if HAVE_SYS_TIME_H
00063 #include <sys/time.h>
00064 #endif])
00065 
00066    AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime mach_absolute_time])
00067 
00068    dnl Cray UNICOS _rtc() (real-time clock) intrinsic
00069    AC_MSG_CHECKING([for _rtc intrinsic])
00070    rtc_ok=yes
00071    AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H
00072 #include <intrinsics.h>
00073 #endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])], [rtc_ok=no])
00074    AC_MSG_RESULT($rtc_ok)
00075 
00076    dnl ---------------------------------------------------------------------
00077 */
00078 
00079 /***************************************************************************/
00080 
00081 #if TIME_WITH_SYS_TIME
00082 # include <sys/time.h>
00083 # include <time.h>
00084 #else
00085 # if HAVE_SYS_TIME_H
00086 #  include <sys/time.h>
00087 # else
00088 #  include <time.h>
00089 # endif
00090 #endif
00091 
/*
 * INLINE_ELAPSED(INL) expands to the definition of
 *
 *      static INL double elapsed(ticks t1, ticks t0);
 *
 * for platforms whose `ticks` type is a plain arithmetic type.  INL is the
 * inline keyword spelling understood by the compiler in use (inline,
 * __inline or __inline__).  Each reading is converted to double on its own
 * and the result is the later reading (t1) minus the earlier one (t0).
 */
#define INLINE_ELAPSED(INL)                                      \
     static INL double elapsed(ticks t1, ticks t0)               \
     {                                                           \
          const double stop = (double)t1;                        \
          const double start = (double)t0;                       \
          return stop - start;                                   \
     }
00096 
00097 /*----------------------------------------------------------------*/
00098 /* Solaris */
00099 #if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T) && !defined(HAVE_TICK_COUNTER)
/* Used only when both gethrtime() and hrtime_t were detected and no
   tick counter has been selected yet. */
00100 typedef hrtime_t ticks;
00101 
/* getticks is a plain alias for gethrtime, so getticks() calls it directly. */
00102 #define getticks gethrtime
00103 
/* elapsed() is generated by the shared INLINE_ELAPSED macro. */
00104 INLINE_ELAPSED(inline)
00105 
/* Mark a counter as chosen so that the later sections are skipped. */
00106 #define HAVE_TICK_COUNTER
00107 #endif
00108 
00109 /*----------------------------------------------------------------*/
00110 /* AIX v. 4+ routines to read the real-time clock or time-base register */
00111 #if defined(HAVE_READ_REAL_TIME) && defined(HAVE_TIME_BASE_TO_TIME) && !defined(HAVE_TICK_COUNTER)
/* ticks is the raw timebasestruct_t filled in by read_real_time(). */
00112 typedef timebasestruct_t ticks;
00113 
/* Return the current reading as delivered by read_real_time(). */
00114 static __inline ticks getticks(void)
00115 {
00116      ticks t;
00117      read_real_time(&t, TIMEBASE_SZ);
00118      return t;
00119 }
00120 
/* Convert both readings in place with time_base_to_time() (the arguments are
   by-value copies), then combine the tb_high difference, scaled by 1e9, with
   the tb_low difference. */
00121 static __inline double elapsed(ticks t1, ticks t0) /* time in nanoseconds */
00122 {
00123      time_base_to_time(&t1, TIMEBASE_SZ);
00124      time_base_to_time(&t0, TIMEBASE_SZ);
00125      return (((double)t1.tb_high - (double)t0.tb_high) * 1.0e9 +
00126          ((double)t1.tb_low - (double)t0.tb_low));
00127 }
00128 
/* Mark a counter as chosen so that the later sections are skipped. */
00129 #define HAVE_TICK_COUNTER
00130 #endif
00131 
00132 /*----------------------------------------------------------------*/
00133 /*
00134  * PowerPC ``cycle'' counter using the time base register.
00135  */
00136 #if ((((defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))) || (defined(__MWERKS__) && defined(macintosh)))) || (defined(__IBM_GCC_ASM) && (defined(__powerpc__) || defined(__ppc__))))  && !defined(HAVE_TICK_COUNTER)
00137 typedef unsigned long long ticks;
00138 
/* Read the time base as two 32-bit halves (mftbu = upper, mftb = lower) and
   combine them into one 64-bit value.  The upper half is read before and
   after the lower half and the sequence is retried until both upper reads
   agree; presumably this guards against the upper half changing between the
   reads. */
00139 static __inline__ ticks getticks(void)
00140 {
00141      unsigned int tbl, tbu0, tbu1;
00142 
00143      do {
00144       __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0));
00145       __asm__ __volatile__ ("mftb %0" : "=r"(tbl));
00146       __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1));
00147      } while (tbu0 != tbu1);
00148 
00149      return (((unsigned long long)tbu0) << 32) | tbl;
00150 }
00151 
/* elapsed() is generated by the shared INLINE_ELAPSED macro. */
00152 INLINE_ELAPSED(__inline__)
00153 
/* Mark a counter as chosen so that the later sections are skipped. */
00154 #define HAVE_TICK_COUNTER
00155 #endif
00156 
00157 /* MacOS/Mach (Darwin) time-base register interface (unlike UpTime,
00158    from Carbon, requires no additional libraries to be linked). */
00159 #if defined(HAVE_MACH_ABSOLUTE_TIME) && defined(HAVE_MACH_MACH_TIME_H) && !defined(HAVE_TICK_COUNTER)
/* Used only when both the function and its header were detected and no tick
   counter has been selected yet. */
00160 #include <mach/mach_time.h>
00161 typedef uint64_t ticks;
/* getticks is a plain alias for mach_absolute_time. */
00162 #define getticks mach_absolute_time
/* elapsed() is generated by the shared INLINE_ELAPSED macro. */
00163 INLINE_ELAPSED(__inline__)
00164 #define HAVE_TICK_COUNTER
00165 #endif
00166 
00167 /*----------------------------------------------------------------*/
00168 /*
00169  * Pentium cycle counter
00170  */
00171 #if (defined(__GNUC__) || defined(__ICC)) && defined(__i386__)  && !defined(HAVE_TICK_COUNTER)
00172 typedef unsigned long long ticks;
00173 
/* Execute rdtsc and return its result; the 64-bit value is delivered into
   ret through the single "=A" output operand. */
00174 static __inline__ ticks getticks(void)
00175 {
00176      ticks ret;
00177 
00178      __asm__ __volatile__("rdtsc": "=A" (ret));
00179      /* no input, nothing else clobbered */
00180      return ret;
00181 }
00182 
/* elapsed() is generated by the shared INLINE_ELAPSED macro. */
00183 INLINE_ELAPSED(__inline__)
00184 
00185 #define HAVE_TICK_COUNTER
/* TIME_MIN is defined here for use by code outside this file. */
00186 #define TIME_MIN 5000.0   /* unreliable pentium IV cycle counter */
00187 #endif
00188 
00189 /* Visual C++ -- thanks to Morten Nissov for his help with this */
00190 #if defined(_MSC_VER) && _MSC_VER >= 1200 && _M_IX86 >= 500 && !defined(HAVE_TICK_COUNTER)
00191 #include <windows.h>
/* ticks is a LARGE_INTEGER so the two 32-bit halves can be stored separately
   (HighPart/LowPart) and read back as one value (QuadPart). */
00192 typedef LARGE_INTEGER ticks;
/* RDTSC emits the two opcode bytes 0x0f 0x31 directly instead of using a
   mnemonic. */
00193 #define RDTSC __asm __emit 0fh __asm __emit 031h /* hack for VC++ 5.0 */
00194 
/* Run RDTSC and copy edx into the high half and eax into the low half of
   the result. */
00195 static __inline ticks getticks(void)
00196 {
00197      ticks retval;
00198 
00199      __asm {
00200       RDTSC
00201       mov retval.HighPart, edx
00202       mov retval.LowPart, eax
00203      }
00204      return retval;
00205 }
00206 
/* Hand-written instead of INLINE_ELAPSED because ticks is a struct/union
   here: the difference is taken on the QuadPart members. */
00207 static __inline double elapsed(ticks t1, ticks t0)
00208 {
00209      return (double)t1.QuadPart - (double)t0.QuadPart;
00210 }
00211 
00212 #define HAVE_TICK_COUNTER
00213 #define TIME_MIN 5000.0   /* unreliable pentium IV cycle counter */
00214 #endif
00215 
00216 /*----------------------------------------------------------------*/
00217 /*
00218  * X86-64 cycle counter
00219  */
00220 #if (defined(__GNUC__) || defined(__ICC) || defined(__SUNPRO_C)) && defined(__x86_64__)  && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

/*
 * Read the x86-64 time-stamp counter.
 *
 * rdtsc leaves its result split across two registers; the low half is
 * captured through the "=a" output operand and the high half through "=d",
 * then both are merged into one 64-bit tick value.
 *
 * The alternate keyword spellings __asm__ / __volatile__ are used (as in the
 * other GNU-style sections of this file) so the header also compiles when
 * the plain `asm` keyword is disabled, e.g. under -std=c99 or -std=c11.
 *
 * Returns: the current counter value.
 */
static __inline__ ticks getticks(void)
{
     unsigned a, d;

     __asm__ __volatile__("rdtsc" : "=a" (a), "=d" (d));
     return ((ticks)a) | (((ticks)d) << 32);
}
00229 
00230 INLINE_ELAPSED(__inline__)
00231 
00232 #define HAVE_TICK_COUNTER
00233 #define TIME_MIN 5000.0
00234 #endif
00235 
00236 /* PGI compiler, courtesy Cristiano Calonaci, Andrea Tarsi, & Roberto Gori.
00237    NOTE: this code will fail to link unless you use the -Masmkeyword compiler
00238    option (grrr). */
00239 #if defined(__PGI) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER)
00240 typedef unsigned long long ticks;
/* The asm runs rdtsc, shifts rdx left by 32, clears the upper half of rax
   (mov %eax,%eax) and ORs rdx into rax.
   NOTE(review): there is no return statement; the function presumably relies
   on the asm leaving the result in the register used for the return value.
   Confirm this holds for the compiler and optimization level in use. */
00241 static ticks getticks(void)
00242 {
00243     asm(" rdtsc; shl    $0x20,%rdx; mov    %eax,%eax; or     %rdx,%rax;    ");
00244 }
/* elapsed() is generated by the shared INLINE_ELAPSED macro. */
00245 INLINE_ELAPSED(__inline__)
00246 #define HAVE_TICK_COUNTER
00247 #define TIME_MIN 5000.0
00248 #endif
00249 
00250 /* Visual C++, courtesy of Dirk Michaelis */
00251 #if defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(_M_AMD64) || defined(_M_X64)) && !defined(HAVE_TICK_COUNTER)
00252 
/* Uses the __rdtsc compiler intrinsic from <intrin.h> rather than inline
   assembly. */
00253 #include <intrin.h>
00254 #pragma intrinsic(__rdtsc)
00255 typedef unsigned __int64 ticks;
/* getticks is a plain alias for the intrinsic. */
00256 #define getticks __rdtsc
/* elapsed() is generated by the shared INLINE_ELAPSED macro. */
00257 INLINE_ELAPSED(__inline)
00258 
00259 #define HAVE_TICK_COUNTER
00260 #define TIME_MIN 5000.0
00261 #endif
00262 
00263 /*----------------------------------------------------------------*/
00264 /*
00265  * IA64 cycle counter
00266  */
00267 
00268 /* intel's icc/ecc compiler */
00269 #if (defined(__EDG_VERSION) || defined(__ECC)) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
00270 typedef unsigned long ticks;
00271 #include <ia64intrin.h>
00272 
/* Return the value of the register named _IA64_REG_AR_ITC, read through the
   __getReg intrinsic. */
00273 static __inline__ ticks getticks(void)
00274 {
00275      return __getReg(_IA64_REG_AR_ITC);
00276 }
00277 
/* elapsed() is generated by the shared INLINE_ELAPSED macro. */
00278 INLINE_ELAPSED(__inline__)
00279 
00280 #define HAVE_TICK_COUNTER
00281 #endif
00282 
00283 /* gcc */
00284 #if defined(__GNUC__) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
00285 typedef unsigned long ticks;
00286 
/* Copy ar.itc into ret with a single mov instruction and return it. */
00287 static __inline__ ticks getticks(void)
00288 {
00289      ticks ret;
00290 
00291      __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret));
00292      return ret;
00293 }
00294 
/* elapsed() is generated by the shared INLINE_ELAPSED macro. */
00295 INLINE_ELAPSED(__inline__)
00296 
00297 #define HAVE_TICK_COUNTER
00298 #endif
00299 
00300 /* HP/UX IA64 compiler, courtesy Teresa L. Johnson: */
00301 #if defined(__hpux) && defined(__ia64) && !defined(HAVE_TICK_COUNTER)
00302 #include <machine/sys/inline.h>
00303 typedef unsigned long ticks;
00304 
/* Read the register named _AREG_ITC through the _Asm_mov_from_ar intrinsic
   and return it. */
00305 static inline ticks getticks(void)
00306 {
00307      ticks ret;
00308 
00309      ret = _Asm_mov_from_ar (_AREG_ITC);
00310      return ret;
00311 }
00312 
/* elapsed() is generated by the shared INLINE_ELAPSED macro. */
00313 INLINE_ELAPSED(inline)
00314 
00315 #define HAVE_TICK_COUNTER
00316 #endif
00317 
00318 /* Microsoft Visual C++ */
#if defined(_MSC_VER) && defined(_M_IA64) && !defined(HAVE_TICK_COUNTER)
typedef unsigned __int64 ticks;

/* Declare the __getReg intrinsic by hand (with C linkage when compiled as
   C++) and ask the compiler to treat it as an intrinsic. */
#  ifdef __cplusplus
extern "C"
#  endif
ticks __getReg(int whichReg);
#pragma intrinsic(__getReg)

/*
 * Return the current value of register 3116 as read through __getReg.
 * NOTE(review): 3116 is presumably the numeric id of the interval time
 * counter (the register the icc section names _IA64_REG_AR_ITC) -- confirm.
 * The value goes through a volatile temporary before being returned.
 */
static __inline ticks getticks(void)
{
     volatile ticks temp;
     temp = __getReg(3116);
     return temp;
}

/* Use the __inline spelling, as getticks() above does: the Microsoft C
   compiler accepts __inline in both C and C++, whereas plain `inline` is
   not available in C on the compiler versions that target IA64. */
INLINE_ELAPSED(__inline)

/* Mark a counter as chosen so that the later sections are skipped. */
#define HAVE_TICK_COUNTER
#endif
00339 
00340 /*----------------------------------------------------------------*/
00341 /*
00342  * PA-RISC cycle counter
00343  */
00344 #if (defined(__hppa__) || defined(__hppa)) && !defined(HAVE_TICK_COUNTER)
00345 typedef unsigned long ticks;
00346 
/* Two implementations of getticks(): GNU inline asm, or the _MFCTL macro
   from <machine/inline.h> for other compilers.  Both read control
   register 16. */
00347 #  ifdef __GNUC__
00348 static __inline__ ticks getticks(void)
00349 {
00350      ticks ret;
00351 
00352      __asm__ __volatile__("mfctl 16, %0": "=r" (ret));
00353      /* no input, nothing else clobbered */
00354      return ret;
00355 }
00356 #  else
00357 #  include <machine/inline.h>
/* Declared as returning unsigned long, which is the same type as ticks. */
00358 static inline unsigned long getticks(void)
00359 {
00360      register ticks ret;
00361      _MFCTL(16, ret);
00362      return ret;
00363 }
00364 #  endif
00365 
/* elapsed() is generated by the shared INLINE_ELAPSED macro. */
00366 INLINE_ELAPSED(inline)
00367 
00368 #define HAVE_TICK_COUNTER
00369 #endif
00370 
00371 /*----------------------------------------------------------------*/
00372 /* S390, courtesy of James Treacy */
00373 #if defined(__GNUC__) && defined(__s390__) && !defined(HAVE_TICK_COUNTER)
00374 typedef unsigned long long ticks;
00375 
/* The stck instruction stores its result to the address passed as the input
   operand (&cycles).  The asm has no output operands; the "memory" clobber
   tells the compiler that memory is written. */
00376 static __inline__ ticks getticks(void)
00377 {
00378      ticks cycles;
00379      __asm__("stck 0(%0)" : : "a" (&(cycles)) : "memory", "cc");
00380      return cycles;
00381 }
00382 
/* elapsed() is generated by the shared INLINE_ELAPSED macro. */
00383 INLINE_ELAPSED(__inline__)
00384 
00385 #define HAVE_TICK_COUNTER
00386 #endif
00387 /*----------------------------------------------------------------*/
00388 #if defined(__GNUC__) && defined(__alpha__) && !defined(HAVE_TICK_COUNTER)
00389 /*
00390  * The 32-bit cycle counter on alpha overflows pretty quickly,
00391  * unfortunately.  A 1GHz machine overflows in 4 seconds.
00392  */
00393 typedef unsigned int ticks;
00394 
/* Read the rpcc result into a long and keep only its low 32 bits. */
00395 static __inline__ ticks getticks(void)
00396 {
00397      unsigned long cc;
00398      __asm__ __volatile__ ("rpcc %0" : "=r"(cc));
00399      return (cc & 0xFFFFFFFF);
00400 }
00401 
/* elapsed() is generated by the shared INLINE_ELAPSED macro; it subtracts
   the two readings as doubles, so a counter wrap between them yields a
   negative result. */
00402 INLINE_ELAPSED(__inline__)
00403 
00404 #define HAVE_TICK_COUNTER
00405 #endif
00406 
00407 /*----------------------------------------------------------------*/
00408 #if defined(__GNUC__) && defined(__sparc_v9__) && !defined(HAVE_TICK_COUNTER)
00409 typedef unsigned long ticks;
00410 
/* Read the %tick register into ret with a single rd instruction. */
00411 static __inline__ ticks getticks(void)
00412 {
00413      ticks ret;
00414      __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
00415      return ret;
00416 }
00417 
/* elapsed() is generated by the shared INLINE_ELAPSED macro. */
00418 INLINE_ELAPSED(__inline__)
00419 
00420 #define HAVE_TICK_COUNTER
00421 #endif
00422 
00423 /*----------------------------------------------------------------*/
00424 #if (defined(__DECC) || defined(__DECCXX)) && defined(__alpha) && defined(HAVE_C_ASM_H) && !defined(HAVE_TICK_COUNTER)
00425 #  include <c_asm.h>
00426 typedef unsigned int ticks;
00427 
/* Same approach as the gcc Alpha section, using the asm() form from
   <c_asm.h>: read rpcc and keep only the low 32 bits. */
00428 static __inline ticks getticks(void)
00429 {
00430      unsigned long cc;
00431      cc = asm("rpcc %v0");
00432      return (cc & 0xFFFFFFFF);
00433 }
00434 
/* elapsed() is generated by the shared INLINE_ELAPSED macro. */
00435 INLINE_ELAPSED(__inline)
00436 
00437 #define HAVE_TICK_COUNTER
00438 #endif
00439 /*----------------------------------------------------------------*/
00440 /* SGI/Irix */
00441 #if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE) && !defined(HAVE_TICK_COUNTER)
/* ticks is the struct timespec filled in by clock_gettime(). */
00442 typedef struct timespec ticks;
00443 
/* Return the current CLOCK_SGI_CYCLE reading.  The return value of
   clock_gettime() is not checked. */
00444 static inline ticks getticks(void)
00445 {
00446      struct timespec t;
00447      clock_gettime(CLOCK_SGI_CYCLE, &t);
00448      return t;
00449 }
00450 
/* Hand-written because ticks is a struct here: the seconds difference is
   scaled by 1e9 and added to the nanoseconds difference. */
00451 static inline double elapsed(ticks t1, ticks t0)
00452 {
00453      return ((double)t1.tv_sec - (double)t0.tv_sec) * 1.0E9 +
00454       ((double)t1.tv_nsec - (double)t0.tv_nsec);
00455 }
00456 #define HAVE_TICK_COUNTER
00457 #endif
00458 
00459 /*----------------------------------------------------------------*/
00460 /* Cray UNICOS _rtc() intrinsic function */
00461 #if defined(HAVE__RTC) && !defined(HAVE_TICK_COUNTER)
/* <intrinsics.h> is pulled in only when autoconf found it. */
00462 #ifdef HAVE_INTRINSICS_H
00463 #  include <intrinsics.h>
00464 #endif
00465 
00466 typedef long long ticks;
00467 
/* getticks is a plain alias for the _rtc intrinsic. */
00468 #define getticks _rtc
00469 
/* elapsed() is generated by the shared INLINE_ELAPSED macro. */
00470 INLINE_ELAPSED(inline)
00471 
00472 #define HAVE_TICK_COUNTER
00473 #endif
00474 
00475 /*----------------------------------------------------------------*/
00476 /* MIPS ZBus */
00477 #if defined(HAVE_MIPS_ZBUS_TIMER) && HAVE_MIPS_ZBUS_TIMER
00478 #if defined(__mips__) && !defined(HAVE_TICK_COUNTER)
#include <stdint.h>   /* uint64_t */
#include <stdio.h>    /* perror */
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>

typedef uint64_t ticks;

/*
 * Read the MIPS ZBus timer through a read-only mapping of /dev/mem.
 *
 * On the first successful call one page at physical address 0x10030000 is
 * mapped and the pointer is cached in a function-local static; every call
 * then returns the 64-bit value found at the start of that page.
 *
 * Returns: the current counter value, or 0 if /dev/mem cannot be opened or
 * mapped (a diagnostic is printed with perror).  A failed attempt is not
 * cached, so the next call tries again instead of dereferencing an invalid
 * pointer.
 */
static inline ticks getticks(void)
{
  /* volatile: the location is a hardware counter, so each call must perform
     a real load rather than reuse an earlier one. */
  static volatile uint64_t *addr = 0;

  if (addr == 0)
  {
    const uint32_t rq_addr = 0x10030000;
    void *map;
    int fd;
    int pgsize;

    pgsize = getpagesize();
    fd = open("/dev/mem", O_RDONLY | O_SYNC, 0);
    if (fd < 0) {
      perror("open");
      return 0;
    }
    map = mmap(0, pgsize, PROT_READ, MAP_SHARED, fd, rq_addr);
    if (map == MAP_FAILED) {
      /* Report before close() so errno still belongs to mmap. */
      perror("mmap");
      close(fd);
      return 0;
    }
    /* The mapping stays valid after the descriptor is closed. */
    close(fd);
    addr = (volatile uint64_t *)map;
  }

  return *addr;
}
00511 
00512 INLINE_ELAPSED(inline)
00513 
00514 #define HAVE_TICK_COUNTER
00515 #endif
00516 #endif /* HAVE_MIPS_ZBUS_TIMER */
00517