|
PAPI
5.3.0.0
|
00001 /* 00002 * File: linux-timer.c 00003 * 00004 */ 00005 00006 #include <time.h> 00007 #include <sys/syscall.h> 00008 00009 #include "papi.h" 00010 #include "papi_internal.h" 00011 #include "papi_vector.h" 00012 00013 #include <fcntl.h> 00014 #include <errno.h> 00015 #include <string.h> 00016 00017 #include <sys/time.h> 00018 00019 #include <fcntl.h> 00020 #include "linux-common.h" 00021 00022 #include <sys/time.h> 00023 #include <sys/resource.h> 00024 00025 #include <sys/times.h> 00026 00027 #ifdef __ia64__ 00028 #include "perfmon/pfmlib_itanium2.h" 00029 #include "perfmon/pfmlib_montecito.h" 00030 #endif 00031 00032 #if defined(HAVE_MMTIMER) 00033 #include <sys/mman.h> 00034 #include <linux/mmtimer.h> 00035 #include <sys/ioctl.h> 00036 #ifndef MMTIMER_FULLNAME 00037 #define MMTIMER_FULLNAME "/dev/mmtimer" 00038 #endif 00039 00040 static int mmdev_fd; 00041 static unsigned long mmdev_mask; 00042 static unsigned long mmdev_ratio; 00043 static volatile unsigned long *mmdev_timer_addr; 00044 00045 /* setup mmtimer */ 00046 int mmtimer_setup(void) { 00047 00048 unsigned long femtosecs_per_tick = 0; 00049 unsigned long freq = 0; 00050 int result; 00051 int offset; 00052 00053 SUBDBG( "MMTIMER Opening %s\n", MMTIMER_FULLNAME ); 00054 if ( ( mmdev_fd = open( MMTIMER_FULLNAME, O_RDONLY ) ) == -1 ) { 00055 PAPIERROR( "Failed to open MM timer %s", MMTIMER_FULLNAME ); 00056 return PAPI_ESYS; 00057 } 00058 SUBDBG( "MMTIMER checking if we can mmap" ); 00059 if ( ioctl( mmdev_fd, MMTIMER_MMAPAVAIL, 0 ) != 1 ) { 00060 PAPIERROR( "mmap of MM timer unavailable" ); 00061 return PAPI_ESYS; 00062 } 00063 SUBDBG( "MMTIMER setting close on EXEC flag\n" ); 00064 if ( fcntl( mmdev_fd, F_SETFD, FD_CLOEXEC ) == -1 ) { 00065 PAPIERROR( "Failed to fcntl(FD_CLOEXEC) on MM timer FD %d: %s", 00066 mmdev_fd, strerror( errno ) ); 00067 return PAPI_ESYS; 00068 } 00069 SUBDBG( "MMTIMER is on FD %d, getting offset\n", mmdev_fd ); 00070 if ( ( offset = ioctl( mmdev_fd, MMTIMER_GETOFFSET, 0 ) ) < 0 ) { 00071 PAPIERROR( "Failed to get offset of MM timer" ); 00072 return PAPI_ESYS; 00073 } 00074 SUBDBG( "MMTIMER has offset of %d, getting frequency\n", offset ); 00075 if ( ioctl( mmdev_fd, MMTIMER_GETFREQ, &freq ) == -1 ) { 00076 PAPIERROR( "Failed to get frequency of MM timer" ); 00077 return PAPI_ESYS; 00078 } 00079 SUBDBG( "MMTIMER has frequency %lu Mhz\n", freq / 1000000 ); 00080 // don't know for sure, but I think this ratio is inverted 00081 // mmdev_ratio = (freq/1000000) / (unsigned long)_papi_hwi_system_info.hw_info.mhz; 00082 mmdev_ratio = 00083 ( unsigned long ) _papi_hwi_system_info.hw_info.cpu_max_mhz / 00084 ( freq / 1000000 ); 00085 SUBDBG( "MMTIMER has a ratio of %ld to the CPU's clock, getting resolution\n", 00086 mmdev_ratio ); 00087 if ( ioctl( mmdev_fd, MMTIMER_GETRES, &femtosecs_per_tick ) == -1 ) { 00088 PAPIERROR( "Failed to get femtoseconds per tick" ); 00089 return PAPI_ESYS; 00090 } 00091 SUBDBG( "MMTIMER res is %lu femtosecs/tick (10^-15s) or %f Mhz, getting valid bits\n", 00092 femtosecs_per_tick, 1.0e9 / ( double ) femtosecs_per_tick ); 00093 if ( ( result = ioctl( mmdev_fd, MMTIMER_GETBITS, 0 ) ) == -ENOSYS ) { 00094 PAPIERROR( "Failed to get number of bits in MMTIMER" ); 00095 return PAPI_ESYS; 00096 } 00097 mmdev_mask = ~( 0xffffffffffffffff << result ); 00098 SUBDBG( "MMTIMER has %d valid bits, mask 0x%16lx, getting mmaped page\n", 00099 result, mmdev_mask ); 00100 if ( ( mmdev_timer_addr = 00101 ( unsigned long * ) mmap( 0, getpagesize( ), PROT_READ, 00102 MAP_PRIVATE, mmdev_fd, 00103 0 ) ) == NULL ) { 00104 PAPIERROR( "Failed to mmap MM timer" ); 00105 return PAPI_ESYS; 00106 } 00107 SUBDBG( "MMTIMER page is at %p, actual address is %p\n", 00108 mmdev_timer_addr, mmdev_timer_addr + offset ); 00109 mmdev_timer_addr += offset; 00110 /* mmdev_fd should be closed and page should be unmapped in a global shutdown routine */ 00111 return PAPI_OK; 00112 00113 } 00114 00115 #else 00116 int mmtimer_setup(void) { return PAPI_OK; } 00117 #endif 00118 00119 00120 00121 00122 00123 /* Hardware clock functions */ 00124 00125 /* All architectures should set HAVE_CYCLES in configure if they have these. 00126 Not all do so for now, we have to guard at the end of the statement, 00127 instead of the top. When all archs set this, this region will be guarded 00128 with: 00129 #if defined(HAVE_CYCLE) 00130 which is equivalent to 00131 #if !defined(HAVE_GETTIMEOFDAY) && !defined(HAVE_CLOCK_GETTIME) 00132 */ 00133 00134 /************************/ 00135 /* MMTIMER get_cycles() */ 00136 /************************/ 00137 00138 #if defined(HAVE_MMTIMER) 00139 00140 static inline long long 00141 get_cycles( void ) 00142 { 00143 long long tmp = 0; 00144 00145 tmp = *mmdev_timer_addr & mmdev_mask; 00146 SUBDBG("MMTIMER is %llu, scaled %llu\n",tmp,tmp*mmdev_ratio); 00147 tmp *= mmdev_ratio; 00148 00149 return tmp; 00150 } 00151 00152 /************************/ 00153 /* ia64 get_cycles() */ 00154 /************************/ 00155 00156 #elif defined(__ia64__) 00157 extern int _perfmon2_pfm_pmu_type; 00158 00159 static inline long long 00160 get_cycles( void ) 00161 { 00162 long long tmp = 0; 00163 #if defined(__INTEL_COMPILER) 00164 tmp = __getReg( _IA64_REG_AR_ITC ); 00165 #else 00166 __asm__ __volatile__( "mov %0=ar.itc":"=r"( tmp )::"memory" ); 00167 #endif 00168 switch ( _perfmon2_pfm_pmu_type ) { 00169 case PFMLIB_MONTECITO_PMU: 00170 tmp = tmp * 4; 00171 break; 00172 } 00173 return tmp; 00174 } 00175 00176 /************************/ 00177 /* x86 get_cycles() */ 00178 /************************/ 00179 00180 #elif (defined(__i386__)||defined(__x86_64__)) 00181 static inline long long 00182 get_cycles( void ) 00183 { 00184 long long ret = 0; 00185 #ifdef __x86_64__ 00186 do { 00187 unsigned int a, d; 00188 asm volatile ( "rdtsc":"=a" ( a ), "=d"( d ) ); 00189 ( ret ) = ( ( long long ) a ) | ( ( ( long long ) d ) << 32 ); 00190 } 00191 while ( 0 ); 00192 #else 00193 __asm__ __volatile__( "rdtsc":"=A"( ret ): ); 00194 #endif 00195 return ret; 00196 } 00197 00198 /************************/ 00199 /* SPARC get_cycles() */ 00200 /************************/ 00201 00202 /* #define get_cycles _rtc ?? */ 00203 #elif defined(__sparc__) 00204 static inline long long 00205 get_cycles( void ) 00206 { 00207 register unsigned long ret asm( "g1" ); 00208 00209 __asm__ __volatile__( ".word 0x83410000" /* rd %tick, %g1 */ 00210 :"=r"( ret ) ); 00211 return ret; 00212 } 00213 00214 /************************/ 00215 /* POWER get_cycles() */ 00216 /************************/ 00217 00218 #elif (defined(__powerpc__) || defined(__arm__) || defined(__mips__) || defined(__aarch64__)) 00219 /* 00220 * It's not possible to read the cycles from user space on ppc970. 00221 * There is a 64-bit time-base register (TBU|TBL), but its 00222 * update rate is implementation-specific and cannot easily be translated 00223 * into a cycle count. So don't implement get_cycles for now, 00224 * but instead, rely on the definition of HAVE_CLOCK_GETTIME_REALTIME in 00225 * _papi_hwd_get_real_usec() for the needed functionality. 00226 */ 00227 00228 static inline long long 00229 get_cycles( void ) 00230 { 00231 return 0; 00232 } 00233 00234 00235 #elif !defined(HAVE_GETTIMEOFDAY) && !defined(HAVE_CLOCK_GETTIME) 00236 #error "No get_cycles support for this architecture. " 00237 #endif 00238 00239 00240 00241 00242 00243 00244 long long 00245 _linux_get_real_cycles( void ) 00246 { 00247 long long retval; 00248 #if defined(HAVE_GETTIMEOFDAY)||defined(__powerpc__)||defined(__arm__)||defined(__mips__) 00249 00250 /* Crude estimate, not accurate in prescence of DVFS */ 00251 00252 retval = _papi_os_vector.get_real_usec( ) * 00253 ( long long ) _papi_hwi_system_info.hw_info.cpu_max_mhz; 00254 #else 00255 retval = get_cycles( ); 00256 #endif 00257 return retval; 00258 } 00259 00260 00261 00262 00263 /******************************************************************** 00264 * microsecond timers * 00265 ********************************************************************/ 00266 00267 00268 /******************************* 00269 * HAVE_CLOCK_GETTIME * 00270 *******************************/ 00271 00272 long long 00273 _linux_get_real_usec_gettime( void ) 00274 { 00275 00276 long long retval; 00277 00278 struct timespec foo; 00279 #ifdef HAVE_CLOCK_GETTIME_REALTIME_HR 00280 syscall( __NR_clock_gettime, CLOCK_REALTIME_HR, &foo ); 00281 #else 00282 syscall( __NR_clock_gettime, CLOCK_REALTIME, &foo ); 00283 #endif 00284 retval = ( long long ) foo.tv_sec * ( long long ) 1000000; 00285 retval += ( long long ) ( foo.tv_nsec / 1000 ); 00286 00287 return retval; 00288 } 00289 00290 /********************** 00291 * HAVE_GETTIMEOFDAY * 00292 **********************/ 00293 00294 long long 00295 _linux_get_real_usec_gettimeofday( void ) 00296 { 00297 00298 long long retval; 00299 00300 struct timeval buffer; 00301 gettimeofday( &buffer, NULL ); 00302 retval = ( long long ) buffer.tv_sec * ( long long ) 1000000; 00303 retval += ( long long ) ( buffer.tv_usec ); 00304 00305 return retval; 00306 } 00307 00308 00309 long long 00310 _linux_get_real_usec_cycles( void ) 00311 { 00312 00313 long long retval; 00314 00315 /* Not accurate in the prescence of DVFS */ 00316 00317 retval = get_cycles( ) / 00318 ( long long ) _papi_hwi_system_info.hw_info.cpu_max_mhz; 00319 00320 return retval; 00321 } 00322 00323 00324 00325 /******************************* 00326 * HAVE_PER_THREAD_GETRUSAGE * 00327 *******************************/ 00328 00329 long long 00330 _linux_get_virt_usec_rusage( void ) 00331 { 00332 00333 long long retval; 00334 00335 struct rusage buffer; 00336 00337 getrusage( RUSAGE_SELF, &buffer ); 00338 SUBDBG( "user %d system %d\n", ( int ) buffer.ru_utime.tv_sec, 00339 ( int ) buffer.ru_stime.tv_sec ); 00340 retval = ( long long ) ( buffer.ru_utime.tv_sec + buffer.ru_stime.tv_sec ) 00341 * ( long long ) 1000000; 00342 retval += (long long) ( buffer.ru_utime.tv_usec + buffer.ru_stime.tv_usec ); 00343 00344 return retval; 00345 } 00346 00347 /************************** 00348 * HAVE_PER_THREAD_TIMES * 00349 **************************/ 00350 00351 long long 00352 _linux_get_virt_usec_times( void ) 00353 { 00354 00355 long long retval; 00356 00357 struct tms buffer; 00358 00359 times( &buffer ); 00360 00361 SUBDBG( "user %d system %d\n", ( int ) buffer.tms_utime, 00362 ( int ) buffer.tms_stime ); 00363 retval = ( long long ) ( ( buffer.tms_utime + buffer.tms_stime ) * 00364 1000000 / sysconf( _SC_CLK_TCK )); 00365 00366 /* NOT CLOCKS_PER_SEC as in the headers! */ 00367 00368 return retval; 00369 } 00370 00371 /******************************/ 00372 /* HAVE_CLOCK_GETTIME_THREAD */ 00373 /******************************/ 00374 00375 long long 00376 _linux_get_virt_usec_gettime( void ) 00377 { 00378 00379 long long retval; 00380 00381 struct timespec foo; 00382 00383 syscall( __NR_clock_gettime, CLOCK_THREAD_CPUTIME_ID, &foo ); 00384 retval = ( long long ) foo.tv_sec * ( long long ) 1000000; 00385 retval += ( long long ) foo.tv_nsec / 1000; 00386 00387 return retval; 00388 } 00389 00390 /********************/ 00391 /* USE_PROC_PTTIMER */ 00392 /********************/ 00393 00394 long long 00395 _linux_get_virt_usec_pttimer( void ) 00396 { 00397 00398 long long retval; 00399 char buf[LINE_MAX]; 00400 long long utime, stime; 00401 int rv, cnt = 0, i = 0; 00402 int stat_fd; 00403 00404 00405 again: 00406 sprintf( buf, "/proc/%d/task/%d/stat", getpid( ), mygettid( ) ); 00407 stat_fd = open( buf, O_RDONLY ); 00408 if ( stat_fd == -1 ) { 00409 PAPIERROR( "open(%s)", buf ); 00410 return PAPI_ESYS; 00411 } 00412 00413 rv = read( stat_fd, buf, LINE_MAX * sizeof ( char ) ); 00414 if ( rv == -1 ) { 00415 if ( errno == EBADF ) { 00416 close(stat_fd); 00417 goto again; 00418 } 00419 PAPIERROR( "read()" ); 00420 close(stat_fd); 00421 return PAPI_ESYS; 00422 } 00423 lseek( stat_fd, 0, SEEK_SET ); 00424 00425 if (rv == LINE_MAX) rv--; 00426 buf[rv] = '\0'; 00427 SUBDBG( "Thread stat file is:%s\n", buf ); 00428 while ( ( cnt != 13 ) && ( i < rv ) ) { 00429 if ( buf[i] == ' ' ) { 00430 cnt++; 00431 } 00432 i++; 00433 } 00434 00435 if ( cnt != 13 ) { 00436 PAPIERROR( "utime and stime not in thread stat file?" ); 00437 close(stat_fd); 00438 return PAPI_ESYS; 00439 } 00440 00441 if ( sscanf( buf + i, "%llu %llu", &utime, &stime ) != 2 ) { 00442 close(stat_fd); 00443 PAPIERROR("Unable to scan two items from thread stat file at 13th space?"); 00444 return PAPI_ESYS; 00445 } 00446 00447 retval = ( utime + stime ) * ( long long ) 1000000 /_papi_os_info.clock_ticks; 00448 00449 close(stat_fd); 00450 00451 return retval; 00452 } 00453 00454 00455 /******************************************************************** 00456 * nanosecond timers * 00457 ********************************************************************/ 00458 00459 00460 00461 /******************************* 00462 * HAVE_CLOCK_GETTIME * 00463 *******************************/ 00464 00465 long long 00466 _linux_get_real_nsec_gettime( void ) 00467 { 00468 00469 long long retval; 00470 00471 struct timespec foo; 00472 #ifdef HAVE_CLOCK_GETTIME_REALTIME_HR 00473 syscall( __NR_clock_gettime, CLOCK_REALTIME_HR, &foo ); 00474 #else 00475 syscall( __NR_clock_gettime, CLOCK_REALTIME, &foo ); 00476 #endif 00477 retval = ( long long ) foo.tv_sec * ( long long ) 1000000000; 00478 retval += ( long long ) ( foo.tv_nsec ); 00479 00480 return retval; 00481 } 00482 00483 00484 /******************************/ 00485 /* HAVE_CLOCK_GETTIME_THREAD */ 00486 /******************************/ 00487 00488 long long 00489 _linux_get_virt_nsec_gettime( void ) 00490 { 00491 00492 long long retval; 00493 00494 struct timespec foo; 00495 00496 syscall( __NR_clock_gettime, CLOCK_THREAD_CPUTIME_ID, &foo ); 00497 retval = ( long long ) foo.tv_sec * ( long long ) 1000000000; 00498 retval += ( long long ) foo.tv_nsec ; 00499 00500 return retval; 00501 } 00502 00503 00504 00505