|
PAPI
5.0.1.0
|
00001 /* 00002 * File: linux-timer.c 00003 * 00004 */ 00005 00006 #include <time.h> 00007 #include <sys/syscall.h> 00008 00009 #include "papi.h" 00010 #include "papi_internal.h" 00011 #include "papi_vector.h" 00012 00013 #include <fcntl.h> 00014 #include <errno.h> 00015 #include <string.h> 00016 00017 #include <sys/time.h> 00018 00019 #include <fcntl.h> 00020 #include "linux-common.h" 00021 00022 #include <sys/time.h> 00023 #include <sys/resource.h> 00024 00025 #include <sys/times.h> 00026 00027 00028 00029 00030 #if defined(HAVE_MMTIMER) 00031 #include <sys/mman.h> 00032 #include <linux/mmtimer.h> 00033 #include <sys/ioctl.h> 00034 #ifndef MMTIMER_FULLNAME 00035 #define MMTIMER_FULLNAME "/dev/mmtimer" 00036 #endif 00037 00038 static int mmdev_fd; 00039 static unsigned long mmdev_mask; 00040 static unsigned long mmdev_ratio; 00041 static volatile unsigned long *mmdev_timer_addr; 00042 00043 /* setup mmtimer */ 00044 int mmtimer_setup(void) { 00045 00046 unsigned long femtosecs_per_tick = 0; 00047 unsigned long freq = 0; 00048 int result; 00049 int offset; 00050 00051 SUBDBG( "MMTIMER Opening %s\n", MMTIMER_FULLNAME ); 00052 if ( ( mmdev_fd = open( MMTIMER_FULLNAME, O_RDONLY ) ) == -1 ) { 00053 PAPIERROR( "Failed to open MM timer %s", MMTIMER_FULLNAME ); 00054 return PAPI_ESYS; 00055 } 00056 SUBDBG( "MMTIMER checking if we can mmap" ); 00057 if ( ioctl( mmdev_fd, MMTIMER_MMAPAVAIL, 0 ) != 1 ) { 00058 PAPIERROR( "mmap of MM timer unavailable" ); 00059 return PAPI_ESYS; 00060 } 00061 SUBDBG( "MMTIMER setting close on EXEC flag\n" ); 00062 if ( fcntl( mmdev_fd, F_SETFD, FD_CLOEXEC ) == -1 ) { 00063 PAPIERROR( "Failed to fcntl(FD_CLOEXEC) on MM timer FD %d: %s", 00064 mmdev_fd, strerror( errno ) ); 00065 return PAPI_ESYS; 00066 } 00067 SUBDBG( "MMTIMER is on FD %d, getting offset\n", mmdev_fd ); 00068 if ( ( offset = ioctl( mmdev_fd, MMTIMER_GETOFFSET, 0 ) ) < 0 ) { 00069 PAPIERROR( "Failed to get offset of MM timer" ); 00070 return PAPI_ESYS; 00071 } 00072 SUBDBG( "MMTIMER has offset of %d, getting frequency\n", offset ); 00073 if ( ioctl( mmdev_fd, MMTIMER_GETFREQ, &freq ) == -1 ) { 00074 PAPIERROR( "Failed to get frequency of MM timer" ); 00075 return PAPI_ESYS; 00076 } 00077 SUBDBG( "MMTIMER has frequency %lu Mhz\n", freq / 1000000 ); 00078 // don't know for sure, but I think this ratio is inverted 00079 // mmdev_ratio = (freq/1000000) / (unsigned long)_papi_hwi_system_info.hw_info.mhz; 00080 mmdev_ratio = 00081 ( unsigned long ) _papi_hwi_system_info.hw_info.cpu_max_mhz / 00082 ( freq / 1000000 ); 00083 SUBDBG( "MMTIMER has a ratio of %ld to the CPU's clock, getting resolution\n", 00084 mmdev_ratio ); 00085 if ( ioctl( mmdev_fd, MMTIMER_GETRES, &femtosecs_per_tick ) == -1 ) { 00086 PAPIERROR( "Failed to get femtoseconds per tick" ); 00087 return PAPI_ESYS; 00088 } 00089 SUBDBG( "MMTIMER res is %lu femtosecs/tick (10^-15s) or %f Mhz, getting valid bits\n", 00090 femtosecs_per_tick, 1.0e9 / ( double ) femtosecs_per_tick ); 00091 if ( ( result = ioctl( mmdev_fd, MMTIMER_GETBITS, 0 ) ) == -ENOSYS ) { 00092 PAPIERROR( "Failed to get number of bits in MMTIMER" ); 00093 return PAPI_ESYS; 00094 } 00095 mmdev_mask = ~( 0xffffffffffffffff << result ); 00096 SUBDBG( "MMTIMER has %d valid bits, mask 0x%16lx, getting mmaped page\n", 00097 result, mmdev_mask ); 00098 if ( ( mmdev_timer_addr = 00099 ( unsigned long * ) mmap( 0, getpagesize( ), PROT_READ, 00100 MAP_PRIVATE, mmdev_fd, 00101 0 ) ) == NULL ) { 00102 PAPIERROR( "Failed to mmap MM timer" ); 00103 return PAPI_ESYS; 00104 } 00105 SUBDBG( "MMTIMER page is at %p, actual address is %p\n", 00106 mmdev_timer_addr, mmdev_timer_addr + offset ); 00107 mmdev_timer_addr += offset; 00108 /* mmdev_fd should be closed and page should be unmapped in a global shutdown routine */ 00109 return PAPI_OK; 00110 00111 } 00112 00113 #else 00114 int mmtimer_setup(void) { return PAPI_OK; } 00115 #endif 00116 00117 00118 00119 00120 00121 /* Hardware clock functions */ 00122 00123 /* All architectures should set HAVE_CYCLES in configure if they have these. 00124 Not all do so for now, we have to guard at the end of the statement, 00125 instead of the top. When all archs set this, this region will be guarded 00126 with: 00127 #if defined(HAVE_CYCLE) 00128 which is equivalent to 00129 #if !defined(HAVE_GETTIMEOFDAY) && !defined(HAVE_CLOCK_GETTIME) 00130 */ 00131 00132 /************************/ 00133 /* MMTIMER get_cycles() */ 00134 /************************/ 00135 00136 #if defined(HAVE_MMTIMER) 00137 00138 static inline long long 00139 get_cycles( void ) 00140 { 00141 long long tmp = 0; 00142 00143 tmp = *mmdev_timer_addr & mmdev_mask; 00144 SUBDBG("MMTIMER is %llu, scaled %llu\n",tmp,tmp*mmdev_ratio); 00145 tmp *= mmdev_ratio; 00146 00147 return tmp; 00148 } 00149 00150 /************************/ 00151 /* ia64 get_cycles() */ 00152 /************************/ 00153 00154 #elif defined(__ia64__) 00155 extern int _perfmon2_pfm_pmu_type; 00156 00157 static inline long long 00158 get_cycles( void ) 00159 { 00160 long long tmp = 0; 00161 #if defined(__INTEL_COMPILER) 00162 tmp = __getReg( _IA64_REG_AR_ITC ); 00163 #else 00164 __asm__ __volatile__( "mov %0=ar.itc":"=r"( tmp )::"memory" ); 00165 #endif 00166 switch ( _perfmon2_pfm_pmu_type ) { 00167 case PFMLIB_MONTECITO_PMU: 00168 tmp = tmp * 4; 00169 break; 00170 } 00171 return tmp; 00172 } 00173 00174 /************************/ 00175 /* x86 get_cycles() */ 00176 /************************/ 00177 00178 #elif (defined(__i386__)||defined(__x86_64__)) 00179 static inline long long 00180 get_cycles( void ) 00181 { 00182 long long ret = 0; 00183 #ifdef __x86_64__ 00184 do { 00185 unsigned int a, d; 00186 asm volatile ( "rdtsc":"=a" ( a ), "=d"( d ) ); 00187 ( ret ) = ( ( long long ) a ) | ( ( ( long long ) d ) << 32 ); 00188 } 00189 while ( 0 ); 00190 #else 00191 __asm__ __volatile__( "rdtsc":"=A"( ret ): ); 00192 #endif 00193 return ret; 00194 } 00195 00196 /************************/ 00197 /* SPARC get_cycles() */ 00198 /************************/ 00199 00200 /* #define get_cycles _rtc ?? */ 00201 #elif defined(__sparc__) 00202 static inline long long 00203 get_cycles( void ) 00204 { 00205 register unsigned long ret asm( "g1" ); 00206 00207 __asm__ __volatile__( ".word 0x83410000" /* rd %tick, %g1 */ 00208 :"=r"( ret ) ); 00209 return ret; 00210 } 00211 00212 /************************/ 00213 /* POWER get_cycles() */ 00214 /************************/ 00215 00216 #elif (defined(__powerpc__) || defined(__arm__) || defined(__mips__)) 00217 /* 00218 * It's not possible to read the cycles from user space on ppc970. 00219 * There is a 64-bit time-base register (TBU|TBL), but its 00220 * update rate is implementation-specific and cannot easily be translated 00221 * into a cycle count. So don't implement get_cycles for now, 00222 * but instead, rely on the definition of HAVE_CLOCK_GETTIME_REALTIME in 00223 * _papi_hwd_get_real_usec() for the needed functionality. 00224 */ 00225 00226 static inline long long 00227 get_cycles( void ) 00228 { 00229 return 0; 00230 } 00231 00232 00233 #elif !defined(HAVE_GETTIMEOFDAY) && !defined(HAVE_CLOCK_GETTIME) 00234 #error "No get_cycles support for this architecture. " 00235 #endif 00236 00237 00238 00239 00240 00241 00242 long long 00243 _linux_get_real_cycles( void ) 00244 { 00245 long long retval; 00246 #if defined(HAVE_GETTIMEOFDAY)||defined(__powerpc__)||defined(__arm__)||defined(__mips__) 00247 00248 /* Crude estimate, not accurate in prescence of DVFS */ 00249 00250 retval = _papi_os_vector.get_real_usec( ) * 00251 ( long long ) _papi_hwi_system_info.hw_info.cpu_max_mhz; 00252 #else 00253 retval = get_cycles( ); 00254 #endif 00255 return retval; 00256 } 00257 00258 00259 00260 00261 /******************************************************************** 00262 * microsecond timers * 00263 ********************************************************************/ 00264 00265 00266 /******************************* 00267 * HAVE_CLOCK_GETTIME * 00268 *******************************/ 00269 00270 long long 00271 _linux_get_real_usec_gettime( void ) 00272 { 00273 00274 long long retval; 00275 00276 struct timespec foo; 00277 #ifdef HAVE_CLOCK_GETTIME_REALTIME_HR 00278 syscall( __NR_clock_gettime, CLOCK_REALTIME_HR, &foo ); 00279 #else 00280 syscall( __NR_clock_gettime, CLOCK_REALTIME, &foo ); 00281 #endif 00282 retval = ( long long ) foo.tv_sec * ( long long ) 1000000; 00283 retval += ( long long ) ( foo.tv_nsec / 1000 ); 00284 00285 return retval; 00286 } 00287 00288 /********************** 00289 * HAVE_GETTIMEOFDAY * 00290 **********************/ 00291 00292 long long 00293 _linux_get_real_usec_gettimeofday( void ) 00294 { 00295 00296 long long retval; 00297 00298 struct timeval buffer; 00299 gettimeofday( &buffer, NULL ); 00300 retval = ( long long ) buffer.tv_sec * ( long long ) 1000000; 00301 retval += ( long long ) ( buffer.tv_usec ); 00302 00303 return retval; 00304 } 00305 00306 00307 long long 00308 _linux_get_real_usec_cycles( void ) 00309 { 00310 00311 long long retval; 00312 00313 /* Not accurate in the prescence of DVFS */ 00314 00315 retval = get_cycles( ) / 00316 ( long long ) _papi_hwi_system_info.hw_info.cpu_max_mhz; 00317 00318 return retval; 00319 } 00320 00321 00322 00323 /******************************* 00324 * HAVE_PER_THREAD_GETRUSAGE * 00325 *******************************/ 00326 00327 long long 00328 _linux_get_virt_usec_rusage( void ) 00329 { 00330 00331 long long retval; 00332 00333 struct rusage buffer; 00334 00335 getrusage( RUSAGE_SELF, &buffer ); 00336 SUBDBG( "user %d system %d\n", ( int ) buffer.ru_utime.tv_sec, 00337 ( int ) buffer.ru_stime.tv_sec ); 00338 retval = ( long long ) ( buffer.ru_utime.tv_sec + buffer.ru_stime.tv_sec ) 00339 * ( long long ) 1000000; 00340 retval += (long long) ( buffer.ru_utime.tv_usec + buffer.ru_stime.tv_usec ); 00341 00342 return retval; 00343 } 00344 00345 /************************** 00346 * HAVE_PER_THREAD_TIMES * 00347 **************************/ 00348 00349 long long 00350 _linux_get_virt_usec_times( void ) 00351 { 00352 00353 long long retval; 00354 00355 struct tms buffer; 00356 00357 times( &buffer ); 00358 00359 SUBDBG( "user %d system %d\n", ( int ) buffer.tms_utime, 00360 ( int ) buffer.tms_stime ); 00361 retval = ( long long ) ( ( buffer.tms_utime + buffer.tms_stime ) * 00362 1000000 / sysconf( _SC_CLK_TCK )); 00363 00364 /* NOT CLOCKS_PER_SEC as in the headers! */ 00365 00366 return retval; 00367 } 00368 00369 /******************************/ 00370 /* HAVE_CLOCK_GETTIME_THREAD */ 00371 /******************************/ 00372 00373 long long 00374 _linux_get_virt_usec_gettime( void ) 00375 { 00376 00377 long long retval; 00378 00379 struct timespec foo; 00380 00381 syscall( __NR_clock_gettime, CLOCK_THREAD_CPUTIME_ID, &foo ); 00382 retval = ( long long ) foo.tv_sec * ( long long ) 1000000; 00383 retval += ( long long ) foo.tv_nsec / 1000; 00384 00385 return retval; 00386 } 00387 00388 /********************/ 00389 /* USE_PROC_PTTIMER */ 00390 /********************/ 00391 00392 long long 00393 _linux_get_virt_usec_pttimer( void ) 00394 { 00395 00396 long long retval; 00397 char buf[LINE_MAX]; 00398 long long utime, stime; 00399 int rv, cnt = 0, i = 0; 00400 int stat_fd; 00401 00402 00403 again: 00404 sprintf( buf, "/proc/%d/task/%d/stat", getpid( ), mygettid( ) ); 00405 stat_fd = open( buf, O_RDONLY ); 00406 if ( stat_fd == -1 ) { 00407 PAPIERROR( "open(%s)", buf ); 00408 return PAPI_ESYS; 00409 } 00410 00411 rv = read( stat_fd, buf, LINE_MAX * sizeof ( char ) ); 00412 if ( rv == -1 ) { 00413 if ( errno == EBADF ) { 00414 close(stat_fd); 00415 goto again; 00416 } 00417 PAPIERROR( "read()" ); 00418 close(stat_fd); 00419 return PAPI_ESYS; 00420 } 00421 lseek( stat_fd, 0, SEEK_SET ); 00422 00423 buf[rv] = '\0'; 00424 SUBDBG( "Thread stat file is:%s\n", buf ); 00425 while ( ( cnt != 13 ) && ( i < rv ) ) { 00426 if ( buf[i] == ' ' ) { 00427 cnt++; 00428 } 00429 i++; 00430 } 00431 00432 if ( cnt != 13 ) { 00433 PAPIERROR( "utime and stime not in thread stat file?" ); 00434 close(stat_fd); 00435 return PAPI_ESYS; 00436 } 00437 00438 if ( sscanf( buf + i, "%llu %llu", &utime, &stime ) != 2 ) { 00439 close(stat_fd); 00440 PAPIERROR("Unable to scan two items from thread stat file at 13th space?"); 00441 return PAPI_ESYS; 00442 } 00443 00444 retval = ( utime + stime ) * ( long long ) 1000000 /_papi_os_info.clock_ticks; 00445 00446 close(stat_fd); 00447 00448 return retval; 00449 } 00450 00451 00452 /******************************************************************** 00453 * nanosecond timers * 00454 ********************************************************************/ 00455 00456 00457 00458 /******************************* 00459 * HAVE_CLOCK_GETTIME * 00460 *******************************/ 00461 00462 long long 00463 _linux_get_real_nsec_gettime( void ) 00464 { 00465 00466 long long retval; 00467 00468 struct timespec foo; 00469 #ifdef HAVE_CLOCK_GETTIME_REALTIME_HR 00470 syscall( __NR_clock_gettime, CLOCK_REALTIME_HR, &foo ); 00471 #else 00472 syscall( __NR_clock_gettime, CLOCK_REALTIME, &foo ); 00473 #endif 00474 retval = ( long long ) foo.tv_sec * ( long long ) 1000000000; 00475 retval += ( long long ) ( foo.tv_nsec ); 00476 00477 return retval; 00478 } 00479 00480 00481 /******************************/ 00482 /* HAVE_CLOCK_GETTIME_THREAD */ 00483 /******************************/ 00484 00485 long long 00486 _linux_get_virt_nsec_gettime( void ) 00487 { 00488 00489 long long retval; 00490 00491 struct timespec foo; 00492 00493 syscall( __NR_clock_gettime, CLOCK_THREAD_CPUTIME_ID, &foo ); 00494 retval = ( long long ) foo.tv_sec * ( long long ) 1000000000; 00495 retval += ( long long ) foo.tv_nsec ; 00496 00497 return retval; 00498 } 00499 00500 00501 00502