PAPI  5.3.0.0
linux-timer.c
Go to the documentation of this file.
00001 /*
00002  * File:    linux-timer.c
00003  *
00004  */
00005 
00006 #include <time.h>
00007 #include <sys/syscall.h>
00008 
00009 #include "papi.h"
00010 #include "papi_internal.h"
00011 #include "papi_vector.h"
00012 
00013 #include <fcntl.h>
00014 #include <errno.h>
00015 #include <string.h>
00016 
00017 #include <sys/time.h>
00018 
00019 #include <fcntl.h>
00020 #include "linux-common.h"
00021 
00022 #include <sys/time.h>
00023 #include <sys/resource.h>
00024 
00025 #include <sys/times.h>
00026 
00027 #ifdef __ia64__
00028 #include "perfmon/pfmlib_itanium2.h"
00029 #include "perfmon/pfmlib_montecito.h"
00030 #endif
00031 
00032 #if defined(HAVE_MMTIMER)
00033 #include <sys/mman.h>
00034 #include <linux/mmtimer.h>
00035 #include <sys/ioctl.h>
00036 #ifndef MMTIMER_FULLNAME
00037 #define MMTIMER_FULLNAME "/dev/mmtimer"
00038 #endif
00039 
00040 static int mmdev_fd;
00041 static unsigned long mmdev_mask;
00042 static unsigned long mmdev_ratio;
00043 static volatile unsigned long *mmdev_timer_addr;
00044 
00045         /* setup mmtimer */
00046 int mmtimer_setup(void) {
00047 
00048       unsigned long femtosecs_per_tick = 0;
00049       unsigned long freq = 0;
00050       int result;
00051       int offset;
00052 
00053       SUBDBG( "MMTIMER Opening %s\n", MMTIMER_FULLNAME );
00054       if ( ( mmdev_fd = open( MMTIMER_FULLNAME, O_RDONLY ) ) == -1 ) {
00055         PAPIERROR( "Failed to open MM timer %s", MMTIMER_FULLNAME );
00056         return PAPI_ESYS;
00057       }
00058       SUBDBG( "MMTIMER checking if we can mmap" );
00059       if ( ioctl( mmdev_fd, MMTIMER_MMAPAVAIL, 0 ) != 1 ) {
00060         PAPIERROR( "mmap of MM timer unavailable" );
00061         return PAPI_ESYS;
00062       }
00063       SUBDBG( "MMTIMER setting close on EXEC flag\n" );
00064       if ( fcntl( mmdev_fd, F_SETFD, FD_CLOEXEC ) == -1 ) {
00065         PAPIERROR( "Failed to fcntl(FD_CLOEXEC) on MM timer FD %d: %s",
00066                mmdev_fd, strerror( errno ) );
00067         return PAPI_ESYS;
00068       }
00069       SUBDBG( "MMTIMER is on FD %d, getting offset\n", mmdev_fd );
00070       if ( ( offset = ioctl( mmdev_fd, MMTIMER_GETOFFSET, 0 ) ) < 0 ) {
00071         PAPIERROR( "Failed to get offset of MM timer" );
00072         return PAPI_ESYS;
00073       }
00074       SUBDBG( "MMTIMER has offset of %d, getting frequency\n", offset );
00075       if ( ioctl( mmdev_fd, MMTIMER_GETFREQ, &freq ) == -1 ) {
00076         PAPIERROR( "Failed to get frequency of MM timer" );
00077         return PAPI_ESYS;
00078       }
00079       SUBDBG( "MMTIMER has frequency %lu Mhz\n", freq / 1000000 );
00080       // don't know for sure, but I think this ratio is inverted
00081       //     mmdev_ratio = (freq/1000000) / (unsigned long)_papi_hwi_system_info.hw_info.mhz;
00082           mmdev_ratio =
00083           ( unsigned long ) _papi_hwi_system_info.hw_info.cpu_max_mhz / 
00084                           ( freq / 1000000 );
00085           SUBDBG( "MMTIMER has a ratio of %ld to the CPU's clock, getting resolution\n",
00086             mmdev_ratio );
00087           if ( ioctl( mmdev_fd, MMTIMER_GETRES, &femtosecs_per_tick ) == -1 ) {
00088           PAPIERROR( "Failed to get femtoseconds per tick" );
00089           return PAPI_ESYS;
00090           }
00091           SUBDBG( "MMTIMER res is %lu femtosecs/tick (10^-15s) or %f Mhz, getting valid bits\n",
00092       femtosecs_per_tick, 1.0e9 / ( double ) femtosecs_per_tick );
00093           if ( ( result = ioctl( mmdev_fd, MMTIMER_GETBITS, 0 ) ) == -ENOSYS ) {
00094          PAPIERROR( "Failed to get number of bits in MMTIMER" );
00095          return PAPI_ESYS;
00096           }
00097           mmdev_mask = ~( 0xffffffffffffffff << result );
00098           SUBDBG( "MMTIMER has %d valid bits, mask 0x%16lx, getting mmaped page\n",
00099             result, mmdev_mask );
00100           if ( ( mmdev_timer_addr =
00101                ( unsigned long * ) mmap( 0, getpagesize(  ), PROT_READ,
00102                          MAP_PRIVATE, mmdev_fd,
00103                          0 ) ) == NULL ) {
00104          PAPIERROR( "Failed to mmap MM timer" );
00105          return PAPI_ESYS;
00106           }
00107           SUBDBG( "MMTIMER page is at %p, actual address is %p\n",
00108             mmdev_timer_addr, mmdev_timer_addr + offset );
00109           mmdev_timer_addr += offset;
00110           /* mmdev_fd should be closed and page should be unmapped in a global shutdown routine */
00111       return PAPI_OK;
00112 
00113 }
00114 
00115 #else
00116 int mmtimer_setup(void) { return PAPI_OK; }
00117 #endif
00118 
00119 
00120 
00121 
00122 
00123 /* Hardware clock functions */
00124 
00125 /* All architectures should set HAVE_CYCLES in configure if they have these.
00126    Not all do so yet, so for now we have to guard at the end of the
00127    #if/#elif chain instead of the top. When all archs set this, this region
00128    will be guarded with:
00129      #if defined(HAVE_CYCLES)
00130    which is equivalent to
00131      #if !defined(HAVE_GETTIMEOFDAY) && !defined(HAVE_CLOCK_GETTIME)
00132 */
00133 
00134 /************************/
00135 /* MMTIMER get_cycles() */
00136 /************************/
00137 
00138 #if defined(HAVE_MMTIMER)
00139 
00140 static inline long long
00141 get_cycles( void )
00142 {
00143     long long tmp = 0;
00144 
00145         tmp = *mmdev_timer_addr & mmdev_mask;
00146     SUBDBG("MMTIMER is %llu, scaled %llu\n",tmp,tmp*mmdev_ratio);
00147         tmp *= mmdev_ratio;
00148 
00149     return tmp;
00150 }
00151 
00152 /************************/
00153 /* ia64 get_cycles()    */
00154 /************************/
00155 
00156 #elif defined(__ia64__)
00157 extern int _perfmon2_pfm_pmu_type;
00158 
00159 static inline long long
00160 get_cycles( void )
00161 {
00162     long long tmp = 0;
00163 #if defined(__INTEL_COMPILER)
00164     tmp = __getReg( _IA64_REG_AR_ITC );
00165 #else
00166     __asm__ __volatile__( "mov %0=ar.itc":"=r"( tmp )::"memory" );
00167 #endif
00168     switch ( _perfmon2_pfm_pmu_type ) {
00169     case PFMLIB_MONTECITO_PMU:
00170         tmp = tmp * 4;
00171         break;
00172     }
00173     return tmp;
00174 }
00175 
00176 /************************/
00177 /* x86 get_cycles()     */
00178 /************************/
00179 
00180 #elif (defined(__i386__)||defined(__x86_64__))
/* Return the current value of the x86 time-stamp counter (rdtsc).
 *
 * On x86_64 rdtsc delivers the low half in EAX and the high half in EDX;
 * the halves are combined with unsigned arithmetic so the shift can never
 * overflow a signed type.  On 32-bit x86 the "=A" constraint names the
 * EDX:EAX pair directly.
 *
 * __asm__/__volatile__ are used in both branches so the code also builds
 * in strict ISO modes (-std=c99/-std=c11), where the plain `asm` keyword
 * is not available.
 */
static inline long long
get_cycles( void )
{
#ifdef __x86_64__
    unsigned int lo, hi;

    __asm__ __volatile__( "rdtsc":"=a"( lo ), "=d"( hi ) );
    return ( long long ) ( ( ( unsigned long long ) hi << 32 ) |
                           ( unsigned long long ) lo );
#else
    long long ret = 0;

    __asm__ __volatile__( "rdtsc":"=A"( ret ): );
    return ret;
#endif
}
00197 
00198 /************************/
00199 /* SPARC get_cycles()   */
00200 /************************/
00201 
00202 /* #define get_cycles _rtc ?? */
00203 #elif defined(__sparc__)
00204 static inline long long
00205 get_cycles( void )
00206 {
00207     register unsigned long ret asm( "g1" );
00208 
00209     __asm__ __volatile__( ".word 0x83410000"    /* rd %tick, %g1 */
00210                           :"=r"( ret ) );
00211     return ret;
00212 }
00213 
00214 /************************/
00215 /* POWER get_cycles()   */
00216 /************************/
00217 
00218 #elif (defined(__powerpc__) || defined(__arm__) || defined(__mips__) || defined(__aarch64__))
00219 /*
00220  * It's not possible to read the cycles from user space on ppc970.
00221  * There is a 64-bit time-base register (TBU|TBL), but its
00222  * update rate is implementation-specific and cannot easily be translated
00223  * into a cycle count.  So don't implement get_cycles for now,
00224  * but instead, rely on the definition of HAVE_CLOCK_GETTIME_REALTIME in
00225  * _papi_hwd_get_real_usec() for the needed functionality.
00226 */
00227 
/* Placeholder for architectures without a cycle counter implementation
 * in this file: always yields 0. */
static inline long long
get_cycles( void )
{
    const long long no_cycle_count = 0;

    return no_cycle_count;
}
00233 
00234 
00235 #elif !defined(HAVE_GETTIMEOFDAY) && !defined(HAVE_CLOCK_GETTIME)
00236 #error "No get_cycles support for this architecture. "
00237 #endif
00238 
00239 
00240 
00241 
00242 
00243 
/* Return the real (wall-clock) cycle count.
 *
 * Architectures whose get_cycles() in this file is only a stub returning 0
 * (powerpc, arm, mips, aarch64), and builds configured with
 * HAVE_GETTIMEOFDAY, estimate cycles as elapsed microseconds times the
 * maximum CPU frequency in MHz.  All other builds read the hardware
 * counter through get_cycles().
 */
long long
_linux_get_real_cycles( void )
{
    long long retval;
#if defined(HAVE_GETTIMEOFDAY)||defined(__powerpc__)||defined(__arm__)||defined(__mips__)||defined(__aarch64__)

    /* Crude estimate, not accurate in presence of DVFS */

    retval = _papi_os_vector.get_real_usec(  ) *
        ( long long ) _papi_hwi_system_info.hw_info.cpu_max_mhz;
#else
    retval = get_cycles(  );
#endif
    return retval;
}
00259 
00260 
00261 
00262 
00263 /********************************************************************
00264  * microsecond timers                                               *
00265  ********************************************************************/
00266 
00267 
00268 /*******************************
00269  * HAVE_CLOCK_GETTIME          *
00270  *******************************/
00271 
00272 long long
00273 _linux_get_real_usec_gettime( void )
00274 {
00275     
00276    long long retval;
00277 
00278    struct timespec foo;
00279 #ifdef HAVE_CLOCK_GETTIME_REALTIME_HR
00280    syscall( __NR_clock_gettime, CLOCK_REALTIME_HR, &foo );
00281 #else
00282    syscall( __NR_clock_gettime, CLOCK_REALTIME, &foo );
00283 #endif
00284    retval = ( long long ) foo.tv_sec * ( long long ) 1000000;
00285    retval += ( long long ) ( foo.tv_nsec / 1000 );
00286 
00287    return retval;
00288 }
00289 
00290 /**********************
00291  * HAVE_GETTIMEOFDAY  *
00292  **********************/
00293 
/* Wall-clock time in microseconds, read with gettimeofday(). */
long long
_linux_get_real_usec_gettimeofday( void )
{
   struct timeval now;

   gettimeofday( &now, NULL );

   return ( long long ) now.tv_sec * 1000000LL + ( long long ) now.tv_usec;
}
00307 
00308 
00309 long long
00310 _linux_get_real_usec_cycles( void )
00311 {
00312     
00313    long long retval;
00314 
00315    /* Not accurate in the prescence of DVFS */
00316 
00317    retval = get_cycles(  ) / 
00318             ( long long ) _papi_hwi_system_info.hw_info.cpu_max_mhz;
00319 
00320    return retval;
00321 }
00322 
00323 
00324 
00325 /******************************* 
00326  * HAVE_PER_THREAD_GETRUSAGE   *
00327  *******************************/
00328 
/* CPU time (user + system) in microseconds as reported by
 * getrusage( RUSAGE_SELF ). */
long long
_linux_get_virt_usec_rusage( void )
{
    struct rusage usage;
    long long sec_part_usec;
    long long usec_part;

    getrusage( RUSAGE_SELF, &usage );
    SUBDBG( "user %d system %d\n", ( int ) usage.ru_utime.tv_sec,
                ( int ) usage.ru_stime.tv_sec );

    /* whole seconds of user + system time, converted to usec */
    sec_part_usec =
        ( long long ) ( usage.ru_utime.tv_sec + usage.ru_stime.tv_sec )
        * 1000000LL;
    /* sub-second remainders of both, already in usec */
    usec_part =
        ( long long ) ( usage.ru_utime.tv_usec + usage.ru_stime.tv_usec );

    return sec_part_usec + usec_part;
}
00346 
00347 /**************************
00348  * HAVE_PER_THREAD_TIMES  *
00349  **************************/
00350 
/* CPU time (user + system) in microseconds as reported by times().
 *
 * times() counts in clock ticks; the number of ticks per second comes from
 * sysconf( _SC_CLK_TCK ) -- NOT CLOCKS_PER_SEC as in the headers!
 *
 * The tick total is widened to long long before it is multiplied by
 * 1000000: clock_t may be only 32 bits wide, in which case the product
 * would overflow after a few thousand ticks.
 */
long long
_linux_get_virt_usec_times( void )
{

   long long retval;
   long long total_ticks;

   struct tms buffer;

   times( &buffer );

   SUBDBG( "user %d system %d\n", ( int ) buffer.tms_utime,
                ( int ) buffer.tms_stime );

   total_ticks = ( long long ) buffer.tms_utime +
                 ( long long ) buffer.tms_stime;
   retval = total_ticks * 1000000LL / ( long long ) sysconf( _SC_CLK_TCK );

   return retval;
}
00370 
00371 /******************************/
00372 /* HAVE_CLOCK_GETTIME_THREAD  */
00373 /******************************/
00374 
00375 long long
00376 _linux_get_virt_usec_gettime( void )
00377 {
00378 
00379     long long retval;
00380 
00381     struct timespec foo;
00382 
00383     syscall( __NR_clock_gettime, CLOCK_THREAD_CPUTIME_ID, &foo );
00384     retval = ( long long ) foo.tv_sec * ( long long ) 1000000;
00385     retval += ( long long ) foo.tv_nsec / 1000;
00386     
00387     return retval;
00388 }
00389 
00390 /********************/
00391 /* USE_PROC_PTTIMER */
00392 /********************/
00393 
00394 long long
00395 _linux_get_virt_usec_pttimer( void )
00396 {
00397 
00398    long long retval;
00399    char buf[LINE_MAX];
00400    long long utime, stime;
00401    int rv, cnt = 0, i = 0;
00402    int stat_fd;
00403 
00404 
00405 again:
00406    sprintf( buf, "/proc/%d/task/%d/stat", getpid(  ), mygettid(  ) );
00407    stat_fd = open( buf, O_RDONLY );
00408    if ( stat_fd == -1 ) {
00409       PAPIERROR( "open(%s)", buf );
00410       return PAPI_ESYS;
00411    }
00412 
00413    rv = read( stat_fd, buf, LINE_MAX * sizeof ( char ) );
00414    if ( rv == -1 ) {
00415       if ( errno == EBADF ) {
00416      close(stat_fd);     
00417      goto again;
00418       }
00419       PAPIERROR( "read()" );
00420       close(stat_fd);
00421       return PAPI_ESYS;
00422    }
00423    lseek( stat_fd, 0, SEEK_SET );
00424 
00425    if (rv == LINE_MAX) rv--;
00426    buf[rv] = '\0';
00427    SUBDBG( "Thread stat file is:%s\n", buf );
00428    while ( ( cnt != 13 ) && ( i < rv ) ) {
00429       if ( buf[i] == ' ' ) {
00430      cnt++;
00431       }
00432       i++;
00433    }
00434 
00435    if ( cnt != 13 ) {
00436       PAPIERROR( "utime and stime not in thread stat file?" );
00437       close(stat_fd);
00438       return PAPI_ESYS;
00439    }
00440 
00441    if ( sscanf( buf + i, "%llu %llu", &utime, &stime ) != 2 ) {
00442       close(stat_fd);
00443       PAPIERROR("Unable to scan two items from thread stat file at 13th space?");
00444       return PAPI_ESYS;
00445    }
00446 
00447    retval = ( utime + stime ) * ( long long ) 1000000 /_papi_os_info.clock_ticks;
00448 
00449    close(stat_fd);
00450 
00451    return retval;
00452 }
00453 
00454 
00455 /********************************************************************
00456  * nanosecond timers                                                *
00457  ********************************************************************/
00458 
00459 
00460 
00461 /*******************************
00462  * HAVE_CLOCK_GETTIME          *
00463  *******************************/
00464 
00465 long long
00466 _linux_get_real_nsec_gettime( void )
00467 {
00468     
00469    long long retval;
00470 
00471    struct timespec foo;
00472 #ifdef HAVE_CLOCK_GETTIME_REALTIME_HR
00473    syscall( __NR_clock_gettime, CLOCK_REALTIME_HR, &foo );
00474 #else
00475    syscall( __NR_clock_gettime, CLOCK_REALTIME, &foo );
00476 #endif
00477    retval = ( long long ) foo.tv_sec * ( long long ) 1000000000;
00478    retval += ( long long ) ( foo.tv_nsec );
00479 
00480    return retval;
00481 }
00482 
00483 
00484 /******************************/
00485 /* HAVE_CLOCK_GETTIME_THREAD  */
00486 /******************************/
00487 
00488 long long
00489 _linux_get_virt_nsec_gettime( void )
00490 {
00491 
00492     long long retval;
00493 
00494     struct timespec foo;
00495 
00496     syscall( __NR_clock_gettime, CLOCK_THREAD_CPUTIME_ID, &foo );
00497     retval = ( long long ) foo.tv_sec * ( long long ) 1000000000;
00498     retval += ( long long ) foo.tv_nsec ;
00499     
00500     return retval;
00501 }
00502 
00503 
00504 
00505 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines