PAPI  5.0.1.0
linux-timer.c
Go to the documentation of this file.
00001 /*
00002  * File:    linux-timer.c
00003  *
00004  */
00005 
00006 #include <time.h>
00007 #include <sys/syscall.h>
00008 
00009 #include "papi.h"
00010 #include "papi_internal.h"
00011 #include "papi_vector.h"
00012 
00013 #include <fcntl.h>
00014 #include <errno.h>
00015 #include <string.h>
00016 
00017 #include <sys/time.h>
00018 
00019 #include <fcntl.h>
00020 #include "linux-common.h"
00021 
00022 #include <sys/time.h>
00023 #include <sys/resource.h>
00024 
00025 #include <sys/times.h>
00026 
00027 
00028 
00029 
00030 #if defined(HAVE_MMTIMER)
00031 #include <sys/mman.h>
00032 #include <linux/mmtimer.h>
00033 #include <sys/ioctl.h>
00034 #ifndef MMTIMER_FULLNAME
00035 #define MMTIMER_FULLNAME "/dev/mmtimer"
00036 #endif
00037 
00038 static int mmdev_fd;
00039 static unsigned long mmdev_mask;
00040 static unsigned long mmdev_ratio;
00041 static volatile unsigned long *mmdev_timer_addr;
00042 
00043         /* setup mmtimer */
00044 int mmtimer_setup(void) {
00045 
00046       unsigned long femtosecs_per_tick = 0;
00047       unsigned long freq = 0;
00048       int result;
00049       int offset;
00050 
00051       SUBDBG( "MMTIMER Opening %s\n", MMTIMER_FULLNAME );
00052       if ( ( mmdev_fd = open( MMTIMER_FULLNAME, O_RDONLY ) ) == -1 ) {
00053         PAPIERROR( "Failed to open MM timer %s", MMTIMER_FULLNAME );
00054         return PAPI_ESYS;
00055       }
00056       SUBDBG( "MMTIMER checking if we can mmap" );
00057       if ( ioctl( mmdev_fd, MMTIMER_MMAPAVAIL, 0 ) != 1 ) {
00058         PAPIERROR( "mmap of MM timer unavailable" );
00059         return PAPI_ESYS;
00060       }
00061       SUBDBG( "MMTIMER setting close on EXEC flag\n" );
00062       if ( fcntl( mmdev_fd, F_SETFD, FD_CLOEXEC ) == -1 ) {
00063         PAPIERROR( "Failed to fcntl(FD_CLOEXEC) on MM timer FD %d: %s",
00064                mmdev_fd, strerror( errno ) );
00065         return PAPI_ESYS;
00066       }
00067       SUBDBG( "MMTIMER is on FD %d, getting offset\n", mmdev_fd );
00068       if ( ( offset = ioctl( mmdev_fd, MMTIMER_GETOFFSET, 0 ) ) < 0 ) {
00069         PAPIERROR( "Failed to get offset of MM timer" );
00070         return PAPI_ESYS;
00071       }
00072       SUBDBG( "MMTIMER has offset of %d, getting frequency\n", offset );
00073       if ( ioctl( mmdev_fd, MMTIMER_GETFREQ, &freq ) == -1 ) {
00074         PAPIERROR( "Failed to get frequency of MM timer" );
00075         return PAPI_ESYS;
00076       }
00077       SUBDBG( "MMTIMER has frequency %lu Mhz\n", freq / 1000000 );
00078       // don't know for sure, but I think this ratio is inverted
00079       //     mmdev_ratio = (freq/1000000) / (unsigned long)_papi_hwi_system_info.hw_info.mhz;
00080           mmdev_ratio =
00081           ( unsigned long ) _papi_hwi_system_info.hw_info.cpu_max_mhz / 
00082                           ( freq / 1000000 );
00083           SUBDBG( "MMTIMER has a ratio of %ld to the CPU's clock, getting resolution\n",
00084             mmdev_ratio );
00085           if ( ioctl( mmdev_fd, MMTIMER_GETRES, &femtosecs_per_tick ) == -1 ) {
00086           PAPIERROR( "Failed to get femtoseconds per tick" );
00087           return PAPI_ESYS;
00088           }
00089           SUBDBG( "MMTIMER res is %lu femtosecs/tick (10^-15s) or %f Mhz, getting valid bits\n",
00090       femtosecs_per_tick, 1.0e9 / ( double ) femtosecs_per_tick );
00091           if ( ( result = ioctl( mmdev_fd, MMTIMER_GETBITS, 0 ) ) == -ENOSYS ) {
00092          PAPIERROR( "Failed to get number of bits in MMTIMER" );
00093          return PAPI_ESYS;
00094           }
00095           mmdev_mask = ~( 0xffffffffffffffff << result );
00096           SUBDBG( "MMTIMER has %d valid bits, mask 0x%16lx, getting mmaped page\n",
00097             result, mmdev_mask );
00098           if ( ( mmdev_timer_addr =
00099                ( unsigned long * ) mmap( 0, getpagesize(  ), PROT_READ,
00100                          MAP_PRIVATE, mmdev_fd,
00101                          0 ) ) == NULL ) {
00102          PAPIERROR( "Failed to mmap MM timer" );
00103          return PAPI_ESYS;
00104           }
00105           SUBDBG( "MMTIMER page is at %p, actual address is %p\n",
00106             mmdev_timer_addr, mmdev_timer_addr + offset );
00107           mmdev_timer_addr += offset;
00108           /* mmdev_fd should be closed and page should be unmapped in a global shutdown routine */
00109       return PAPI_OK;
00110 
00111 }
00112 
00113 #else
00114 int mmtimer_setup(void) { return PAPI_OK; }
00115 #endif
00116 
00117 
00118 
00119 
00120 
00121 /* Hardware clock functions */
00122 
/* All architectures should set HAVE_CYCLES in configure if they have these.
   Not all of them do so yet, so for now we have to guard at the end of the
   #if/#elif chain instead of at the top. Once all archs set this, this
   region will be guarded with:
     #if defined(HAVE_CYCLES)
   which is equivalent to
     #if !defined(HAVE_GETTIMEOFDAY) && !defined(HAVE_CLOCK_GETTIME)
*/
00131 
00132 /************************/
00133 /* MMTIMER get_cycles() */
00134 /************************/
00135 
00136 #if defined(HAVE_MMTIMER)
00137 
00138 static inline long long
00139 get_cycles( void )
00140 {
00141     long long tmp = 0;
00142 
00143         tmp = *mmdev_timer_addr & mmdev_mask;
00144     SUBDBG("MMTIMER is %llu, scaled %llu\n",tmp,tmp*mmdev_ratio);
00145         tmp *= mmdev_ratio;
00146 
00147     return tmp;
00148 }
00149 
00150 /************************/
00151 /* ia64 get_cycles()    */
00152 /************************/
00153 
00154 #elif defined(__ia64__)
00155 extern int _perfmon2_pfm_pmu_type;
00156 
00157 static inline long long
00158 get_cycles( void )
00159 {
00160     long long tmp = 0;
00161 #if defined(__INTEL_COMPILER)
00162     tmp = __getReg( _IA64_REG_AR_ITC );
00163 #else
00164     __asm__ __volatile__( "mov %0=ar.itc":"=r"( tmp )::"memory" );
00165 #endif
00166     switch ( _perfmon2_pfm_pmu_type ) {
00167     case PFMLIB_MONTECITO_PMU:
00168         tmp = tmp * 4;
00169         break;
00170     }
00171     return tmp;
00172 }
00173 
00174 /************************/
00175 /* x86 get_cycles()     */
00176 /************************/
00177 
00178 #elif (defined(__i386__)||defined(__x86_64__))
/*
 * Read the x86 time-stamp counter with the rdtsc instruction.
 *
 * On x86_64 rdtsc delivers the low 32 bits in EAX and the high 32 bits in
 * EDX; the halves are combined in unsigned arithmetic so that the shift can
 * never overflow a signed type.  On 32-bit x86 the "=A" constraint hands
 * back the EDX:EAX pair directly as a 64-bit value.
 *
 * The __asm__/__volatile__ spellings are used (rather than plain asm) so the
 * code also compiles in strict ISO C modes such as -std=c99 / -std=c11.
 *
 * Returns the counter value as a long long.
 */
static inline long long
get_cycles( void )
{
    long long ret = 0;
#ifdef __x86_64__
    unsigned int lo, hi;

    __asm__ __volatile__( "rdtsc":"=a"( lo ), "=d"( hi ) );
    ret = ( long long ) ( ( unsigned long long ) lo |
                          ( ( unsigned long long ) hi << 32 ) );
#else
    __asm__ __volatile__( "rdtsc":"=A"( ret ): );
#endif
    return ret;
}
00195 
00196 /************************/
00197 /* SPARC get_cycles()   */
00198 /************************/
00199 
00200 /* #define get_cycles _rtc ?? */
00201 #elif defined(__sparc__)
/*
 * Read the SPARC %tick register.  The instruction is emitted as a raw
 * opcode word and delivers its result in %g1, to which the local variable
 * is pinned.
 */
static inline long long
get_cycles( void )
{
    register unsigned long tick __asm__( "g1" );

    __asm__ __volatile__( ".word 0x83410000"    /* rd %tick, %g1 */
                          :"=r"( tick ) );

    return ( long long ) tick;
}
00211 
00212 /************************/
00213 /* POWER get_cycles()   */
00214 /************************/
00215 
00216 #elif (defined(__powerpc__) || defined(__arm__) || defined(__mips__))
/*
 * Cycles cannot be read from user space on ppc970.  A 64-bit time-base
 * register (TBU|TBL) exists, but its update rate is implementation-specific
 * and cannot easily be converted into a cycle count.  get_cycles() is
 * therefore only a placeholder here; the needed functionality comes from
 * the definition of HAVE_CLOCK_GETTIME_REALTIME in _papi_hwd_get_real_usec().
 */

/* Placeholder: no cycle counter is read on these architectures; always 0. */
static inline long long
get_cycles( void )
{
  const long long no_cycle_counter = 0;

  return no_cycle_counter;
}
00231 
00232 
00233 #elif !defined(HAVE_GETTIMEOFDAY) && !defined(HAVE_CLOCK_GETTIME)
00234 #error "No get_cycles support for this architecture. "
00235 #endif
00236 
00237 
00238 
00239 
00240 
00241 
/*
 * Return the current real (wall-clock) cycle count.
 *
 * When HAVE_GETTIMEOFDAY is defined, or on powerpc/arm/mips, the value is
 * estimated as real microseconds times the CPU's maximum MHz; otherwise the
 * architecture's get_cycles() is used directly.
 */
long long
_linux_get_real_cycles( void )
{
#if defined(HAVE_GETTIMEOFDAY)||defined(__powerpc__)||defined(__arm__)||defined(__mips__)
	/* Crude estimate, not accurate in the presence of DVFS */
	long long usec = _papi_os_vector.get_real_usec(  );
	long long mhz = ( long long ) _papi_hwi_system_info.hw_info.cpu_max_mhz;

	return usec * mhz;
#else
	return get_cycles(  );
#endif
}
00257 
00258 
00259 
00260 
00261 /********************************************************************
00262  * microsecond timers                                               *
00263  ********************************************************************/
00264 
00265 
00266 /*******************************
00267  * HAVE_CLOCK_GETTIME          *
00268  *******************************/
00269 
00270 long long
00271 _linux_get_real_usec_gettime( void )
00272 {
00273     
00274    long long retval;
00275 
00276    struct timespec foo;
00277 #ifdef HAVE_CLOCK_GETTIME_REALTIME_HR
00278    syscall( __NR_clock_gettime, CLOCK_REALTIME_HR, &foo );
00279 #else
00280    syscall( __NR_clock_gettime, CLOCK_REALTIME, &foo );
00281 #endif
00282    retval = ( long long ) foo.tv_sec * ( long long ) 1000000;
00283    retval += ( long long ) ( foo.tv_nsec / 1000 );
00284 
00285    return retval;
00286 }
00287 
00288 /**********************
00289  * HAVE_GETTIMEOFDAY  *
00290  **********************/
00291 
/*
 * Real time in microseconds, obtained from gettimeofday():
 * seconds scaled to microseconds plus the microsecond remainder.
 */
long long
_linux_get_real_usec_gettimeofday( void )
{
   struct timeval now;

   gettimeofday( &now, NULL );

   return ( long long ) now.tv_sec * 1000000LL + ( long long ) now.tv_usec;
}
00305 
00306 
00307 long long
00308 _linux_get_real_usec_cycles( void )
00309 {
00310     
00311    long long retval;
00312 
00313    /* Not accurate in the prescence of DVFS */
00314 
00315    retval = get_cycles(  ) / 
00316             ( long long ) _papi_hwi_system_info.hw_info.cpu_max_mhz;
00317 
00318    return retval;
00319 }
00320 
00321 
00322 
00323 /******************************* 
00324  * HAVE_PER_THREAD_GETRUSAGE   *
00325  *******************************/
00326 
/*
 * Virtual time in microseconds from getrusage( RUSAGE_SELF ):
 * the sum of the reported user and system times.
 */
long long
_linux_get_virt_usec_rusage( void )
{
	struct rusage usage;
	long long seconds;
	long long micros;

	getrusage( RUSAGE_SELF, &usage );
	SUBDBG( "user %d system %d\n", ( int ) usage.ru_utime.tv_sec,
			( int ) usage.ru_stime.tv_sec );

	/* Add the whole-second parts and the microsecond parts separately */
	seconds = ( long long ) ( usage.ru_utime.tv_sec + usage.ru_stime.tv_sec );
	micros = ( long long ) ( usage.ru_utime.tv_usec + usage.ru_stime.tv_usec );

	return seconds * 1000000LL + micros;
}
00344 
00345 /**************************
00346  * HAVE_PER_THREAD_TIMES  *
00347  **************************/
00348 
00349 long long
00350 _linux_get_virt_usec_times( void )
00351 {
00352 
00353    long long retval;
00354 
00355    struct tms buffer;
00356 
00357    times( &buffer );
00358 
00359    SUBDBG( "user %d system %d\n", ( int ) buffer.tms_utime,
00360                 ( int ) buffer.tms_stime );
00361    retval = ( long long ) ( ( buffer.tms_utime + buffer.tms_stime ) * 
00362                 1000000 / sysconf( _SC_CLK_TCK ));
00363 
00364    /* NOT CLOCKS_PER_SEC as in the headers! */
00365     
00366    return retval;
00367 }
00368 
00369 /******************************/
00370 /* HAVE_CLOCK_GETTIME_THREAD  */
00371 /******************************/
00372 
00373 long long
00374 _linux_get_virt_usec_gettime( void )
00375 {
00376 
00377     long long retval;
00378 
00379     struct timespec foo;
00380 
00381     syscall( __NR_clock_gettime, CLOCK_THREAD_CPUTIME_ID, &foo );
00382     retval = ( long long ) foo.tv_sec * ( long long ) 1000000;
00383     retval += ( long long ) foo.tv_nsec / 1000;
00384     
00385     return retval;
00386 }
00387 
00388 /********************/
00389 /* USE_PROC_PTTIMER */
00390 /********************/
00391 
00392 long long
00393 _linux_get_virt_usec_pttimer( void )
00394 {
00395 
00396    long long retval;
00397    char buf[LINE_MAX];
00398    long long utime, stime;
00399    int rv, cnt = 0, i = 0;
00400    int stat_fd;
00401 
00402 
00403 again:
00404    sprintf( buf, "/proc/%d/task/%d/stat", getpid(  ), mygettid(  ) );
00405    stat_fd = open( buf, O_RDONLY );
00406    if ( stat_fd == -1 ) {
00407       PAPIERROR( "open(%s)", buf );
00408       return PAPI_ESYS;
00409    }
00410 
00411    rv = read( stat_fd, buf, LINE_MAX * sizeof ( char ) );
00412    if ( rv == -1 ) {
00413       if ( errno == EBADF ) {
00414      close(stat_fd);     
00415      goto again;
00416       }
00417       PAPIERROR( "read()" );
00418       close(stat_fd);
00419       return PAPI_ESYS;
00420    }
00421    lseek( stat_fd, 0, SEEK_SET );
00422 
00423    buf[rv] = '\0';
00424    SUBDBG( "Thread stat file is:%s\n", buf );
00425    while ( ( cnt != 13 ) && ( i < rv ) ) {
00426       if ( buf[i] == ' ' ) {
00427      cnt++;
00428       }
00429       i++;
00430    }
00431 
00432    if ( cnt != 13 ) {
00433       PAPIERROR( "utime and stime not in thread stat file?" );
00434       close(stat_fd);
00435       return PAPI_ESYS;
00436    }
00437 
00438    if ( sscanf( buf + i, "%llu %llu", &utime, &stime ) != 2 ) {
00439       close(stat_fd);
00440       PAPIERROR("Unable to scan two items from thread stat file at 13th space?");
00441       return PAPI_ESYS;
00442    }
00443 
00444    retval = ( utime + stime ) * ( long long ) 1000000 /_papi_os_info.clock_ticks;
00445 
00446    close(stat_fd);
00447 
00448    return retval;
00449 }
00450 
00451 
00452 /********************************************************************
00453  * nanosecond timers                                                *
00454  ********************************************************************/
00455 
00456 
00457 
00458 /*******************************
00459  * HAVE_CLOCK_GETTIME          *
00460  *******************************/
00461 
00462 long long
00463 _linux_get_real_nsec_gettime( void )
00464 {
00465     
00466    long long retval;
00467 
00468    struct timespec foo;
00469 #ifdef HAVE_CLOCK_GETTIME_REALTIME_HR
00470    syscall( __NR_clock_gettime, CLOCK_REALTIME_HR, &foo );
00471 #else
00472    syscall( __NR_clock_gettime, CLOCK_REALTIME, &foo );
00473 #endif
00474    retval = ( long long ) foo.tv_sec * ( long long ) 1000000000;
00475    retval += ( long long ) ( foo.tv_nsec );
00476 
00477    return retval;
00478 }
00479 
00480 
00481 /******************************/
00482 /* HAVE_CLOCK_GETTIME_THREAD  */
00483 /******************************/
00484 
00485 long long
00486 _linux_get_virt_nsec_gettime( void )
00487 {
00488 
00489     long long retval;
00490 
00491     struct timespec foo;
00492 
00493     syscall( __NR_clock_gettime, CLOCK_THREAD_CPUTIME_ID, &foo );
00494     retval = ( long long ) foo.tv_sec * ( long long ) 1000000000;
00495     retval += ( long long ) foo.tv_nsec ;
00496     
00497     return retval;
00498 }
00499 
00500 
00501 
00502 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines