/* PAPI 5.0.1.0 */
00001 /* 00002 * File: perfctr.c 00003 * Author: Philip Mucci 00004 * mucci at cs.utk.edu 00005 * Mods: Kevin London 00006 * london at cs.utk.edu 00007 * Mods: Maynard Johnson 00008 * maynardj at us.ibm.com 00009 * Mods: Brian Sheely 00010 * bsheely at eecs.utk.edu 00011 */ 00012 00013 #include <string.h> 00014 #include <linux/unistd.h> 00015 #include <errno.h> 00016 #include <sys/time.h> 00017 00018 #include "papi.h" 00019 #include "papi_internal.h" 00020 00021 #ifdef PPC64 00022 #include "perfctr-ppc64.h" 00023 #else 00024 #include "perfctr-x86.h" 00025 #endif 00026 00027 #include "papi_vector.h" 00028 00029 #include "papi_memory.h" 00030 #include "extras.h" 00031 00032 #include "linux-common.h" 00033 #include "linux-context.h" 00034 00035 extern papi_vector_t _perfctr_vector; 00036 00037 #ifdef PPC64 00038 extern int setup_ppc64_presets( int cputype ); 00039 #else 00040 extern int setup_x86_presets( int cputype ); 00041 #endif 00042 00043 /* This should be in a linux.h header file maybe. 
*/ 00044 #define FOPEN_ERROR "fopen(%s) returned NULL" 00045 00046 #if defined(PERFCTR26) 00047 #define PERFCTR_CPU_NAME(pi) perfctr_info_cpu_name(pi) 00048 #define PERFCTR_CPU_NRCTRS(pi) perfctr_info_nrctrs(pi) 00049 #else 00050 #define PERFCTR_CPU_NAME perfctr_cpu_name 00051 #define PERFCTR_CPU_NRCTRS perfctr_cpu_nrctrs 00052 #endif 00053 00054 #if !defined(PPC64) 00055 static inline int 00056 xlate_cpu_type_to_vendor( unsigned perfctr_cpu_type ) 00057 { 00058 switch ( perfctr_cpu_type ) { 00059 case PERFCTR_X86_INTEL_P5: 00060 case PERFCTR_X86_INTEL_P5MMX: 00061 case PERFCTR_X86_INTEL_P6: 00062 case PERFCTR_X86_INTEL_PII: 00063 case PERFCTR_X86_INTEL_PIII: 00064 case PERFCTR_X86_INTEL_P4: 00065 case PERFCTR_X86_INTEL_P4M2: 00066 #ifdef PERFCTR_X86_INTEL_P4M3 00067 case PERFCTR_X86_INTEL_P4M3: 00068 #endif 00069 #ifdef PERFCTR_X86_INTEL_PENTM 00070 case PERFCTR_X86_INTEL_PENTM: 00071 #endif 00072 #ifdef PERFCTR_X86_INTEL_CORE 00073 case PERFCTR_X86_INTEL_CORE: 00074 #endif 00075 #ifdef PERFCTR_X86_INTEL_CORE2 00076 case PERFCTR_X86_INTEL_CORE2: 00077 #endif 00078 #ifdef PERFCTR_X86_INTEL_ATOM /* family 6 model 28 */ 00079 case PERFCTR_X86_INTEL_ATOM: 00080 #endif 00081 #ifdef PERFCTR_X86_INTEL_NHLM /* family 6 model 26 */ 00082 case PERFCTR_X86_INTEL_NHLM: 00083 #endif 00084 #ifdef PERFCTR_X86_INTEL_WSTMR 00085 case PERFCTR_X86_INTEL_WSTMR: 00086 #endif 00087 return ( PAPI_VENDOR_INTEL ); 00088 #ifdef PERFCTR_X86_AMD_K8 00089 case PERFCTR_X86_AMD_K8: 00090 #endif 00091 #ifdef PERFCTR_X86_AMD_K8C 00092 case PERFCTR_X86_AMD_K8C: 00093 #endif 00094 #ifdef PERFCTR_X86_AMD_FAM10 /* this is defined in perfctr 2.6.29 */ 00095 case PERFCTR_X86_AMD_FAM10: 00096 #endif 00097 case PERFCTR_X86_AMD_K7: 00098 return ( PAPI_VENDOR_AMD ); 00099 default: 00100 return ( PAPI_VENDOR_UNKNOWN ); 00101 } 00102 } 00103 #endif 00104 00105 long long tb_scale_factor = ( long long ) 1; /* needed to scale get_cycles on PPC series */ 00106 00107 int 00108 _perfctr_init_component( int cidx ) 
00109 { 00110 int retval; 00111 struct perfctr_info info; 00112 char abiv[PAPI_MIN_STR_LEN]; 00113 00114 #if defined(PERFCTR26) 00115 int fd; 00116 #else 00117 struct vperfctr *dev; 00118 #endif 00119 00120 #if defined(PERFCTR26) 00121 /* Get info from the kernel */ 00122 /* Use lower level calls per Mikael to get the perfctr info 00123 without actually creating a new kernel-side state. 00124 Also, close the fd immediately after retrieving the info. 00125 This is much lighter weight and doesn't reserve the counter 00126 resources. Also compatible with perfctr 2.6.14. 00127 */ 00128 fd = _vperfctr_open( 0 ); 00129 if ( fd < 0 ) { 00130 strncpy(_perfctr_vector.cmp_info.disabled_reason, 00131 VOPEN_ERROR,PAPI_MAX_STR_LEN); 00132 return PAPI_ESYS; 00133 } 00134 retval = perfctr_info( fd, &info ); 00135 close( fd ); 00136 if ( retval < 0 ) { 00137 strncpy(_perfctr_vector.cmp_info.disabled_reason, 00138 VINFO_ERROR,PAPI_MAX_STR_LEN); 00139 return PAPI_ESYS; 00140 } 00141 00142 /* copy tsc multiplier to local variable */ 00143 /* this field appears in perfctr 2.6 and higher */ 00144 tb_scale_factor = ( long long ) info.tsc_to_cpu_mult; 00145 #else 00146 /* Opened once for all threads. */ 00147 if ( ( dev = vperfctr_open( ) ) == NULL ) { 00148 strncpy(_perfctr_vector.cmp_info.disabled_reason, 00149 VOPEN_ERROR,PAPI_MAX_STR_LEN); 00150 return PAPI_ESYS; 00151 } 00152 SUBDBG( "_perfctr_init_component vperfctr_open = %p\n", dev ); 00153 00154 /* Get info from the kernel */ 00155 retval = vperfctr_info( dev, &info ); 00156 if ( retval < 0 ) { 00157 strncpy(_perfctr_vector.cmp_info.disabled_reason, 00158 VINFO_ERROR,PAPI_MAX_STR_LEN); 00159 return ( PAPI_ESYS ); 00160 } 00161 vperfctr_close( dev ); 00162 #endif 00163 00164 /* Fill in what we can of the papi_system_info. 
*/ 00165 retval = _papi_os_vector.get_system_info( &_papi_hwi_system_info ); 00166 if ( retval != PAPI_OK ) 00167 return ( retval ); 00168 00169 /* Setup memory info */ 00170 retval = _papi_os_vector.get_memory_info( &_papi_hwi_system_info.hw_info, 00171 ( int ) info.cpu_type ); 00172 if ( retval ) 00173 return ( retval ); 00174 00175 strcpy( _perfctr_vector.cmp_info.name,"perfctr.c" ); 00176 strcpy( _perfctr_vector.cmp_info.version, "$Revision$" ); 00177 sprintf( abiv, "0x%08X", info.abi_version ); 00178 strcpy( _perfctr_vector.cmp_info.support_version, abiv ); 00179 strcpy( _perfctr_vector.cmp_info.kernel_version, info.driver_version ); 00180 _perfctr_vector.cmp_info.CmpIdx = cidx; 00181 _perfctr_vector.cmp_info.num_cntrs = ( int ) PERFCTR_CPU_NRCTRS( &info ); 00182 _perfctr_vector.cmp_info.num_mpx_cntrs=_perfctr_vector.cmp_info.num_cntrs; 00183 if ( info.cpu_features & PERFCTR_FEATURE_RDPMC ) 00184 _perfctr_vector.cmp_info.fast_counter_read = 1; 00185 else 00186 _perfctr_vector.cmp_info.fast_counter_read = 0; 00187 _perfctr_vector.cmp_info.fast_real_timer = 1; 00188 _perfctr_vector.cmp_info.fast_virtual_timer = 1; 00189 _perfctr_vector.cmp_info.attach = 1; 00190 _perfctr_vector.cmp_info.attach_must_ptrace = 1; 00191 _perfctr_vector.cmp_info.default_domain = PAPI_DOM_USER; 00192 #if !defined(PPC64) 00193 /* AMD and Intel ia386 processors all support unit mask bits */ 00194 _perfctr_vector.cmp_info.cntr_umasks = 1; 00195 #endif 00196 #if defined(PPC64) 00197 _perfctr_vector.cmp_info.available_domains = 00198 PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR; 00199 #else 00200 _perfctr_vector.cmp_info.available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL; 00201 #endif 00202 _perfctr_vector.cmp_info.default_granularity = PAPI_GRN_THR; 00203 _perfctr_vector.cmp_info.available_granularities = PAPI_GRN_THR; 00204 if ( info.cpu_features & PERFCTR_FEATURE_PCINT ) 00205 _perfctr_vector.cmp_info.hardware_intr = 1; 00206 else 00207 _perfctr_vector.cmp_info.hardware_intr = 
0; 00208 SUBDBG( "Hardware/OS %s support counter generated interrupts\n", 00209 _perfctr_vector.cmp_info.hardware_intr ? "does" : "does not" ); 00210 00211 strcpy( _papi_hwi_system_info.hw_info.model_string, 00212 PERFCTR_CPU_NAME( &info ) ); 00213 _papi_hwi_system_info.hw_info.model = ( int ) info.cpu_type; 00214 #if defined(PPC64) 00215 _papi_hwi_system_info.hw_info.vendor = PAPI_VENDOR_IBM; 00216 if ( strlen( _papi_hwi_system_info.hw_info.vendor_string ) == 0 ) 00217 strcpy( _papi_hwi_system_info.hw_info.vendor_string, "IBM" ); 00218 #else 00219 _papi_hwi_system_info.hw_info.vendor = 00220 xlate_cpu_type_to_vendor( info.cpu_type ); 00221 #endif 00222 00223 /* Setup presets last. Some platforms depend on earlier info */ 00224 #if !defined(PPC64) 00225 // retval = setup_p3_vector_table(vtable); 00226 if ( !retval ) 00227 retval = setup_x86_presets( ( int ) info.cpu_type ); 00228 #else 00229 /* Setup native and preset events */ 00230 // retval = ppc64_setup_vector_table(vtable); 00231 if ( !retval ) 00232 retval = perfctr_ppc64_setup_native_table( ); 00233 if ( !retval ) 00234 retval = setup_ppc64_presets( info.cpu_type ); 00235 #endif 00236 if ( retval ) 00237 return ( retval ); 00238 00239 return ( PAPI_OK ); 00240 } 00241 00242 static int 00243 attach( hwd_control_state_t * ctl, unsigned long tid ) 00244 { 00245 struct vperfctr_control tmp; 00246 00247 #ifdef VPERFCTR_CONTROL_CLOEXEC 00248 tmp.flags = VPERFCTR_CONTROL_CLOEXEC; 00249 #endif 00250 00251 ctl->rvperfctr = rvperfctr_open( ( int ) tid ); 00252 if ( ctl->rvperfctr == NULL ) { 00253 PAPIERROR( VOPEN_ERROR ); 00254 return ( PAPI_ESYS ); 00255 } 00256 SUBDBG( "_papi_hwd_ctl rvperfctr_open() = %p\n", ctl->rvperfctr ); 00257 00258 /* Initialize the per thread/process virtualized TSC */ 00259 memset( &tmp, 0x0, sizeof ( tmp ) ); 00260 tmp.cpu_control.tsc_on = 1; 00261 00262 /* Start the per thread/process virtualized TSC */ 00263 if ( rvperfctr_control( ctl->rvperfctr, &tmp ) < 0 ) { 00264 PAPIERROR( 
RCNTRL_ERROR ); 00265 return ( PAPI_ESYS ); 00266 } 00267 00268 return ( PAPI_OK ); 00269 } /* end attach() */ 00270 00271 static int 00272 detach( hwd_control_state_t * ctl ) 00273 { 00274 rvperfctr_close( ctl->rvperfctr ); 00275 return ( PAPI_OK ); 00276 } /* end detach() */ 00277 00278 static inline int 00279 round_requested_ns( int ns ) 00280 { 00281 if ( ns < _papi_os_info.itimer_res_ns ) { 00282 return _papi_os_info.itimer_res_ns; 00283 } else { 00284 int leftover_ns = ns % _papi_os_info.itimer_res_ns; 00285 return ns + leftover_ns; 00286 } 00287 } 00288 00289 int 00290 _perfctr_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) 00291 { 00292 ( void ) ctx; /*unused */ 00293 switch ( code ) { 00294 case PAPI_DOMAIN: 00295 case PAPI_DEFDOM: 00296 #if defined(PPC64) 00297 return ( _perfctr_vector. 00298 set_domain( option->domain.ESI, option->domain.domain ) ); 00299 #else 00300 return ( _perfctr_vector. 00301 set_domain( option->domain.ESI->ctl_state, 00302 option->domain.domain ) ); 00303 #endif 00304 case PAPI_GRANUL: 00305 case PAPI_DEFGRN: 00306 return PAPI_ECMP; 00307 case PAPI_ATTACH: 00308 return ( attach( option->attach.ESI->ctl_state, option->attach.tid ) ); 00309 case PAPI_DETACH: 00310 return ( detach( option->attach.ESI->ctl_state ) ); 00311 case PAPI_DEF_ITIMER: 00312 { 00313 /* flags are currently ignored, eventually the flags will be able 00314 to specify whether or not we use POSIX itimers (clock_gettimer) */ 00315 if ( ( option->itimer.itimer_num == ITIMER_REAL ) && 00316 ( option->itimer.itimer_sig != SIGALRM ) ) 00317 return PAPI_EINVAL; 00318 if ( ( option->itimer.itimer_num == ITIMER_VIRTUAL ) && 00319 ( option->itimer.itimer_sig != SIGVTALRM ) ) 00320 return PAPI_EINVAL; 00321 if ( ( option->itimer.itimer_num == ITIMER_PROF ) && 00322 ( option->itimer.itimer_sig != SIGPROF ) ) 00323 return PAPI_EINVAL; 00324 if ( option->itimer.ns > 0 ) 00325 option->itimer.ns = round_requested_ns( option->itimer.ns ); 00326 /* At this 
point, we assume the user knows what he or 00327 she is doing, they maybe doing something arch specific */ 00328 return PAPI_OK; 00329 } 00330 case PAPI_DEF_MPX_NS: 00331 { 00332 option->multiplex.ns = 00333 ( unsigned long ) round_requested_ns( ( int ) option->multiplex. 00334 ns ); 00335 return ( PAPI_OK ); 00336 } 00337 case PAPI_DEF_ITIMER_NS: 00338 { 00339 option->itimer.ns = round_requested_ns( option->itimer.ns ); 00340 return ( PAPI_OK ); 00341 } 00342 default: 00343 return ( PAPI_ENOSUPP ); 00344 } 00345 } 00346 00347 void 00348 _perfctr_dispatch_timer( int signal, siginfo_t * si, void *context ) 00349 { 00350 ( void ) signal; /*unused */ 00351 _papi_hwi_context_t ctx; 00352 ThreadInfo_t *master = NULL; 00353 int isHardware = 0; 00354 caddr_t address; 00355 int cidx = _perfctr_vector.cmp_info.CmpIdx; 00356 hwd_context_t *our_context; 00357 00358 ctx.si = si; 00359 ctx.ucontext = ( ucontext_t * ) context; 00360 00361 #define OVERFLOW_MASK si->si_pmc_ovf_mask 00362 #define GEN_OVERFLOW 0 00363 00364 address = ( caddr_t ) GET_OVERFLOW_ADDRESS( ( ctx ) ); 00365 _papi_hwi_dispatch_overflow_signal( ( void * ) &ctx, address, &isHardware, 00366 OVERFLOW_MASK, GEN_OVERFLOW, &master, 00367 _perfctr_vector.cmp_info.CmpIdx ); 00368 00369 /* We are done, resume interrupting counters */ 00370 if ( isHardware ) { 00371 our_context=(hwd_context_t *) master->context[cidx]; 00372 errno = vperfctr_iresume( our_context->perfctr ); 00373 if ( errno < 0 ) { 00374 PAPIERROR( "vperfctr_iresume errno %d", errno ); 00375 } 00376 } 00377 } 00378 00379 00380 int 00381 _perfctr_init_thread( hwd_context_t * ctx ) 00382 { 00383 struct vperfctr_control tmp; 00384 int error; 00385 00386 /* Initialize our thread/process pointer. */ 00387 if ( ( ctx->perfctr = vperfctr_open( ) ) == NULL ) { 00388 #ifdef VPERFCTR_OPEN_CREAT_EXCL 00389 /* New versions of perfctr have this, which allows us to 00390 get a previously created context, i.e. 
one created after 00391 a fork and now we're inside a new process that has been exec'd */ 00392 if ( errno ) { 00393 if ( ( ctx->perfctr = vperfctr_open_mode( 0 ) ) == NULL ) { 00394 return PAPI_ESYS; 00395 } 00396 } else { 00397 return PAPI_ESYS; 00398 } 00399 #else 00400 return PAPI_ESYS; 00401 #endif 00402 } 00403 SUBDBG( "_papi_hwd_init vperfctr_open() = %p\n", ctx->perfctr ); 00404 00405 /* Initialize the per thread/process virtualized TSC */ 00406 memset( &tmp, 0x0, sizeof ( tmp ) ); 00407 tmp.cpu_control.tsc_on = 1; 00408 00409 #ifdef VPERFCTR_CONTROL_CLOEXEC 00410 tmp.flags = VPERFCTR_CONTROL_CLOEXEC; 00411 SUBDBG( "close on exec\t\t\t%u\n", tmp.flags ); 00412 #endif 00413 00414 /* Start the per thread/process virtualized TSC */ 00415 error = vperfctr_control( ctx->perfctr, &tmp ); 00416 if ( error < 0 ) { 00417 SUBDBG( "starting virtualized TSC; vperfctr_control returns %d\n", 00418 error ); 00419 return PAPI_ESYS; 00420 } 00421 00422 return PAPI_OK; 00423 } 00424 00425 /* This routine is for shutting down threads, including the 00426 master thread. */ 00427 00428 int 00429 _perfctr_shutdown_thread( hwd_context_t * ctx ) 00430 { 00431 #ifdef DEBUG 00432 int retval = vperfctr_unlink( ctx->perfctr ); 00433 SUBDBG( "_papi_hwd_shutdown vperfctr_unlink(%p) = %d\n", ctx->perfctr, 00434 retval ); 00435 #else 00436 vperfctr_unlink( ctx->perfctr ); 00437 #endif 00438 vperfctr_close( ctx->perfctr ); 00439 SUBDBG( "_perfctr_shutdown vperfctr_close(%p)\n", ctx->perfctr ); 00440 memset( ctx, 0x0, sizeof ( hwd_context_t ) ); 00441 return ( PAPI_OK ); 00442 }