|
PAPI
5.3.0.0
|
00001 /* 00002 * File: perfctr.c 00003 * Author: Philip Mucci 00004 * mucci at cs.utk.edu 00005 * Mods: Kevin London 00006 * london at cs.utk.edu 00007 * Mods: Maynard Johnson 00008 * maynardj at us.ibm.com 00009 * Mods: Brian Sheely 00010 * bsheely at eecs.utk.edu 00011 */ 00012 00013 #include <string.h> 00014 #include <linux/unistd.h> 00015 #include <errno.h> 00016 #include <sys/time.h> 00017 00018 #include "papi.h" 00019 #include "papi_internal.h" 00020 00021 #ifdef PPC64 00022 #include "perfctr-ppc64.h" 00023 #else 00024 #include "perfctr-x86.h" 00025 #include "papi_libpfm_events.h" 00026 #endif 00027 00028 #include "papi_vector.h" 00029 00030 #include "papi_memory.h" 00031 #include "extras.h" 00032 00033 #include "linux-common.h" 00034 #include "linux-context.h" 00035 00036 extern papi_vector_t _perfctr_vector; 00037 00038 #ifdef PPC64 00039 extern int setup_ppc64_presets( int cputype, int cidx ); 00040 #endif 00041 00042 /* This should be in a linux.h header file maybe. */ 00043 #define FOPEN_ERROR "fopen(%s) returned NULL" 00044 00045 #if defined(PERFCTR26) 00046 #define PERFCTR_CPU_NAME(pi) perfctr_info_cpu_name(pi) 00047 #define PERFCTR_CPU_NRCTRS(pi) perfctr_info_nrctrs(pi) 00048 #else 00049 #define PERFCTR_CPU_NAME perfctr_cpu_name 00050 #define PERFCTR_CPU_NRCTRS perfctr_cpu_nrctrs 00051 #endif 00052 00053 #if !defined(PPC64) 00054 static inline int 00055 xlate_cpu_type_to_vendor( unsigned perfctr_cpu_type ) 00056 { 00057 switch ( perfctr_cpu_type ) { 00058 case PERFCTR_X86_INTEL_P5: 00059 case PERFCTR_X86_INTEL_P5MMX: 00060 case PERFCTR_X86_INTEL_P6: 00061 case PERFCTR_X86_INTEL_PII: 00062 case PERFCTR_X86_INTEL_PIII: 00063 case PERFCTR_X86_INTEL_P4: 00064 case PERFCTR_X86_INTEL_P4M2: 00065 #ifdef PERFCTR_X86_INTEL_P4M3 00066 case PERFCTR_X86_INTEL_P4M3: 00067 #endif 00068 #ifdef PERFCTR_X86_INTEL_PENTM 00069 case PERFCTR_X86_INTEL_PENTM: 00070 #endif 00071 #ifdef PERFCTR_X86_INTEL_CORE 00072 case PERFCTR_X86_INTEL_CORE: 00073 #endif 00074 #ifdef PERFCTR_X86_INTEL_CORE2 00075 case PERFCTR_X86_INTEL_CORE2: 00076 #endif 00077 #ifdef PERFCTR_X86_INTEL_ATOM /* family 6 model 28 */ 00078 case PERFCTR_X86_INTEL_ATOM: 00079 #endif 00080 #ifdef PERFCTR_X86_INTEL_NHLM /* family 6 model 26 */ 00081 case PERFCTR_X86_INTEL_NHLM: 00082 #endif 00083 #ifdef PERFCTR_X86_INTEL_WSTMR 00084 case PERFCTR_X86_INTEL_WSTMR: 00085 #endif 00086 return ( PAPI_VENDOR_INTEL ); 00087 #ifdef PERFCTR_X86_AMD_K8 00088 case PERFCTR_X86_AMD_K8: 00089 #endif 00090 #ifdef PERFCTR_X86_AMD_K8C 00091 case PERFCTR_X86_AMD_K8C: 00092 #endif 00093 #ifdef PERFCTR_X86_AMD_FAM10 /* this is defined in perfctr 2.6.29 */ 00094 case PERFCTR_X86_AMD_FAM10: 00095 #endif 00096 case PERFCTR_X86_AMD_K7: 00097 return ( PAPI_VENDOR_AMD ); 00098 default: 00099 return ( PAPI_VENDOR_UNKNOWN ); 00100 } 00101 } 00102 #endif 00103 00104 long long tb_scale_factor = ( long long ) 1; /* needed to scale get_cycles on PPC series */ 00105 00106 int 00107 _perfctr_init_component( int cidx ) 00108 { 00109 int retval; 00110 struct perfctr_info info; 00111 char abiv[PAPI_MIN_STR_LEN]; 00112 00113 #if defined(PERFCTR26) 00114 int fd; 00115 #else 00116 struct vperfctr *dev; 00117 #endif 00118 00119 #if defined(PERFCTR26) 00120 /* Get info from the kernel */ 00121 /* Use lower level calls per Mikael to get the perfctr info 00122 without actually creating a new kernel-side state. 00123 Also, close the fd immediately after retrieving the info. 00124 This is much lighter weight and doesn't reserve the counter 00125 resources. Also compatible with perfctr 2.6.14. 00126 */ 00127 fd = _vperfctr_open( 0 ); 00128 if ( fd < 0 ) { 00129 strncpy(_perfctr_vector.cmp_info.disabled_reason, 00130 VOPEN_ERROR,PAPI_MAX_STR_LEN); 00131 return PAPI_ESYS; 00132 } 00133 retval = perfctr_info( fd, &info ); 00134 close( fd ); 00135 if ( retval < 0 ) { 00136 strncpy(_perfctr_vector.cmp_info.disabled_reason, 00137 VINFO_ERROR,PAPI_MAX_STR_LEN); 00138 return PAPI_ESYS; 00139 } 00140 00141 /* copy tsc multiplier to local variable */ 00142 /* this field appears in perfctr 2.6 and higher */ 00143 tb_scale_factor = ( long long ) info.tsc_to_cpu_mult; 00144 #else 00145 /* Opened once for all threads. */ 00146 if ( ( dev = vperfctr_open( ) ) == NULL ) { 00147 strncpy(_perfctr_vector.cmp_info.disabled_reason, 00148 VOPEN_ERROR,PAPI_MAX_STR_LEN); 00149 return PAPI_ESYS; 00150 } 00151 SUBDBG( "_perfctr_init_component vperfctr_open = %p\n", dev ); 00152 00153 /* Get info from the kernel */ 00154 retval = vperfctr_info( dev, &info ); 00155 if ( retval < 0 ) { 00156 strncpy(_perfctr_vector.cmp_info.disabled_reason, 00157 VINFO_ERROR,PAPI_MAX_STR_LEN); 00158 return ( PAPI_ESYS ); 00159 } 00160 vperfctr_close( dev ); 00161 #endif 00162 00163 /* Fill in what we can of the papi_system_info. */ 00164 retval = _papi_os_vector.get_system_info( &_papi_hwi_system_info ); 00165 if ( retval != PAPI_OK ) 00166 return ( retval ); 00167 00168 /* Setup memory info */ 00169 retval = _papi_os_vector.get_memory_info( &_papi_hwi_system_info.hw_info, 00170 ( int ) info.cpu_type ); 00171 if ( retval ) 00172 return ( retval ); 00173 00174 strcpy( _perfctr_vector.cmp_info.name,"perfctr.c" ); 00175 strcpy( _perfctr_vector.cmp_info.version, "$Revision$" ); 00176 sprintf( abiv, "0x%08X", info.abi_version ); 00177 strcpy( _perfctr_vector.cmp_info.support_version, abiv ); 00178 strcpy( _perfctr_vector.cmp_info.kernel_version, info.driver_version ); 00179 _perfctr_vector.cmp_info.CmpIdx = cidx; 00180 _perfctr_vector.cmp_info.num_cntrs = ( int ) PERFCTR_CPU_NRCTRS( &info ); 00181 _perfctr_vector.cmp_info.num_mpx_cntrs=_perfctr_vector.cmp_info.num_cntrs; 00182 if ( info.cpu_features & PERFCTR_FEATURE_RDPMC ) 00183 _perfctr_vector.cmp_info.fast_counter_read = 1; 00184 else 00185 _perfctr_vector.cmp_info.fast_counter_read = 0; 00186 _perfctr_vector.cmp_info.fast_real_timer = 1; 00187 _perfctr_vector.cmp_info.fast_virtual_timer = 1; 00188 _perfctr_vector.cmp_info.attach = 1; 00189 _perfctr_vector.cmp_info.attach_must_ptrace = 1; 00190 _perfctr_vector.cmp_info.default_domain = PAPI_DOM_USER; 00191 #if !defined(PPC64) 00192 /* AMD and Intel ia386 processors all support unit mask bits */ 00193 _perfctr_vector.cmp_info.cntr_umasks = 1; 00194 #endif 00195 #if defined(PPC64) 00196 _perfctr_vector.cmp_info.available_domains = 00197 PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR; 00198 #else 00199 _perfctr_vector.cmp_info.available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL; 00200 #endif 00201 _perfctr_vector.cmp_info.default_granularity = PAPI_GRN_THR; 00202 _perfctr_vector.cmp_info.available_granularities = PAPI_GRN_THR; 00203 if ( info.cpu_features & PERFCTR_FEATURE_PCINT ) 00204 _perfctr_vector.cmp_info.hardware_intr = 1; 00205 else 00206 _perfctr_vector.cmp_info.hardware_intr = 0; 00207 SUBDBG( "Hardware/OS %s support counter generated interrupts\n", 00208 _perfctr_vector.cmp_info.hardware_intr ? "does" : "does not" ); 00209 00210 strcpy( _papi_hwi_system_info.hw_info.model_string, 00211 PERFCTR_CPU_NAME( &info ) ); 00212 _papi_hwi_system_info.hw_info.model = ( int ) info.cpu_type; 00213 #if defined(PPC64) 00214 _papi_hwi_system_info.hw_info.vendor = PAPI_VENDOR_IBM; 00215 if ( strlen( _papi_hwi_system_info.hw_info.vendor_string ) == 0 ) 00216 strcpy( _papi_hwi_system_info.hw_info.vendor_string, "IBM" ); 00217 #else 00218 _papi_hwi_system_info.hw_info.vendor = 00219 xlate_cpu_type_to_vendor( info.cpu_type ); 00220 #endif 00221 00222 /* Setup presets last. Some platforms depend on earlier info */ 00223 #if !defined(PPC64) 00224 // retval = setup_p3_vector_table(vtable); 00225 if ( !retval ) 00226 retval = _papi_libpfm_init(&_perfctr_vector, cidx ); 00227 #else 00228 /* Setup native and preset events */ 00229 // retval = ppc64_setup_vector_table(vtable); 00230 if ( !retval ) 00231 retval = perfctr_ppc64_setup_native_table( ); 00232 if ( !retval ) 00233 retval = setup_ppc64_presets( info.cpu_type, cidx ); 00234 #endif 00235 if ( retval ) 00236 return ( retval ); 00237 00238 return ( PAPI_OK ); 00239 } 00240 00241 static int 00242 attach( hwd_control_state_t * ctl, unsigned long tid ) 00243 { 00244 struct vperfctr_control tmp; 00245 00246 #ifdef VPERFCTR_CONTROL_CLOEXEC 00247 tmp.flags = VPERFCTR_CONTROL_CLOEXEC; 00248 #endif 00249 00250 ctl->rvperfctr = rvperfctr_open( ( int ) tid ); 00251 if ( ctl->rvperfctr == NULL ) { 00252 PAPIERROR( VOPEN_ERROR ); 00253 return ( PAPI_ESYS ); 00254 } 00255 SUBDBG( "_papi_hwd_ctl rvperfctr_open() = %p\n", ctl->rvperfctr ); 00256 00257 /* Initialize the per thread/process virtualized TSC */ 00258 memset( &tmp, 0x0, sizeof ( tmp ) ); 00259 tmp.cpu_control.tsc_on = 1; 00260 00261 /* Start the per thread/process virtualized TSC */ 00262 if ( rvperfctr_control( ctl->rvperfctr, &tmp ) < 0 ) { 00263 PAPIERROR( RCNTRL_ERROR ); 00264 return ( PAPI_ESYS ); 00265 } 00266 00267 return ( PAPI_OK ); 00268 } /* end attach() */ 00269 00270 static int 00271 detach( hwd_control_state_t * ctl ) 00272 { 00273 rvperfctr_close( ctl->rvperfctr ); 00274 return ( PAPI_OK ); 00275 } /* end detach() */ 00276 00277 static inline int 00278 round_requested_ns( int ns ) 00279 { 00280 if ( ns < _papi_os_info.itimer_res_ns ) { 00281 return _papi_os_info.itimer_res_ns; 00282 } else { 00283 int leftover_ns = ns % _papi_os_info.itimer_res_ns; 00284 return ns + leftover_ns; 00285 } 00286 } 00287 00288 int 00289 _perfctr_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) 00290 { 00291 ( void ) ctx; /*unused */ 00292 switch ( code ) { 00293 case PAPI_DOMAIN: 00294 case PAPI_DEFDOM: 00295 #if defined(PPC64) 00296 return ( _perfctr_vector. 00297 set_domain( option->domain.ESI, option->domain.domain ) ); 00298 #else 00299 return ( _perfctr_vector. 00300 set_domain( option->domain.ESI->ctl_state, 00301 option->domain.domain ) ); 00302 #endif 00303 case PAPI_GRANUL: 00304 case PAPI_DEFGRN: 00305 return PAPI_ECMP; 00306 case PAPI_ATTACH: 00307 return ( attach( option->attach.ESI->ctl_state, option->attach.tid ) ); 00308 case PAPI_DETACH: 00309 return ( detach( option->attach.ESI->ctl_state ) ); 00310 case PAPI_DEF_ITIMER: 00311 { 00312 /* flags are currently ignored, eventually the flags will be able 00313 to specify whether or not we use POSIX itimers (clock_gettimer) */ 00314 if ( ( option->itimer.itimer_num == ITIMER_REAL ) && 00315 ( option->itimer.itimer_sig != SIGALRM ) ) 00316 return PAPI_EINVAL; 00317 if ( ( option->itimer.itimer_num == ITIMER_VIRTUAL ) && 00318 ( option->itimer.itimer_sig != SIGVTALRM ) ) 00319 return PAPI_EINVAL; 00320 if ( ( option->itimer.itimer_num == ITIMER_PROF ) && 00321 ( option->itimer.itimer_sig != SIGPROF ) ) 00322 return PAPI_EINVAL; 00323 if ( option->itimer.ns > 0 ) 00324 option->itimer.ns = round_requested_ns( option->itimer.ns ); 00325 /* At this point, we assume the user knows what he or 00326 she is doing, they maybe doing something arch specific */ 00327 return PAPI_OK; 00328 } 00329 case PAPI_DEF_MPX_NS: 00330 { 00331 option->multiplex.ns = 00332 ( unsigned long ) round_requested_ns( ( int ) option->multiplex. 00333 ns ); 00334 return ( PAPI_OK ); 00335 } 00336 case PAPI_DEF_ITIMER_NS: 00337 { 00338 option->itimer.ns = round_requested_ns( option->itimer.ns ); 00339 return ( PAPI_OK ); 00340 } 00341 default: 00342 return ( PAPI_ENOSUPP ); 00343 } 00344 } 00345 00346 void 00347 _perfctr_dispatch_timer( int signal, siginfo_t * si, void *context ) 00348 { 00349 ( void ) signal; /*unused */ 00350 _papi_hwi_context_t ctx; 00351 ThreadInfo_t *master = NULL; 00352 int isHardware = 0; 00353 caddr_t address; 00354 int cidx = _perfctr_vector.cmp_info.CmpIdx; 00355 hwd_context_t *our_context; 00356 00357 ctx.si = si; 00358 ctx.ucontext = ( ucontext_t * ) context; 00359 00360 #define OVERFLOW_MASK si->si_pmc_ovf_mask 00361 #define GEN_OVERFLOW 0 00362 00363 address = ( caddr_t ) GET_OVERFLOW_ADDRESS( ( ctx ) ); 00364 _papi_hwi_dispatch_overflow_signal( ( void * ) &ctx, address, &isHardware, 00365 OVERFLOW_MASK, GEN_OVERFLOW, &master, 00366 _perfctr_vector.cmp_info.CmpIdx ); 00367 00368 /* We are done, resume interrupting counters */ 00369 if ( isHardware ) { 00370 our_context=(hwd_context_t *) master->context[cidx]; 00371 errno = vperfctr_iresume( our_context->perfctr ); 00372 if ( errno < 0 ) { 00373 PAPIERROR( "vperfctr_iresume errno %d", errno ); 00374 } 00375 } 00376 } 00377 00378 00379 int 00380 _perfctr_init_thread( hwd_context_t * ctx ) 00381 { 00382 struct vperfctr_control tmp; 00383 int error; 00384 00385 /* Initialize our thread/process pointer. */ 00386 if ( ( ctx->perfctr = vperfctr_open( ) ) == NULL ) { 00387 #ifdef VPERFCTR_OPEN_CREAT_EXCL 00388 /* New versions of perfctr have this, which allows us to 00389 get a previously created context, i.e. one created after 00390 a fork and now we're inside a new process that has been exec'd */ 00391 if ( errno ) { 00392 if ( ( ctx->perfctr = vperfctr_open_mode( 0 ) ) == NULL ) { 00393 return PAPI_ESYS; 00394 } 00395 } else { 00396 return PAPI_ESYS; 00397 } 00398 #else 00399 return PAPI_ESYS; 00400 #endif 00401 } 00402 SUBDBG( "_papi_hwd_init vperfctr_open() = %p\n", ctx->perfctr ); 00403 00404 /* Initialize the per thread/process virtualized TSC */ 00405 memset( &tmp, 0x0, sizeof ( tmp ) ); 00406 tmp.cpu_control.tsc_on = 1; 00407 00408 #ifdef VPERFCTR_CONTROL_CLOEXEC 00409 tmp.flags = VPERFCTR_CONTROL_CLOEXEC; 00410 SUBDBG( "close on exec\t\t\t%u\n", tmp.flags ); 00411 #endif 00412 00413 /* Start the per thread/process virtualized TSC */ 00414 error = vperfctr_control( ctx->perfctr, &tmp ); 00415 if ( error < 0 ) { 00416 SUBDBG( "starting virtualized TSC; vperfctr_control returns %d\n", 00417 error ); 00418 return PAPI_ESYS; 00419 } 00420 00421 return PAPI_OK; 00422 } 00423 00424 /* This routine is for shutting down threads, including the 00425 master thread. */ 00426 00427 int 00428 _perfctr_shutdown_thread( hwd_context_t * ctx ) 00429 { 00430 #ifdef DEBUG 00431 int retval = vperfctr_unlink( ctx->perfctr ); 00432 SUBDBG( "_papi_hwd_shutdown vperfctr_unlink(%p) = %d\n", ctx->perfctr, 00433 retval ); 00434 #else 00435 vperfctr_unlink( ctx->perfctr ); 00436 #endif 00437 vperfctr_close( ctx->perfctr ); 00438 SUBDBG( "_perfctr_shutdown vperfctr_close(%p)\n", ctx->perfctr ); 00439 memset( ctx, 0x0, sizeof ( hwd_context_t ) ); 00440 return ( PAPI_OK ); 00441 }