PAPI 5.3.0.0
00001 /* 00002 * File: perfctr-x86.c 00003 * Author: Brian Sheely 00004 * bsheely@eecs.utk.edu 00005 * Mods: <your name here> 00006 * <your email address> 00007 */ 00008 00009 #include <string.h> 00010 #include <linux/unistd.h> 00011 00012 #include "papi.h" 00013 #include "papi_memory.h" 00014 #include "papi_internal.h" 00015 #include "perfctr-x86.h" 00016 #include "perfmon/pfmlib.h" 00017 #include "extras.h" 00018 #include "papi_vector.h" 00019 #include "papi_libpfm_events.h" 00020 00021 #include "papi_preset.h" 00022 #include "linux-memory.h" 00023 00024 /* Contains source for the Modified Bipartite Allocation scheme */ 00025 #include "papi_bipartite.h" 00026 00027 /* Prototypes for entry points found in perfctr.c */ 00028 extern int _perfctr_init_component( int ); 00029 extern int _perfctr_ctl( hwd_context_t * ctx, int code, 00030 _papi_int_option_t * option ); 00031 extern void _perfctr_dispatch_timer( int signal, hwd_siginfo_t * si, 00032 void *context ); 00033 00034 extern int _perfctr_init_thread( hwd_context_t * ctx ); 00035 extern int _perfctr_shutdown_thread( hwd_context_t * ctx ); 00036 00037 #include "linux-common.h" 00038 #include "linux-timer.h" 00039 00040 extern papi_mdi_t _papi_hwi_system_info; 00041 00042 extern papi_vector_t _perfctr_vector; 00043 00044 #if defined(PERFCTR26) 00045 #define evntsel_aux p4.escr 00046 #endif 00047 00048 #if defined(PAPI_PENTIUM4_VEC_MMX) 00049 #define P4_VEC "MMX" 00050 #else 00051 #define P4_VEC "SSE" 00052 #endif 00053 00054 #if defined(PAPI_PENTIUM4_FP_X87) 00055 #define P4_FPU " X87" 00056 #elif defined(PAPI_PENTIUM4_FP_X87_SSE_SP) 00057 #define P4_FPU " X87 SSE_SP" 00058 #elif defined(PAPI_PENTIUM4_FP_SSE_SP_DP) 00059 #define P4_FPU " SSE_SP SSE_DP" 00060 #else 00061 #define P4_FPU " X87 SSE_DP" 00062 #endif 00063 00064 /* CODE TO SUPPORT CUSTOMIZABLE FP COUNTS ON OPTERON */ 00065 #if defined(PAPI_OPTERON_FP_RETIRED) 00066 #define AMD_FPU "RETIRED" 00067 #elif defined(PAPI_OPTERON_FP_SSE_SP) 00068 #define 
AMD_FPU "SSE_SP"
#elif defined(PAPI_OPTERON_FP_SSE_DP)
#define AMD_FPU "SSE_DP"
#else
#define AMD_FPU "SPECULATIVE"
#endif

/* Return 1 if the running CPU is an Intel Pentium 4 (cpuid family 15),
   0 otherwise.  The P4 programs counters through ESCR/CCCR register
   pairs instead of plain evntsel registers, so most entry points in
   this file branch on this predicate. */
static inline int
is_pentium4( void )
{
    if ( ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_INTEL ) &&
         ( _papi_hwi_system_info.hw_info.cpuid_family == 15 ) ) {
        return 1;
    }

    return 0;
}

#ifdef DEBUG
/* Debug helper: dump one register-allocation descriptor. */
static void
print_alloc( X86_reg_alloc_t * a )
{
    SUBDBG( "X86_reg_alloc:\n" );
    SUBDBG( " selector: %#x\n", a->ra_selector );
    SUBDBG( " rank: %#x\n", a->ra_rank );
    SUBDBG( " escr: %#x %#x\n", a->ra_escr[0], a->ra_escr[1] );
}

/* Debug helper: dump a perfctr cpu_control structure. */
void
print_control( const struct perfctr_cpu_control *control )
{
    unsigned int i;
    SUBDBG( "Control used:\n" );
    SUBDBG( "tsc_on\t\t\t%u\n", control->tsc_on );
    SUBDBG( "nractrs\t\t\t%u\n", control->nractrs );
    SUBDBG( "nrictrs\t\t\t%u\n", control->nrictrs );

    /* walk both accumulating (nractrs) and interrupting (nrictrs) counters */
    for ( i = 0; i < ( control->nractrs + control->nrictrs ); ++i ) {
        if ( control->pmc_map[i] >= 18 ) {
            /* large values carry flag bits (e.g. FAST_RDPMC), print as hex */
            SUBDBG( "pmc_map[%u]\t\t0x%08X\n", i, control->pmc_map[i] );
        } else {
            SUBDBG( "pmc_map[%u]\t\t%u\n", i, control->pmc_map[i] );
        }
        SUBDBG( "evntsel[%u]\t\t0x%08X\n", i, control->evntsel[i] );
        if ( control->ireset[i] ) {
            SUBDBG( "ireset[%u]\t%d\n", i, control->ireset[i] );
        }
    }
}
#endif

/* Initialize a freshly created control state: fold the component's
   default counting domain (user and/or kernel bits) into every
   event-select register, keep the TSC running, and start with zero
   active/interrupting counters.  Always returns PAPI_OK. */
static int
_x86_init_control_state( hwd_control_state_t *ptr )
{
    int i, def_mode = 0;

    if ( is_pentium4() ) {
        /* translate the default domain into P4 ESCR thread-0 mode bits */
        if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_USER )
            def_mode |= ESCR_T0_USR;
        if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_KERNEL )
            def_mode |= ESCR_T0_OS;

        for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
            ptr->control.cpu_control.evntsel_aux[i] |= def_mode;
        }
        ptr->control.cpu_control.tsc_on = 1;
        ptr->control.cpu_control.nractrs = 0;
        ptr->control.cpu_control.nrictrs = 0;

#ifdef VPERFCTR_CONTROL_CLOEXEC
        ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
        SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags );
#endif
    } else {
        if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_USER )
            def_mode |= PERF_USR;
        if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_KERNEL )
            def_mode |= PERF_OS;

        ptr->allocated_registers.selector = 0;
        switch ( _papi_hwi_system_info.hw_info.model ) {
        case PERFCTR_X86_GENERIC:
        case PERFCTR_X86_WINCHIP_C6:
        case PERFCTR_X86_WINCHIP_2:
        case PERFCTR_X86_VIA_C3:
        case PERFCTR_X86_INTEL_P5:
        case PERFCTR_X86_INTEL_P5MMX:
        case PERFCTR_X86_INTEL_PII:
        case PERFCTR_X86_INTEL_P6:
        case PERFCTR_X86_INTEL_PIII:
#ifdef PERFCTR_X86_INTEL_CORE
        case PERFCTR_X86_INTEL_CORE:
#endif
#ifdef PERFCTR_X86_INTEL_PENTM
        case PERFCTR_X86_INTEL_PENTM:
#endif
            /* P6-style cores: the global enable bit lives in evntsel[0] only */
            ptr->control.cpu_control.evntsel[0] |= PERF_ENABLE;
            for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
                ptr->control.cpu_control.evntsel[i] |= def_mode;
                ptr->control.cpu_control.pmc_map[i] = ( unsigned int ) i;
            }
            break;
#ifdef PERFCTR_X86_INTEL_CORE2
        case PERFCTR_X86_INTEL_CORE2:
#endif
#ifdef PERFCTR_X86_INTEL_ATOM
        case PERFCTR_X86_INTEL_ATOM:
#endif
#ifdef PERFCTR_X86_INTEL_NHLM
        case PERFCTR_X86_INTEL_NHLM:
#endif
#ifdef PERFCTR_X86_INTEL_WSTMR
        case PERFCTR_X86_INTEL_WSTMR:
#endif
#ifdef PERFCTR_X86_AMD_K8
        case PERFCTR_X86_AMD_K8:
#endif
#ifdef PERFCTR_X86_AMD_K8C
        case PERFCTR_X86_AMD_K8C:
#endif
#ifdef PERFCTR_X86_AMD_FAM10H /* this is defined in perfctr 2.6.29 */
        case PERFCTR_X86_AMD_FAM10H:
#endif
        case PERFCTR_X86_AMD_K7:
            /* K7/K8-style cores: every counter has its own enable bit */
            for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
                ptr->control.cpu_control.evntsel[i] |= PERF_ENABLE | def_mode;
                ptr->control.cpu_control.pmc_map[i] = ( unsigned int ) i;
            }
            break;
        }
        /* NOTE(review): models not listed above fall through the switch
           with evntsel/pmc_map untouched -- presumably unreachable here;
           confirm before adding new model support. */
#ifdef VPERFCTR_CONTROL_CLOEXEC
        ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
        SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags );
#endif

        /* Make sure the TSC is always on */
        ptr->control.cpu_control.tsc_on = 1;
    }
    return ( PAPI_OK );
}

/* Re-program the user/kernel counting domain for every counter in this
   event set.  The Enable bit is deliberately left untouched.  Returns
   PAPI_OK when at least one of PAPI_DOM_USER / PAPI_DOM_KERNEL was
   requested, PAPI_EINVAL otherwise. */
int
_x86_set_domain( hwd_control_state_t * cntrl, int domain )
{
    int i, did = 0;
    int num_cntrs = _perfctr_vector.cmp_info.num_cntrs;

    /* Clear the current domain set for this event set */
    /* We don't touch the Enable bit in this code */
    if ( is_pentium4() ) {
        for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
            cntrl->control.cpu_control.evntsel_aux[i] &=
                ~( ESCR_T0_OS | ESCR_T0_USR );
        }

        if ( domain & PAPI_DOM_USER ) {
            did = 1;
            for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
                cntrl->control.cpu_control.evntsel_aux[i] |= ESCR_T0_USR;
            }
        }

        if ( domain & PAPI_DOM_KERNEL ) {
            did = 1;
            for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
                cntrl->control.cpu_control.evntsel_aux[i] |= ESCR_T0_OS;
            }
        }
    } else {
        for ( i = 0; i < num_cntrs; i++ ) {
            cntrl->control.cpu_control.evntsel[i] &= ~( PERF_OS | PERF_USR );
        }

        if ( domain & PAPI_DOM_USER ) {
            did = 1;
            for ( i = 0; i < num_cntrs; i++ ) {
                cntrl->control.cpu_control.evntsel[i] |= PERF_USR;
            }
        }

        if ( domain & PAPI_DOM_KERNEL ) {
            did = 1;
            for ( i = 0; i < num_cntrs; i++ ) {
                cntrl->control.cpu_control.evntsel[i] |= PERF_OS;
            }
        }
    }

    if ( !did )
        return ( PAPI_EINVAL );
    else
        return ( PAPI_OK );
}

/* This function examines the event to determine
   if it can be mapped to counter ctr.
   Returns true if it can, false if it can't. */
static int
_bpt_map_avail( hwd_reg_alloc_t * dst, int ctr )
{
    return ( int ) ( dst->ra_selector & ( 1 << ctr ) );
}

/* This function forces the event to
   be mapped to only counter ctr.
   Returns nothing. */
static void
_bpt_map_set( hwd_reg_alloc_t * dst, int ctr )
{
    dst->ra_selector = ( unsigned int ) ( 1 << ctr );
    dst->ra_rank = 1;

    if ( is_pentium4() ) {
        /* Pentium 4 requires that both an escr and a counter are selected.
           Find which counter mask contains this counter.
           Set the opposite escr to empty (-1) */
        if ( dst->ra_bits.counter[0] & dst->ra_selector )
            dst->ra_escr[1] = -1;
        else
            dst->ra_escr[0] = -1;
    }
}

/* This function examines the event to determine
   if it has a single exclusive mapping.
   Returns true if exclusive, false if non-exclusive. */
static int
_bpt_map_exclusive( hwd_reg_alloc_t * dst )
{
    return ( dst->ra_rank == 1 );
}

/* This function compares the dst and src events
   to determine if any resources are shared. Typically the src event
   is exclusive, so this detects a conflict if true.
   Returns true if conflict, false if no conflict.
 */
static int
_bpt_map_shared( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src )
{
    if ( is_pentium4() ) {
        int retval1, retval2;
        /* Pentium 4 needs to check for conflict of both counters and esc registers */
        /* selectors must share bits */
        retval1 = ( ( dst->ra_selector & src->ra_selector ) ||
                    /* or escrs must equal each other and not be set to -1 */
                    ( ( dst->ra_escr[0] == src->ra_escr[0] ) &&
                      ( ( int ) dst->ra_escr[0] != -1 ) ) ||
                    ( ( dst->ra_escr[1] == src->ra_escr[1] ) &&
                      ( ( int ) dst->ra_escr[1] != -1 ) ) );
        /* Pentium 4 also needs to check for conflict on pebs registers */
        /* pebs enables must both be non-zero */
        retval2 =
            ( ( ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable ) &&
                /* and not equal to each other */
                ( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) ||
              /* same for pebs_matrix_vert */
              ( ( dst->ra_bits.pebs_matrix_vert &&
                  src->ra_bits.pebs_matrix_vert ) &&
                ( dst->ra_bits.pebs_matrix_vert !=
                  src->ra_bits.pebs_matrix_vert ) ) );
        if ( retval2 ) {
            SUBDBG( "pebs conflict!\n" );
        }
        return ( retval1 | retval2 );
    }

    /* non-P4: a conflict is simply an overlap of the counter selectors */
    return ( int ) ( dst->ra_selector & src->ra_selector );
}

/* This function removes shared resources available to the src event
   from the resources available to the dst event,
   and reduces the rank of the dst event accordingly. Typically,
   the src event will be exclusive, but the code shouldn't assume it.
   Returns nothing.
 */
static void
_bpt_map_preempt( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src )
{
    int i;
    unsigned shared;

    if ( is_pentium4() ) {
#ifdef DEBUG
        SUBDBG( "src, dst\n" );
        print_alloc( src );
        print_alloc( dst );
#endif

        /* check for a pebs conflict */
        /* pebs enables must both be non-zero */
        i = ( ( ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable ) &&
                /* and not equal to each other */
                ( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) ||
              /* same for pebs_matrix_vert */
              ( ( dst->ra_bits.pebs_matrix_vert &&
                  src->ra_bits.pebs_matrix_vert )
                && ( dst->ra_bits.pebs_matrix_vert !=
                     src->ra_bits.pebs_matrix_vert ) ) );
        if ( i ) {
            /* an unresolvable pebs conflict: dst can live nowhere */
            SUBDBG( "pebs conflict! clearing selector\n" );
            dst->ra_selector = 0;
            return;
        } else {
            /* remove counters referenced by any shared escrs */
            if ( ( dst->ra_escr[0] == src->ra_escr[0] ) &&
                 ( ( int ) dst->ra_escr[0] != -1 ) ) {
                dst->ra_selector &= ~dst->ra_bits.counter[0];
                dst->ra_escr[0] = -1;
            }
            if ( ( dst->ra_escr[1] == src->ra_escr[1] ) &&
                 ( ( int ) dst->ra_escr[1] != -1 ) ) {
                dst->ra_selector &= ~dst->ra_bits.counter[1];
                dst->ra_escr[1] = -1;
            }

            /* remove any remaining shared counters */
            shared = ( dst->ra_selector & src->ra_selector );
            if ( shared )
                dst->ra_selector ^= shared;
        }
        /* recompute rank = number of counters dst can still live on */
        for ( i = 0, dst->ra_rank = 0; i < MAX_COUNTERS; i++ )
            if ( dst->ra_selector & ( 1 << i ) )
                dst->ra_rank++;
#ifdef DEBUG
        SUBDBG( "new dst\n" );
        print_alloc( dst );
#endif
    } else {
        /* non-P4: drop the overlapping counters, then recompute rank */
        shared = dst->ra_selector & src->ra_selector;
        if ( shared )
            dst->ra_selector ^= shared;
        for ( i = 0, dst->ra_rank = 0; i < MAX_COUNTERS; i++ )
            if ( dst->ra_selector & ( 1 << i ) )
                dst->ra_rank++;
    }
}

/* Copy the src event's register assignment (selector, and on P4 the
   two escr choices) into dst.  Returns nothing. */
static void
_bpt_map_update( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src )
{
    dst->ra_selector = src->ra_selector;

    if ( is_pentium4() ) {
        dst->ra_escr[0] = src->ra_escr[0];
        dst->ra_escr[1] = src->ra_escr[1];
    }
}

/* Register allocation: map every native event in the event set onto a
   hardware counter via the modified bipartite allocator.  On success
   the chosen assignment is written back into ESI->NativeInfoArray and
   PAPI_OK is returned; PAPI_ECNFLCT means the events cannot coexist. */
static int
_x86_allocate_registers( EventSetInfo_t * ESI )
{
    int i, j, natNum;
    hwd_reg_alloc_t event_list[MAX_COUNTERS];
    hwd_register_t *ptr;

    /* Initialize the local structure needed
       for counter allocation and optimization. */
    natNum = ESI->NativeCount;

    if ( is_pentium4() ) {
        SUBDBG( "native event count: %d\n", natNum );
    }

    for ( i = 0; i < natNum; i++ ) {
        /* retrieve the mapping information about this native event */
        _papi_libpfm_ntv_code_to_bits( ( unsigned int ) ESI->NativeInfoArray[i].
                                       ni_event, &event_list[i].ra_bits );

        if ( is_pentium4() ) {
            /* combine counter bit masks for both esc registers into selector */
            event_list[i].ra_selector =
                event_list[i].ra_bits.counter[0] | event_list[i].ra_bits.
                counter[1];
        } else {
            /* make sure register allocator only looks at legal registers */
            event_list[i].ra_selector =
                event_list[i].ra_bits.selector & ALLCNTRS;
#ifdef PERFCTR_X86_INTEL_CORE2
            /* Core2: fixed-function counters live in the upper half of the
               selector; fold them into the allocatable range */
            if ( _papi_hwi_system_info.hw_info.model ==
                 PERFCTR_X86_INTEL_CORE2 )
                event_list[i].ra_selector |=
                    ( ( event_list[i].ra_bits.
                        selector >> 16 ) << 2 ) & ALLCNTRS;
#endif
        }
        /* calculate native event rank, which is no. of counters it can live on */
        event_list[i].ra_rank = 0;
        for ( j = 0; j < MAX_COUNTERS; j++ ) {
            if ( event_list[i].ra_selector & ( 1 << j ) ) {
                event_list[i].ra_rank++;
            }
        }

        if ( is_pentium4() ) {
            event_list[i].ra_escr[0] = event_list[i].ra_bits.escr[0];
            event_list[i].ra_escr[1] = event_list[i].ra_bits.escr[1];
#ifdef DEBUG
            SUBDBG( "i: %d\n", i );
            print_alloc( &event_list[i] );
#endif
        }
    }
    if ( _papi_bipartite_alloc( event_list, natNum, ESI->CmpIdx ) ) { /* successfully mapped */
        for ( i = 0; i < natNum; i++ ) {
#ifdef PERFCTR_X86_INTEL_CORE2
            if ( _papi_hwi_system_info.hw_info.model ==
                 PERFCTR_X86_INTEL_CORE2 )
                event_list[i].ra_bits.selector = event_list[i].ra_selector;
#endif
#ifdef DEBUG
            if ( is_pentium4() ) {
                SUBDBG( "i: %d\n", i );
                print_alloc( &event_list[i] );
            }
#endif
            /* Copy all info about this native event to the NativeInfo struct */
            ptr = ESI->NativeInfoArray[i].ni_bits;
            *ptr = event_list[i].ra_bits;

            if ( is_pentium4() ) {
                /* The selector contains the counter bit position. Turn it into a number
                   and store it in the first counter value, zeroing the second. */
                ptr->counter[0] = ffs( event_list[i].ra_selector ) - 1;
                ptr->counter[1] = 0;
            }

            /* Array order on perfctr is event ADD order, not counter #... */
            ESI->NativeInfoArray[i].ni_position = i;
        }
        return PAPI_OK;
    } else
        return PAPI_ECNFLCT;
}

/* Reset the counter-programming fields of a control state, preserving
   only the domain (and enable) mode bits, so the state can be
   re-populated from scratch.  Returns nothing. */
static void
clear_cs_events( hwd_control_state_t * this_state )
{
    unsigned int i, j;

    /* total counters is sum of accumulating (nractrs) and interrupting (nrictrs) */
    j = this_state->control.cpu_control.nractrs +
        this_state->control.cpu_control.nrictrs;

    /* Remove all counter control command values from eventset. */
    for ( i = 0; i < j; i++ ) {
        SUBDBG( "Clearing pmc event entry %d\n", i );
        if ( is_pentium4() ) {
            this_state->control.cpu_control.pmc_map[i] = 0;
            this_state->control.cpu_control.evntsel[i] = 0;
            /* keep only the user/kernel mode bits in the ESCR */
            this_state->control.cpu_control.evntsel_aux[i] =
                this_state->control.cpu_control.
                evntsel_aux[i] & ( ESCR_T0_OS | ESCR_T0_USR );
        } else {
            this_state->control.cpu_control.pmc_map[i] = i;
            /* keep only the enable and user/kernel mode bits */
            this_state->control.cpu_control.evntsel[i]
                = this_state->control.cpu_control.
                evntsel[i] & ( PERF_ENABLE | PERF_OS | PERF_USR );
        }
        this_state->control.cpu_control.ireset[i] = 0;
    }

    if ( is_pentium4() ) {
        /* Clear pebs stuff */
        this_state->control.cpu_control.p4.pebs_enable = 0;
        this_state->control.cpu_control.p4.pebs_matrix_vert = 0;
    }

    /* clear both a and i counter counts */
    this_state->control.cpu_control.nractrs = 0;
    this_state->control.cpu_control.nrictrs = 0;

#ifdef DEBUG
    if ( is_pentium4() )
        print_control( &this_state->control.cpu_control );
#endif
}

/* This function clears the current contents of the control structure and
   updates it with whatever resources are allocated for all the native events
   in the native info structure array.
*/ 00549 static int 00550 _x86_update_control_state( hwd_control_state_t * this_state, 00551 NativeInfo_t * native, int count, 00552 hwd_context_t * ctx ) 00553 { 00554 ( void ) ctx; /*unused */ 00555 unsigned int i, k, retval = PAPI_OK; 00556 hwd_register_t *bits,*bits2; 00557 struct perfctr_cpu_control *cpu_control = &this_state->control.cpu_control; 00558 00559 /* clear out the events from the control state */ 00560 clear_cs_events( this_state ); 00561 00562 if ( is_pentium4() ) { 00563 /* fill the counters we're using */ 00564 for ( i = 0; i < ( unsigned int ) count; i++ ) { 00565 /* dereference the mapping information about this native event */ 00566 bits = native[i].ni_bits; 00567 00568 /* Add counter control command values to eventset */ 00569 cpu_control->pmc_map[i] = bits->counter[0]; 00570 cpu_control->evntsel[i] = bits->cccr; 00571 cpu_control->ireset[i] = bits->ireset; 00572 cpu_control->pmc_map[i] |= FAST_RDPMC; 00573 cpu_control->evntsel_aux[i] |= bits->event; 00574 00575 /* pebs_enable and pebs_matrix_vert are shared registers used for replay_events. 00576 Replay_events count L1 and L2 cache events. There is only one of each for 00577 the entire eventset. Therefore, there can be only one unique replay_event 00578 per eventset. This means L1 and L2 can't be counted together. Which stinks. 00579 This conflict should be trapped in the allocation scheme, but we'll test for it 00580 here too, just in case. */ 00581 if ( bits->pebs_enable ) { 00582 /* if pebs_enable isn't set, just copy */ 00583 if ( cpu_control->p4.pebs_enable == 0 ) { 00584 cpu_control->p4.pebs_enable = bits->pebs_enable; 00585 /* if pebs_enable conflicts, flag an error */ 00586 } else if ( cpu_control->p4.pebs_enable != bits->pebs_enable ) { 00587 SUBDBG 00588 ( "WARNING: P4_update_control_state -- pebs_enable conflict!" 
); 00589 retval = PAPI_ECNFLCT; 00590 } 00591 /* if pebs_enable == bits->pebs_enable, do nothing */ 00592 } 00593 if ( bits->pebs_matrix_vert ) { 00594 /* if pebs_matrix_vert isn't set, just copy */ 00595 if ( cpu_control->p4.pebs_matrix_vert == 0 ) { 00596 cpu_control->p4.pebs_matrix_vert = bits->pebs_matrix_vert; 00597 /* if pebs_matrix_vert conflicts, flag an error */ 00598 } else if ( cpu_control->p4.pebs_matrix_vert != 00599 bits->pebs_matrix_vert ) { 00600 SUBDBG 00601 ( "WARNING: P4_update_control_state -- pebs_matrix_vert conflict!" ); 00602 retval = PAPI_ECNFLCT; 00603 } 00604 /* if pebs_matrix_vert == bits->pebs_matrix_vert, do nothing */ 00605 } 00606 } 00607 this_state->control.cpu_control.nractrs = count; 00608 00609 /* Make sure the TSC is always on */ 00610 this_state->control.cpu_control.tsc_on = 1; 00611 00612 #ifdef DEBUG 00613 print_control( &this_state->control.cpu_control ); 00614 #endif 00615 } else { 00616 switch ( _papi_hwi_system_info.hw_info.model ) { 00617 #ifdef PERFCTR_X86_INTEL_CORE2 00618 case PERFCTR_X86_INTEL_CORE2: 00619 /* fill the counters we're using */ 00620 for ( i = 0; i < ( unsigned int ) count; i++ ) { 00621 bits2 = native[i].ni_bits; 00622 for ( k = 0; k < MAX_COUNTERS; k++ ) 00623 if ( bits2->selector & ( 1 << k ) ) { 00624 break; 00625 } 00626 if ( k > 1 ) 00627 this_state->control.cpu_control.pmc_map[i] = 00628 ( k - 2 ) | 0x40000000; 00629 else 00630 this_state->control.cpu_control.pmc_map[i] = k; 00631 00632 /* Add counter control command values to eventset */ 00633 this_state->control.cpu_control.evntsel[i] |= 00634 bits2->counter_cmd; 00635 } 00636 break; 00637 #endif 00638 default: 00639 /* fill the counters we're using */ 00640 for ( i = 0; i < ( unsigned int ) count; i++ ) { 00641 /* Add counter control command values to eventset */ 00642 bits2 = native[i].ni_bits; 00643 this_state->control.cpu_control.evntsel[i] |= 00644 bits2->counter_cmd; 00645 } 00646 } 00647 this_state->control.cpu_control.nractrs = ( 
unsigned int ) count; 00648 } 00649 return retval; 00650 } 00651 00652 static int 00653 _x86_start( hwd_context_t * ctx, hwd_control_state_t * state ) 00654 { 00655 int error; 00656 #ifdef DEBUG 00657 print_control( &state->control.cpu_control ); 00658 #endif 00659 00660 if ( state->rvperfctr != NULL ) { 00661 if ( ( error = 00662 rvperfctr_control( state->rvperfctr, &state->control ) ) < 0 ) { 00663 SUBDBG( "rvperfctr_control returns: %d\n", error ); 00664 PAPIERROR( RCNTRL_ERROR ); 00665 return ( PAPI_ESYS ); 00666 } 00667 return ( PAPI_OK ); 00668 } 00669 00670 if ( ( error = vperfctr_control( ctx->perfctr, &state->control ) ) < 0 ) { 00671 SUBDBG( "vperfctr_control returns: %d\n", error ); 00672 PAPIERROR( VCNTRL_ERROR ); 00673 return ( PAPI_ESYS ); 00674 } 00675 return ( PAPI_OK ); 00676 } 00677 00678 static int 00679 _x86_stop( hwd_context_t * ctx, hwd_control_state_t * state ) 00680 { 00681 int error; 00682 00683 if ( state->rvperfctr != NULL ) { 00684 if ( rvperfctr_stop( ( struct rvperfctr * ) ctx->perfctr ) < 0 ) { 00685 PAPIERROR( RCNTRL_ERROR ); 00686 return ( PAPI_ESYS ); 00687 } 00688 return ( PAPI_OK ); 00689 } 00690 00691 error = vperfctr_stop( ctx->perfctr ); 00692 if ( error < 0 ) { 00693 SUBDBG( "vperfctr_stop returns: %d\n", error ); 00694 PAPIERROR( VCNTRL_ERROR ); 00695 return ( PAPI_ESYS ); 00696 } 00697 return ( PAPI_OK ); 00698 } 00699 00700 static int 00701 _x86_read( hwd_context_t * ctx, hwd_control_state_t * spc, long long **dp, 00702 int flags ) 00703 { 00704 if ( flags & PAPI_PAUSED ) { 00705 vperfctr_read_state( ctx->perfctr, &spc->state, NULL ); 00706 if ( !is_pentium4() ) { 00707 unsigned int i = 0; 00708 for ( i = 0; 00709 i < 00710 spc->control.cpu_control.nractrs + 00711 spc->control.cpu_control.nrictrs; i++ ) { 00712 SUBDBG( "vperfctr_read_state: counter %d = %lld\n", i, 00713 spc->state.pmc[i] ); 00714 } 00715 } 00716 } else { 00717 SUBDBG( "vperfctr_read_ctrs\n" ); 00718 if ( spc->rvperfctr != NULL ) { 00719 
rvperfctr_read_ctrs( spc->rvperfctr, &spc->state ); 00720 } else { 00721 vperfctr_read_ctrs( ctx->perfctr, &spc->state ); 00722 } 00723 } 00724 *dp = ( long long * ) spc->state.pmc; 00725 #ifdef DEBUG 00726 { 00727 if ( ISLEVEL( DEBUG_SUBSTRATE ) ) { 00728 unsigned int i; 00729 if ( is_pentium4() ) { 00730 for ( i = 0; i < spc->control.cpu_control.nractrs; i++ ) { 00731 SUBDBG( "raw val hardware index %d is %lld\n", i, 00732 ( long long ) spc->state.pmc[i] ); 00733 } 00734 } else { 00735 for ( i = 0; 00736 i < 00737 spc->control.cpu_control.nractrs + 00738 spc->control.cpu_control.nrictrs; i++ ) { 00739 SUBDBG( "raw val hardware index %d is %lld\n", i, 00740 ( long long ) spc->state.pmc[i] ); 00741 } 00742 } 00743 } 00744 } 00745 #endif 00746 return ( PAPI_OK ); 00747 } 00748 00749 static int 00750 _x86_reset( hwd_context_t * ctx, hwd_control_state_t * cntrl ) 00751 { 00752 return ( _x86_start( ctx, cntrl ) ); 00753 } 00754 00755 /* Perfctr requires that interrupting counters appear at the end of the pmc list 00756 In the case a user wants to interrupt on a counter in an evntset that is not 00757 among the last events, we need to move the perfctr virtual events around to 00758 make it last. This function swaps two perfctr events, and then adjust the 00759 position entries in both the NativeInfoArray and the EventInfoArray to keep 00760 everything consistent. 
 */
static void
swap_events( EventSetInfo_t * ESI, struct hwd_pmc_control *contr, int cntr1,
             int cntr2 )
{
    unsigned int ui;
    int si, i, j;

    /* update the native-event positions */
    for ( i = 0; i < ESI->NativeCount; i++ ) {
        if ( ESI->NativeInfoArray[i].ni_position == cntr1 )
            ESI->NativeInfoArray[i].ni_position = cntr2;
        else if ( ESI->NativeInfoArray[i].ni_position == cntr2 )
            ESI->NativeInfoArray[i].ni_position = cntr1;
    }

    /* update the preset/derived-event position lists */
    for ( i = 0; i < ESI->NumberOfEvents; i++ ) {
        for ( j = 0; ESI->EventInfoArray[i].pos[j] >= 0; j++ ) {
            if ( ESI->EventInfoArray[i].pos[j] == cntr1 )
                ESI->EventInfoArray[i].pos[j] = cntr2;
            else if ( ESI->EventInfoArray[i].pos[j] == cntr2 )
                ESI->EventInfoArray[i].pos[j] = cntr1;
        }
    }

    /* swap the perfctr control entries themselves */
    ui = contr->cpu_control.pmc_map[cntr1];
    contr->cpu_control.pmc_map[cntr1] = contr->cpu_control.pmc_map[cntr2];
    contr->cpu_control.pmc_map[cntr2] = ui;

    ui = contr->cpu_control.evntsel[cntr1];
    contr->cpu_control.evntsel[cntr1] = contr->cpu_control.evntsel[cntr2];
    contr->cpu_control.evntsel[cntr2] = ui;

    if ( is_pentium4() ) {
        ui = contr->cpu_control.evntsel_aux[cntr1];
        contr->cpu_control.evntsel_aux[cntr1] =
            contr->cpu_control.evntsel_aux[cntr2];
        contr->cpu_control.evntsel_aux[cntr2] = ui;
    }

    si = contr->cpu_control.ireset[cntr1];
    contr->cpu_control.ireset[cntr1] = contr->cpu_control.ireset[cntr2];
    contr->cpu_control.ireset[cntr2] = si;
}

/* Enable (threshold != 0) or disable (threshold == 0) overflow
   interrupts on the event at EventIndex.  Interrupting counters are
   kept at the end of the pmc list (see swap_events above).  Returns
   PAPI_OK / a PAPI error code. */
static int
_x86_set_overflow( EventSetInfo_t *ESI, int EventIndex, int threshold )
{
    hwd_control_state_t *ctl = ( hwd_control_state_t * ) ( ESI->ctl_state );
    struct hwd_pmc_control *contr = &(ctl->control);
    int i, ncntrs, nricntrs = 0, nracntrs = 0, retval = 0;
    OVFDBG( "EventIndex=%d\n", EventIndex );

#ifdef DEBUG
    if ( is_pentium4() )
        print_control( &(contr->cpu_control) );
#endif

    /* The correct event to overflow is EventIndex */
    ncntrs = _perfctr_vector.cmp_info.num_cntrs;
    i = ESI->EventInfoArray[EventIndex].pos[0];

    if ( i >= ncntrs ) {
        PAPIERROR( "Selector id %d is larger than ncntrs %d", i, ncntrs );
        return PAPI_EINVAL;
    }

    if ( threshold != 0 ) {       /* Set an overflow threshold */
        retval = _papi_hwi_start_signal( _perfctr_vector.cmp_info.hardware_intr_sig,
                                         NEED_CONTEXT,
                                         _perfctr_vector.cmp_info.CmpIdx );
        if ( retval != PAPI_OK )
            return ( retval );

        /* overflow interrupt occurs on the NEXT event after overflow occurs
           thus we subtract 1 from the threshold. */
        contr->cpu_control.ireset[i] = ( -threshold + 1 );

        if ( is_pentium4() )
            contr->cpu_control.evntsel[i] |= CCCR_OVF_PMI_T0;
        else
            contr->cpu_control.evntsel[i] |= PERF_INT_ENABLE;

        /* one more interrupting counter, one fewer accumulating one */
        contr->cpu_control.nrictrs++;
        contr->cpu_control.nractrs--;
        nricntrs = ( int ) contr->cpu_control.nrictrs;
        nracntrs = ( int ) contr->cpu_control.nractrs;
        contr->si_signo = _perfctr_vector.cmp_info.hardware_intr_sig;

        /* move this event to the bottom part of the list if needed */
        if ( i < nracntrs )
            swap_events( ESI, contr, i, nracntrs );
        OVFDBG( "Modified event set\n" );
    } else {
        /* clear a previously set overflow, if the interrupt bit is on */
        if ( is_pentium4() && contr->cpu_control.evntsel[i] & CCCR_OVF_PMI_T0 ) {
            contr->cpu_control.ireset[i] = 0;
            contr->cpu_control.evntsel[i] &= ( ~CCCR_OVF_PMI_T0 );
            contr->cpu_control.nrictrs--;
            contr->cpu_control.nractrs++;
        } else if ( !is_pentium4() &&
                    contr->cpu_control.evntsel[i] & PERF_INT_ENABLE ) {
            contr->cpu_control.ireset[i] = 0;
            contr->cpu_control.evntsel[i] &= ( ~PERF_INT_ENABLE );
            contr->cpu_control.nrictrs--;
            contr->cpu_control.nractrs++;
        }

        nricntrs = ( int ) contr->cpu_control.nrictrs;
        nracntrs = ( int ) contr->cpu_control.nractrs;

        /* move this event to the top part of the list if needed */
        if ( i >= nracntrs )
            swap_events( ESI, contr, i, nracntrs - 1 );

        if ( !nricntrs )
            contr->si_signo = 0;

        OVFDBG( "Modified event set\n" );

        retval = _papi_hwi_stop_signal( _perfctr_vector.cmp_info.hardware_intr_sig );
    }

#ifdef DEBUG
    if ( is_pentium4() )
        print_control( &(contr->cpu_control) );
#endif
    OVFDBG( "End of call. Exit code: %d\n", retval );
    return ( retval );
}

/* Nothing to do when profiling stops for this component. */
static int
_x86_stop_profiling( ThreadInfo_t * master, EventSetInfo_t * ESI )
{
    ( void ) master;          /*unused */
    ( void ) ESI;             /*unused */
    return ( PAPI_OK );
}



/* these define cccr and escr register bits, and the p4 event structure */
#include "perfmon/pfmlib_pentium4.h"
#include "../lib/pfmlib_pentium4_priv.h"

#define P4_REPLAY_REAL_MASK 0x00000003

extern pentium4_escr_reg_t pentium4_escrs[];
extern pentium4_cccr_reg_t pentium4_cccrs[];
extern pentium4_event_t pentium4_events[];


/* Table of P4 replay-event register settings, indexed by replay event;
   .enb goes to pebs_enable, .mat_vert to pebs_matrix_vert. */
static pentium4_replay_regs_t p4_replay_regs[] = {
    /* 0 */ {.enb = 0,
             /* dummy */
             .mat_vert = 0,
             },
    /* 1 */ {.enb = 0,
             /* dummy */
             .mat_vert = 0,
             },
    /* 2 */ {.enb = 0x01000001,
             /* 1stL_cache_load_miss_retired */
             .mat_vert = 0x00000001,
             },
    /* 3 */ {.enb = 0x01000002,
             /* 2ndL_cache_load_miss_retired */
             .mat_vert = 0x00000001,
             },
    /* 4 */ {.enb = 0x01000004,
             /* DTLB_load_miss_retired */
             .mat_vert = 0x00000001,
             },
    /* 5 */ {.enb = 0x01000004,
             /* DTLB_store_miss_retired */
             .mat_vert = 0x00000002,
             },
    /* 6 */ {.enb = 0x01000004,
             /* DTLB_all_miss_retired */
             .mat_vert = 0x00000003,
             },
    /* 7 */ {.enb = 0x01018001,
             /* Tagged_mispred_branch */
             .mat_vert = 0x00000010,
             },
    /* 8 */ {.enb = 0x01000200,
             /* MOB_load_replay_retired */
             .mat_vert = 0x00000001,
             },
    /* 9 */ {.enb = 0x01000400,
             /* split_load_retired */
             .mat_vert = 0x00000001,
             },
    /* 10 */ {.enb = 0x01000400,
              /* split_store_retired */
              .mat_vert = 0x00000002,
              },
};

/* this maps the arbitrary pmd index in libpfm/pentium4_events.h to the intel documentation */
static int pfm2intel[] =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 2, 3, 6, 7, 10, 11, 14, 15, 17 };




/* This call is broken. Selector can be much bigger than 32 bits. It should be a pfmlib_regmask_t - pjm */
/* Also, libpfm assumes events can live on different counters with different codes. This call only returns
   the first occurence found. */
/* Right now its only called by ntv_code_to_bits in perfctr-p3, so we're ok. But for it to be
   generally useful it should be fixed.
- dkt */ 00969 static int 00970 _pfm_get_counter_info( unsigned int event, unsigned int *selector, int *code ) 00971 { 00972 pfmlib_regmask_t cnt, impl; 00973 unsigned int num; 00974 unsigned int i, first = 1; 00975 int ret; 00976 00977 if ( ( ret = pfm_get_event_counters( event, &cnt ) ) != PFMLIB_SUCCESS ) { 00978 PAPIERROR( "pfm_get_event_counters(%d,%p): %s", event, &cnt, 00979 pfm_strerror( ret ) ); 00980 return PAPI_ESYS; 00981 } 00982 if ( ( ret = pfm_get_num_counters( &num ) ) != PFMLIB_SUCCESS ) { 00983 PAPIERROR( "pfm_get_num_counters(%p): %s", num, pfm_strerror( ret ) ); 00984 return PAPI_ESYS; 00985 } 00986 if ( ( ret = pfm_get_impl_counters( &impl ) ) != PFMLIB_SUCCESS ) { 00987 PAPIERROR( "pfm_get_impl_counters(%p): %s", &impl, 00988 pfm_strerror( ret ) ); 00989 return PAPI_ESYS; 00990 } 00991 00992 *selector = 0; 00993 for ( i = 0; num; i++ ) { 00994 if ( pfm_regmask_isset( &impl, i ) ) 00995 num--; 00996 if ( pfm_regmask_isset( &cnt, i ) ) { 00997 if ( first ) { 00998 if ( ( ret = 00999 pfm_get_event_code_counter( event, i, 01000 code ) ) != 01001 PFMLIB_SUCCESS ) { 01002 PAPIERROR( "pfm_get_event_code_counter(%d, %d, %p): %s", 01003 event, i, code, pfm_strerror( ret ) ); 01004 return PAPI_ESYS; 01005 } 01006 first = 0; 01007 } 01008 *selector |= 1 << i; 01009 } 01010 } 01011 return PAPI_OK; 01012 } 01013 01014 int 01015 _papi_libpfm_ntv_code_to_bits_perfctr( unsigned int EventCode, 01016 hwd_register_t *newbits ) 01017 { 01018 unsigned int event, umask; 01019 01020 X86_register_t *bits = (X86_register_t *)newbits; 01021 01022 if ( is_pentium4() ) { 01023 pentium4_escr_value_t escr_value; 01024 pentium4_cccr_value_t cccr_value; 01025 unsigned int num_masks, replay_mask, unit_masks[12]; 01026 unsigned int event_mask; 01027 unsigned int tag_value, tag_enable; 01028 unsigned int i; 01029 int j, escr, cccr, pmd; 01030 01031 if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK ) 01032 return PAPI_ENOEVNT; 01033 01034 /* for each allowed 
escr (1 or 2) find the allowed cccrs. 01035 for each allowed cccr find the pmd index 01036 convert to an intel counter number; or it into bits->counter */ 01037 for ( i = 0; i < MAX_ESCRS_PER_EVENT; i++ ) { 01038 bits->counter[i] = 0; 01039 escr = pentium4_events[event].allowed_escrs[i]; 01040 if ( escr < 0 ) { 01041 continue; 01042 } 01043 01044 bits->escr[i] = escr; 01045 01046 for ( j = 0; j < MAX_CCCRS_PER_ESCR; j++ ) { 01047 cccr = pentium4_escrs[escr].allowed_cccrs[j]; 01048 if ( cccr < 0 ) { 01049 continue; 01050 } 01051 01052 pmd = pentium4_cccrs[cccr].pmd; 01053 bits->counter[i] |= ( 1 << pfm2intel[pmd] ); 01054 } 01055 } 01056 01057 /* if there's only one valid escr, copy the values */ 01058 if ( escr < 0 ) { 01059 bits->escr[1] = bits->escr[0]; 01060 bits->counter[1] = bits->counter[0]; 01061 } 01062 01063 /* Calculate the event-mask value. Invalid masks 01064 * specified by the caller are ignored. */ 01065 tag_value = 0; 01066 tag_enable = 0; 01067 event_mask = _pfm_convert_umask( event, umask ); 01068 01069 if ( event_mask & 0xF0000 ) { 01070 tag_enable = 1; 01071 tag_value = ( ( event_mask & 0xF0000 ) >> EVENT_MASK_BITS ); 01072 } 01073 01074 event_mask &= 0x0FFFF; /* mask off possible tag bits */ 01075 01076 /* Set up the ESCR and CCCR register values. */ 01077 escr_value.val = 0; 01078 escr_value.bits.t1_usr = 0; /* controlled by kernel */ 01079 escr_value.bits.t1_os = 0; /* controlled by kernel */ 01080 // escr_value.bits.t0_usr = (plm & PFM_PLM3) ? 1 : 0; 01081 // escr_value.bits.t0_os = (plm & PFM_PLM0) ? 
1 : 0; 01082 escr_value.bits.tag_enable = tag_enable; 01083 escr_value.bits.tag_value = tag_value; 01084 escr_value.bits.event_mask = event_mask; 01085 escr_value.bits.event_select = pentium4_events[event].event_select; 01086 escr_value.bits.reserved = 0; 01087 01088 /* initialize the proper bits in the cccr register */ 01089 cccr_value.val = 0; 01090 cccr_value.bits.reserved1 = 0; 01091 cccr_value.bits.enable = 1; 01092 cccr_value.bits.escr_select = pentium4_events[event].escr_select; 01093 cccr_value.bits.active_thread = 3; 01094 /* FIXME: This is set to count when either logical 01095 * CPU is active. Need a way to distinguish 01096 * between logical CPUs when HT is enabled. 01097 * the docs say these bits should always 01098 * be set. */ 01099 cccr_value.bits.compare = 0; 01100 /* FIXME: What do we do with "threshold" settings? */ 01101 cccr_value.bits.complement = 0; 01102 /* FIXME: What do we do with "threshold" settings? */ 01103 cccr_value.bits.threshold = 0; 01104 /* FIXME: What do we do with "threshold" settings? */ 01105 cccr_value.bits.force_ovf = 0; 01106 /* FIXME: Do we want to allow "forcing" overflow 01107 * interrupts on all counter increments? */ 01108 cccr_value.bits.ovf_pmi_t0 = 0; 01109 cccr_value.bits.ovf_pmi_t1 = 0; 01110 /* PMI taken care of by kernel typically */ 01111 cccr_value.bits.reserved2 = 0; 01112 cccr_value.bits.cascade = 0; 01113 /* FIXME: How do we handle "cascading" counters? */ 01114 cccr_value.bits.overflow = 0; 01115 01116 /* these flags are always zero, from what I can tell... 
*/ 01117 bits->pebs_enable = 0; /* flag for PEBS counting */ 01118 bits->pebs_matrix_vert = 0; 01119 /* flag for PEBS_MATRIX_VERT, whatever that is */ 01120 01121 /* ...unless the event is replay_event */ 01122 if ( !strcmp( pentium4_events[event].name, "replay_event" ) ) { 01123 escr_value.bits.event_mask = event_mask & P4_REPLAY_REAL_MASK; 01124 num_masks = prepare_umask( umask, unit_masks ); 01125 for ( i = 0; i < num_masks; i++ ) { 01126 replay_mask = unit_masks[i]; 01127 if ( replay_mask > 1 && replay_mask < 11 ) { 01128 /* process each valid mask we find */ 01129 bits->pebs_enable |= p4_replay_regs[replay_mask].enb; 01130 bits->pebs_matrix_vert |= p4_replay_regs[replay_mask].mat_vert; 01131 } 01132 } 01133 } 01134 01135 /* store the escr and cccr values */ 01136 bits->event = escr_value.val; 01137 bits->cccr = cccr_value.val; 01138 bits->ireset = 0; /* I don't really know what this does */ 01139 SUBDBG( "escr: 0x%lx; cccr: 0x%lx\n", escr_value.val, cccr_value.val ); 01140 } else { 01141 01142 int ret, code; 01143 01144 if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK ) 01145 return PAPI_ENOEVNT; 01146 01147 if ( ( ret = _pfm_get_counter_info( event, &bits->selector, 01148 &code ) ) != PAPI_OK ) 01149 return ret; 01150 01151 bits->counter_cmd=(int) (code | ((_pfm_convert_umask(event,umask))<< 8) ); 01152 01153 SUBDBG( "selector: %#x\n", bits->selector ); 01154 SUBDBG( "event: %#x; umask: %#x; code: %#x; cmd: %#x\n", event, 01155 umask, code, ( ( hwd_register_t * ) bits )->counter_cmd ); 01156 } 01157 01158 return PAPI_OK; 01159 } 01160 01161 01162 01163 papi_vector_t _perfctr_vector = { 01164 .cmp_info = { 01165 /* default component information (unspecified values are initialized to 0) */ 01166 .name = "perfctr", 01167 .description = "Linux perfctr CPU counters", 01168 .default_domain = PAPI_DOM_USER, 01169 .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL, 01170 .default_granularity = PAPI_GRN_THR, 01171 .available_granularities = 
PAPI_GRN_THR, 01172 .hardware_intr_sig = PAPI_INT_SIGNAL, 01173 01174 /* component specific cmp_info initializations */ 01175 .fast_real_timer = 1, 01176 .fast_virtual_timer = 1, 01177 .attach = 1, 01178 .attach_must_ptrace = 1, 01179 .cntr_umasks = 1, 01180 } 01181 , 01182 01183 /* sizes of framework-opaque component-private structures */ 01184 .size = { 01185 .context = sizeof ( X86_perfctr_context_t ), 01186 .control_state = sizeof ( X86_perfctr_control_t ), 01187 .reg_value = sizeof ( X86_register_t ), 01188 .reg_alloc = sizeof ( X86_reg_alloc_t ), 01189 } 01190 , 01191 01192 /* function pointers in this component */ 01193 .init_control_state = _x86_init_control_state, 01194 .start = _x86_start, 01195 .stop = _x86_stop, 01196 .read = _x86_read, 01197 .allocate_registers = _x86_allocate_registers, 01198 .update_control_state = _x86_update_control_state, 01199 .set_domain = _x86_set_domain, 01200 .reset = _x86_reset, 01201 .set_overflow = _x86_set_overflow, 01202 .stop_profiling = _x86_stop_profiling, 01203 01204 .init_component = _perfctr_init_component, 01205 .ctl = _perfctr_ctl, 01206 .dispatch_timer = _perfctr_dispatch_timer, 01207 .init_thread = _perfctr_init_thread, 01208 .shutdown_thread = _perfctr_shutdown_thread, 01209 01210 /* from libpfm */ 01211 .ntv_enum_events = _papi_libpfm_ntv_enum_events, 01212 .ntv_name_to_code = _papi_libpfm_ntv_name_to_code, 01213 .ntv_code_to_name = _papi_libpfm_ntv_code_to_name, 01214 .ntv_code_to_descr = _papi_libpfm_ntv_code_to_descr, 01215 .ntv_code_to_bits = _papi_libpfm_ntv_code_to_bits_perfctr, 01216 01217 }; 01218 01219