|
PAPI
5.0.1.0
|
00001 /* 00002 * File: perfmon.c 00003 * Author: Philip Mucci 00004 * mucci@cs.utk.edu 00005 * Mods: Brian Sheely 00006 * bsheely@eecs.utk.edu 00007 */ 00008 00009 /* TODO LIST: 00010 - Events for all platforms 00011 - Derived events for all platforms 00012 - Latency profiling 00013 - BTB/IPIEAR sampling 00014 - Test on ITA2, Pentium 4 00015 - hwd_ntv_code_to_name 00016 - Make native map carry major events, not umasks 00017 - Enum event uses native_map not pfm() 00018 - Hook up globals to be freed to sub_info 00019 - Better feature bit support for IEAR 00020 */ 00021 00022 #include "papi.h" 00023 #include "papi_internal.h" 00024 #include "papi_vector.h" 00025 #include "papi_memory.h" 00026 #include "papi_libpfm_events.h" 00027 #include "extras.h" 00028 00029 #include "perfmon.h" 00030 00031 #include "linux-memory.h" 00032 #include "linux-timer.h" 00033 #include "linux-common.h" 00034 00035 #ifdef __ia64__ 00036 #include "perfmon/pfmlib_itanium2.h" 00037 #include "perfmon/pfmlib_montecito.h" 00038 #endif 00039 00040 typedef unsigned uint; 00041 00042 /* Advance declarations */ 00043 static int _papi_pfm_set_overflow( EventSetInfo_t * ESI, int EventIndex, 00044 int threshold ); 00045 papi_vector_t _papi_pfm_vector; 00046 00047 00048 /* Static locals */ 00049 00050 static int _perfmon2_pfm_pmu_type = -1; 00051 static pfmlib_regmask_t _perfmon2_pfm_unavailable_pmcs; 00052 static pfmlib_regmask_t _perfmon2_pfm_unavailable_pmds; 00053 00054 /* Debug functions */ 00055 00056 #ifdef DEBUG 00057 static void 00058 dump_smpl_arg( pfm_dfl_smpl_arg_t * arg ) 00059 { 00060 SUBDBG( "SMPL_ARG.buf_size = %llu\n", 00061 ( unsigned long long ) arg->buf_size ); 00062 SUBDBG( "SMPL_ARG.buf_flags = %d\n", arg->buf_flags ); 00063 } 00064 00065 static void 00066 dump_sets( pfarg_setdesc_t * set, int num_sets ) 00067 { 00068 int i; 00069 00070 for ( i = 0; i < num_sets; i++ ) { 00071 SUBDBG( "SET[%d]\n", i ); 00072 SUBDBG( "SET[%d].set_id = %d\n", i, set[i].set_id ); 00073 // 
SUBDBG("SET[%d].set_id_next = %d\n",i,set[i].set_id_next); 00074 SUBDBG( "SET[%d].set_flags = %d\n", i, set[i].set_flags ); 00075 SUBDBG( "SET[%d].set_timeout = %llu\n", i, 00076 ( unsigned long long ) set[i].set_timeout ); 00077 // SUBDBG("SET[%d].set_mmap_offset = 0x%016llx\n",i,(unsigned long long)set[i].set_mmap_offset); 00078 } 00079 } 00080 00081 static void 00082 dump_setinfo( pfarg_setinfo_t * setinfo, int num_sets ) 00083 { 00084 int i; 00085 00086 for ( i = 0; i < num_sets; i++ ) { 00087 SUBDBG( "SETINFO[%d]\n", i ); 00088 SUBDBG( "SETINFO[%d].set_id = %d\n", i, setinfo[i].set_id ); 00089 // SUBDBG("SETINFO[%d].set_id_next = %d\n",i,setinfo[i].set_id_next); 00090 SUBDBG( "SETINFO[%d].set_flags = %d\n", i, setinfo[i].set_flags ); 00091 SUBDBG( "SETINFO[%d].set_ovfl_pmds[0] = 0x%016llx\n", i, 00092 ( unsigned long long ) setinfo[i].set_ovfl_pmds[0] ); 00093 SUBDBG( "SETINFO[%d].set_runs = %llu\n", i, 00094 ( unsigned long long ) setinfo[i].set_runs ); 00095 SUBDBG( "SETINFO[%d].set_timeout = %llu\n", i, 00096 ( unsigned long long ) setinfo[i].set_timeout ); 00097 SUBDBG( "SETINFO[%d].set_act_duration = %llu\n", i, 00098 ( unsigned long long ) setinfo[i].set_act_duration ); 00099 // SUBDBG("SETINFO[%d].set_mmap_offset = 0x%016llx\n",i,(unsigned long long)setinfo[i].set_mmap_offset); 00100 SUBDBG( "SETINFO[%d].set_avail_pmcs[0] = 0x%016llx\n", i, 00101 ( unsigned long long ) setinfo[i].set_avail_pmcs[0] ); 00102 SUBDBG( "SETINFO[%d].set_avail_pmds[0] = 0x%016llx\n", i, 00103 ( unsigned long long ) setinfo[i].set_avail_pmds[0] ); 00104 } 00105 } 00106 00107 static void 00108 dump_pmc( pfm_control_state_t * ctl ) 00109 { 00110 unsigned int i; 00111 pfarg_pmc_t *pc = ctl->pc; 00112 00113 for ( i = 0; i < ctl->out.pfp_pmc_count; i++ ) { 00114 SUBDBG( "PC[%d]\n", i ); 00115 SUBDBG( "PC[%d].reg_num = %d\n", i, pc[i].reg_num ); 00116 SUBDBG( "PC[%d].reg_set = %d\n", i, pc[i].reg_set ); 00117 SUBDBG( "PC[%d].reg_flags = 0x%08x\n", i, pc[i].reg_flags ); 00118 SUBDBG( 
"PC[%d].reg_value = 0x%016llx\n", i, 00119 ( unsigned long long ) pc[i].reg_value ); 00120 } 00121 } 00122 00123 static void 00124 dump_pmd( pfm_control_state_t * ctl ) 00125 { 00126 unsigned int i; 00127 pfarg_pmd_t *pd = ctl->pd; 00128 00129 for ( i = 0; i < ctl->in.pfp_event_count; i++ ) { 00130 SUBDBG( "PD[%d]\n", i ); 00131 SUBDBG( "PD[%d].reg_num = %d\n", i, pd[i].reg_num ); 00132 SUBDBG( "PD[%d].reg_set = %d\n", i, pd[i].reg_set ); 00133 SUBDBG( "PD[%d].reg_flags = 0x%08x\n", i, pd[i].reg_flags ); 00134 SUBDBG( "PD[%d].reg_value = 0x%016llx\n", i, 00135 ( unsigned long long ) pd[i].reg_value ); 00136 SUBDBG( "PD[%d].reg_long_reset = %llu\n", i, 00137 ( unsigned long long ) pd[i].reg_long_reset ); 00138 SUBDBG( "PD[%d].reg_short_reset = %llu\n", i, 00139 ( unsigned long long ) pd[i].reg_short_reset ); 00140 SUBDBG( "PD[%d].reg_last_reset_val = %llu\n", i, 00141 ( unsigned long long ) pd[i].reg_last_reset_val ); 00142 SUBDBG( "PD[%d].reg_ovfl_switch_cnt = %llu\n", i, 00143 ( unsigned long long ) pd[i].reg_ovfl_switch_cnt ); 00144 SUBDBG( "PD[%d].reg_reset_pmds[0] = 0x%016llx\n", i, 00145 ( unsigned long long ) pd[i].reg_reset_pmds[0] ); 00146 SUBDBG( "PD[%d].reg_smpl_pmds[0] = 0x%016llx\n", i, 00147 ( unsigned long long ) pd[i].reg_smpl_pmds[0] ); 00148 SUBDBG( "PD[%d].reg_smpl_eventid = %llu\n", i, 00149 ( unsigned long long ) pd[i].reg_smpl_eventid ); 00150 SUBDBG( "PD[%d].reg_random_mask = %llu\n", i, 00151 ( unsigned long long ) pd[i].reg_random_mask ); 00152 SUBDBG( "PD[%d].reg_random_seed = %d\n", i, pd[i].reg_random_seed ); 00153 } 00154 } 00155 00156 static void 00157 dump_smpl_hdr( pfm_dfl_smpl_hdr_t * hdr ) 00158 { 00159 SUBDBG( "SMPL_HDR.hdr_count = %llu\n", 00160 ( unsigned long long ) hdr->hdr_count ); 00161 SUBDBG( "SMPL_HDR.hdr_cur_offs = %llu\n", 00162 ( unsigned long long ) hdr->hdr_cur_offs ); 00163 SUBDBG( "SMPL_HDR.hdr_overflows = %llu\n", 00164 ( unsigned long long ) hdr->hdr_overflows ); 00165 SUBDBG( "SMPL_HDR.hdr_buf_size = %llu\n", 
00166 ( unsigned long long ) hdr->hdr_buf_size ); 00167 SUBDBG( "SMPL_HDR.hdr_min_buf_space = %llu\n", 00168 ( unsigned long long ) hdr->hdr_min_buf_space ); 00169 SUBDBG( "SMPL_HDR.hdr_version = %d\n", hdr->hdr_version ); 00170 SUBDBG( "SMPL_HDR.hdr_buf_flags = %d\n", hdr->hdr_buf_flags ); 00171 } 00172 00173 static void 00174 dump_smpl( pfm_dfl_smpl_entry_t * entry ) 00175 { 00176 SUBDBG( "SMPL.pid = %d\n", entry->pid ); 00177 SUBDBG( "SMPL.ovfl_pmd = %d\n", entry->ovfl_pmd ); 00178 SUBDBG( "SMPL.last_reset_val = %llu\n", 00179 ( unsigned long long ) entry->last_reset_val ); 00180 SUBDBG( "SMPL.ip = 0x%llx\n", ( unsigned long long ) entry->ip ); 00181 SUBDBG( "SMPL.tstamp = %llu\n", ( unsigned long long ) entry->tstamp ); 00182 SUBDBG( "SMPL.cpu = %d\n", entry->cpu ); 00183 SUBDBG( "SMPL.set = %d\n", entry->set ); 00184 SUBDBG( "SMPL.tgid = %d\n", entry->tgid ); 00185 } 00186 #endif 00187 00188 #define PFM_MAX_PMCDS 20 00189 00190 int 00191 _papi_pfm_write_pmcs( pfm_context_t * ctx, pfm_control_state_t * ctl ) 00192 { 00193 ( void ) ctx; /*unused */ 00194 unsigned int i = 0; 00195 int ret; 00196 00197 SUBDBG( "PFM_WRITE_PMCS(%d,%p,%d)\n", ctl->ctx_fd, ctl->pc, 00198 ctl->out.pfp_pmc_count ); 00199 if ( ctl->out.pfp_pmc_count > PFM_MAX_PMCDS ) { 00200 for ( i = 0; i < ctl->out.pfp_pmc_count - PFM_MAX_PMCDS; 00201 i += PFM_MAX_PMCDS ) { 00202 if ( ( ret = 00203 pfm_write_pmcs( ctl->ctx_fd, ctl->pc + i, 00204 PFM_MAX_PMCDS ) ) ) { 00205 DEBUGCALL( DEBUG_SUBSTRATE, dump_pmc( ctl ) ); 00206 PAPIERROR( "pfm_write_pmcs(%d,%p,%d): %s", ctl->ctx_fd, ctl->pc, 00207 ctl->out.pfp_pmc_count, strerror( ret ) ); 00208 return ( PAPI_ESYS ); 00209 } 00210 } 00211 DEBUGCALL( DEBUG_SUBSTRATE, dump_pmc( ctl ) ); 00212 } 00213 if ( ( ret = 00214 pfm_write_pmcs( ctl->ctx_fd, ctl->pc + i, 00215 ctl->out.pfp_pmc_count - i ) ) ) { 00216 DEBUGCALL( DEBUG_SUBSTRATE, dump_pmc( ctl ) ); 00217 PAPIERROR( "pfm_write_pmcs(%d,%p,%d): %s", ctl->ctx_fd, ctl->pc, 00218 ctl->out.pfp_pmc_count, 
strerror( ret ) ); 00219 return ( PAPI_ESYS ); 00220 } 00221 DEBUGCALL( DEBUG_SUBSTRATE, dump_pmc( ctl ) ); 00222 00223 return PAPI_OK; 00224 } 00225 00226 int 00227 _papi_pfm_write_pmds( pfm_context_t * ctx, pfm_control_state_t * ctl ) 00228 { 00229 ( void ) ctx; /*unused */ 00230 unsigned int i = 0; 00231 int ret; 00232 00233 SUBDBG( "PFM_WRITE_PMDS(%d,%p,%d)\n", ctl->ctx_fd, ctl->pd, 00234 ctl->in.pfp_event_count ); 00235 if ( ctl->in.pfp_event_count > PFM_MAX_PMCDS ) { 00236 for ( i = 0; i < ctl->in.pfp_event_count - PFM_MAX_PMCDS; 00237 i += PFM_MAX_PMCDS ) { 00238 if ( ( ret = 00239 pfm_write_pmds( ctl->ctx_fd, ctl->pd + i, 00240 PFM_MAX_PMCDS ) ) ) { 00241 DEBUGCALL( DEBUG_SUBSTRATE, dump_pmd( ctl ) ); 00242 PAPIERROR( "pfm_write_pmds(%d,%p,%d): errno=%d %s", ctl->ctx_fd, 00243 ctl->pd, ctl->in.pfp_event_count, errno, 00244 strerror( ret ) ); 00245 perror( "pfm_write_pmds" ); 00246 return ( PAPI_ESYS ); 00247 } 00248 } 00249 DEBUGCALL( DEBUG_SUBSTRATE, dump_pmd( ctl ) ); 00250 } 00251 if ( ( ret = 00252 pfm_write_pmds( ctl->ctx_fd, ctl->pd + i, 00253 ctl->in.pfp_event_count - i ) ) ) { 00254 DEBUGCALL( DEBUG_SUBSTRATE, dump_pmd( ctl ) ); 00255 PAPIERROR( "pfm_write_pmds(%d,%p,%d): errno=%d %s", ctl->ctx_fd, 00256 ctl->pd, ctl->in.pfp_event_count, errno, strerror( ret ) ); 00257 perror( "pfm_write_pmds" ); 00258 return ( PAPI_ESYS ); 00259 } 00260 DEBUGCALL( DEBUG_SUBSTRATE, dump_pmd( ctl ) ); 00261 00262 return PAPI_OK; 00263 } 00264 00265 int 00266 _papi_pfm_read_pmds( pfm_context_t * ctx, pfm_control_state_t * ctl ) 00267 { 00268 ( void ) ctx; /*unused */ 00269 unsigned int i = 0; 00270 int ret; 00271 00272 SUBDBG( "PFM_READ_PMDS(%d,%p,%d)\n", ctl->ctx_fd, ctl->pd, 00273 ctl->in.pfp_event_count ); 00274 if ( ctl->in.pfp_event_count > PFM_MAX_PMCDS ) { 00275 for ( i = 0; i < ctl->in.pfp_event_count - PFM_MAX_PMCDS; 00276 i += PFM_MAX_PMCDS ) { 00277 if ( ( ret = 00278 pfm_read_pmds( ctl->ctx_fd, ctl->pd + i, 00279 PFM_MAX_PMCDS ) ) ) { 00280 DEBUGCALL( 
DEBUG_SUBSTRATE, dump_pmd( ctl ) ); 00281 PAPIERROR( "pfm_read_pmds(%d,%p,%d): %s", ctl->ctx_fd, ctl->pd, 00282 ctl->in.pfp_event_count, strerror( ret ) ); 00283 return ( ( errno == EBADF ) ? PAPI_ECLOST : PAPI_ESYS ); 00284 } 00285 } 00286 DEBUGCALL( DEBUG_SUBSTRATE, dump_pmd( ctl ) ); 00287 } 00288 if ( ( ret = 00289 pfm_read_pmds( ctl->ctx_fd, ctl->pd + i, 00290 ctl->in.pfp_event_count - i ) ) ) { 00291 DEBUGCALL( DEBUG_SUBSTRATE, dump_pmd( ctl ) ); 00292 PAPIERROR( "pfm_read_pmds(%d,%p,%d): %s", ctl->ctx_fd, ctl->pd, 00293 ctl->in.pfp_event_count, strerror( ret ) ); 00294 return ( ( errno == EBADF ) ? PAPI_ECLOST : PAPI_ESYS ); 00295 } 00296 DEBUGCALL( DEBUG_SUBSTRATE, dump_pmd( ctl ) ); 00297 00298 return PAPI_OK; 00299 } 00300 00301 00302 /* This routine effectively does argument checking as the real magic will happen 00303 in compute_kernel_args. This just gets the value back from the kernel. */ 00304 00305 static int 00306 check_multiplex_timeout( int ctx_fd, unsigned long *timeout_ns ) 00307 { 00308 int ret; 00309 pfarg_setdesc_t set[2]; 00310 00311 memset( set, 0, sizeof ( pfarg_setdesc_t ) * 2 ); 00312 set[1].set_id = 1; 00313 set[1].set_flags = PFM_SETFL_TIME_SWITCH; 00314 set[1].set_timeout = *timeout_ns; 00315 SUBDBG( "Multiplexing interval requested is %llu ns.\n", 00316 ( unsigned long long ) set[1].set_timeout ); 00317 00318 /* Create a test eventset */ 00319 00320 SUBDBG( "PFM_CREATE_EVTSETS(%d,%p,1)\n", ctx_fd, &set[1] ); 00321 if ( ( ret = pfm_create_evtsets( ctx_fd, &set[1], 1 ) ) != PFMLIB_SUCCESS ) { 00322 DEBUGCALL( DEBUG_SUBSTRATE, dump_sets( &set[1], 1 ) ); 00323 PAPIERROR( "pfm_create_evtsets(%d,%p,%d): %s", ctx_fd, &set[1], 1, 00324 strerror( ret ) ); 00325 return ( PAPI_ESYS ); 00326 } 00327 00328 SUBDBG( "Multiplexing interval returned is %llu ns.\n", 00329 ( unsigned long long ) set[1].set_timeout ); 00330 *timeout_ns = set[1].set_timeout; 00331 00332 /* Delete the second eventset */ 00333 00334 pfm_delete_evtsets( ctx_fd, &set[1], 1 
); 00335 00336 return ( PAPI_OK ); 00337 } 00338 00339 /* The below function is stolen from libpfm from Stephane Eranian */ 00340 static int 00341 detect_timeout_and_unavail_pmu_regs( pfmlib_regmask_t * r_pmcs, 00342 pfmlib_regmask_t * r_pmds, 00343 unsigned long *timeout_ns ) 00344 { 00345 pfarg_ctx_t ctx; 00346 pfarg_setinfo_t setf; 00347 unsigned int i; 00348 int ret, j, myfd; 00349 00350 memset( r_pmcs, 0, sizeof ( *r_pmcs ) ); 00351 memset( r_pmds, 0, sizeof ( *r_pmds ) ); 00352 00353 memset( &ctx, 0, sizeof ( ctx ) ); 00354 memset( &setf, 0, sizeof ( setf ) ); 00355 /* 00356 * if no context descriptor is passed, then create 00357 * a temporary context 00358 */ 00359 SUBDBG( "PFM_CREATE_CONTEXT(%p,%p,%p,%d)\n", &ctx, NULL, NULL, 0 ); 00360 myfd = pfm_create_context( &ctx, NULL, NULL, 0 ); 00361 if ( myfd == -1 ) { 00362 PAPIERROR( "detect_unavail_pmu_regs:pfm_create_context(): %s", 00363 strerror( errno ) ); 00364 return ( PAPI_ESYS ); 00365 } 00366 SUBDBG( "PFM_CREATE_CONTEXT returned fd %d\n", myfd ); 00367 /* 00368 * retrieve available register bitmasks from set0 00369 * which is guaranteed to exist for every context 00370 */ 00371 ret = pfm_getinfo_evtsets( myfd, &setf, 1 ); 00372 if ( ret != PFMLIB_SUCCESS ) { 00373 PAPIERROR( "pfm_getinfo_evtsets(): %s", strerror( ret ) ); 00374 return ( PAPI_ESYS ); 00375 } 00376 DEBUGCALL( DEBUG_SUBSTRATE, dump_setinfo( &setf, 1 ) ); 00377 if ( r_pmcs ) 00378 for ( i = 0; i < PFM_PMC_BV; i++ ) { 00379 for ( j = 0; j < 64; j++ ) { 00380 if ( ( setf.set_avail_pmcs[i] & ( 1ULL << j ) ) == 0 ) 00381 pfm_regmask_set( r_pmcs, ( i << 6 ) + j ); 00382 } 00383 } 00384 if ( r_pmds ) 00385 for ( i = 0; i < PFM_PMD_BV; i++ ) { 00386 for ( j = 0; j < 64; j++ ) { 00387 if ( ( setf.set_avail_pmds[i] & ( 1ULL << j ) ) == 0 ) 00388 pfm_regmask_set( r_pmds, ( i << 6 ) + j ); 00389 } 00390 } 00391 check_multiplex_timeout( myfd, timeout_ns ); 00392 i = close( myfd ); 00393 SUBDBG( "CLOSE fd %d returned %d\n", myfd, i ); 00394 return 
PAPI_OK; 00395 } 00396 00397 /* BEGIN COMMON CODE */ 00398 00399 static inline int 00400 compute_kernel_args( hwd_control_state_t * ctl0 ) 00401 { 00402 pfm_control_state_t *ctl = ( pfm_control_state_t * ) ctl0; 00403 pfmlib_input_param_t *inp = &ctl->in; 00404 pfmlib_output_param_t *outp = &ctl->out; 00405 pfmlib_input_param_t tmpin; 00406 pfmlib_output_param_t tmpout; 00407 #if 0 00408 /* This will be used to fixup the overflow and sample args after re-allocation */ 00409 pfarg_pmd_t oldpd; 00410 #endif 00411 pfarg_pmd_t *pd = ctl->pd; 00412 pfarg_pmc_t *pc = ctl->pc; 00413 pfarg_setdesc_t *sets = ctl->set; 00414 pfarg_setinfo_t *setinfos = ctl->setinfo; 00415 int *num_sets = &ctl->num_sets; 00416 unsigned int set = 0; 00417 int donepc = 0, donepd = 0, ret, j; 00418 unsigned int i, dispatch_count = inp->pfp_event_count; 00419 int togo = inp->pfp_event_count, done = 0; 00420 00421 /* Save old PD array so we can reconstruct certain flags. */ 00422 /* This can be removed when we have higher level code call */ 00423 /* set_profile,set_overflow etc when there is hardware */ 00424 /* (component) support, but this change won't happen for PAPI 3.5 */ 00425 00426 SUBDBG 00427 ( "entry multiplexed %d, pfp_event_count %d, num_cntrs %d, num_sets %d\n", 00428 ctl->multiplexed, inp->pfp_event_count, _papi_pfm_vector.cmp_info.num_cntrs, 00429 *num_sets ); 00430 if ( ( ctl->multiplexed ) && 00431 ( inp->pfp_event_count > 00432 ( unsigned int ) _papi_pfm_vector.cmp_info.num_cntrs ) ) { 00433 dispatch_count = _papi_pfm_vector.cmp_info.num_cntrs; 00434 } 00435 00436 while ( togo ) { 00437 again: 00438 memset( &tmpin, 0x0, sizeof ( tmpin ) ); 00439 memset( &tmpout, 0x0, sizeof ( tmpout ) ); 00440 00441 SUBDBG( "togo %d, done %d, dispatch_count %d, num_cntrs %d\n", togo, 00442 done, dispatch_count, _papi_pfm_vector.cmp_info.num_cntrs ); 00443 tmpin.pfp_event_count = dispatch_count; 00444 tmpin.pfp_dfl_plm = inp->pfp_dfl_plm; 00445 00446 /* Make sure we tell dispatch that these PMC's 
are not available */ 00447 memcpy( &tmpin.pfp_unavail_pmcs, &_perfmon2_pfm_unavailable_pmcs, 00448 sizeof ( _perfmon2_pfm_unavailable_pmcs ) ); 00449 00450 for ( i = 0, j = done; i < dispatch_count; i++, j++ ) { 00451 memcpy( tmpin.pfp_events + i, inp->pfp_events + j, 00452 sizeof ( pfmlib_event_t ) ); 00453 } 00454 00455 if ( ( ret = 00456 pfm_dispatch_events( &tmpin, NULL, &tmpout, 00457 NULL ) ) != PFMLIB_SUCCESS ) { 00458 if ( ctl->multiplexed ) { 00459 dispatch_count--; 00460 if ( dispatch_count == 0 ) { 00461 PAPIERROR( "pfm_dispatch_events(): %s", 00462 pfm_strerror( ret ) ); 00463 return ( _papi_libpfm_error( ret ) ); 00464 } 00465 SUBDBG 00466 ( "Dispatch failed because of counter conflict, trying again with %d counters.\n", 00467 dispatch_count ); 00468 goto again; 00469 } 00470 PAPIERROR( "pfm_dispatch_events(): %s", pfm_strerror( ret ) ); 00471 return ( _papi_libpfm_error( ret ) ); 00472 } 00473 00474 /* 00475 * Now prepare the argument to initialize the PMDs and PMCS. 00476 * We must pfp_pmc_count to determine the number of PMC to intialize. 00477 * We must use pfp_event_count to determine the number of PMD to initialize. 00478 * Some events causes extra PMCs to be used, so pfp_pmc_count may be >= pfp_event_count. 00479 * 00480 * This step is new compared to libpfm-2.x. It is necessary because the library no 00481 * longer knows about the kernel data structures. 
00482 */ 00483 00484 for ( i = 0; i < tmpout.pfp_pmc_count; i++, donepc++ ) { 00485 pc[donepc].reg_num = tmpout.pfp_pmcs[i].reg_num; 00486 pc[donepc].reg_value = tmpout.pfp_pmcs[i].reg_value; 00487 pc[donepc].reg_set = set; 00488 SUBDBG( "PC%d (i%d) is reg num %d, value %llx, set %d\n", donepc, i, 00489 pc[donepc].reg_num, 00490 ( unsigned long long ) pc[donepc].reg_value, 00491 pc[donepc].reg_set ); 00492 } 00493 00494 /* figure out pmd mapping from output pmc */ 00495 00496 #if defined(HAVE_PFM_REG_EVT_IDX) 00497 for ( i = 0, j = 0; i < tmpin.pfp_event_count; i++, donepd++ ) { 00498 pd[donepd].reg_num = tmpout.pfp_pmcs[j].reg_pmd_num; 00499 pd[donepd].reg_set = set; 00500 SUBDBG( "PD%d (i%d,j%d) is reg num %d, set %d\n", donepd, i, j, 00501 pd[donepd].reg_num, pd[donepd].reg_set ); 00502 00503 /* Skip over entries that map to the same PMD, 00504 PIV has 2 PMCS for every PMD */ 00505 00506 for ( ; j < tmpout.pfp_pmc_count; j++ ) 00507 if ( tmpout.pfp_pmcs[j].reg_evt_idx != i ) 00508 break; 00509 } 00510 #else 00511 for ( i = 0; i < tmpout.pfp_pmd_count; i++, donepd++ ) { 00512 pd[donepd].reg_num = tmpout.pfp_pmds[i].reg_num; 00513 pd[donepd].reg_set = set; 00514 SUBDBG( "PD%d (i%d) is reg num %d, set %d\n", donepd, i, 00515 pd[donepd].reg_num, pd[donepd].reg_set ); 00516 } 00517 #endif 00518 00519 togo -= dispatch_count; 00520 done += dispatch_count; 00521 if ( togo > _papi_pfm_vector.cmp_info.num_cntrs ) 00522 dispatch_count = _papi_pfm_vector.cmp_info.num_cntrs; 00523 else 00524 dispatch_count = togo; 00525 00526 setinfos[set].set_id = set; 00527 sets[set].set_id = set; 00528 set++; 00529 } 00530 00531 *num_sets = set; 00532 outp->pfp_pmc_count = donepc; 00533 00534 if ( ctl->multiplexed && ( set > 1 ) ) { 00535 for ( i = 0; i < set; i++ ) { 00536 sets[i].set_flags = PFM_SETFL_TIME_SWITCH; 00537 sets[i].set_timeout = ctl->multiplexed; 00538 } 00539 } 00540 SUBDBG 00541 ( "exit multiplexed %d (ns switch time), pfp_pmc_count %d, num_sets %d\n", 00542 
ctl->multiplexed, outp->pfp_pmc_count, *num_sets ); 00543 return ( PAPI_OK ); 00544 } 00545 00546 int 00547 tune_up_fd( int ctx_fd ) 00548 { 00549 int ret; 00550 00551 /* set close-on-exec to ensure we will be getting the PFM_END_MSG, i.e., 00552 * fd not visible to child. */ 00553 ret = fcntl( ctx_fd, F_SETFD, FD_CLOEXEC ); 00554 if ( ret == -1 ) { 00555 PAPIERROR( "cannot fcntl(FD_CLOEXEC) on %d: %s", ctx_fd, 00556 strerror( errno ) ); 00557 return ( PAPI_ESYS ); 00558 } 00559 /* setup asynchronous notification on the file descriptor */ 00560 ret = fcntl( ctx_fd, F_SETFL, fcntl( ctx_fd, F_GETFL, 0 ) | O_ASYNC ); 00561 if ( ret == -1 ) { 00562 PAPIERROR( "cannot fcntl(O_ASYNC) on %d: %s", ctx_fd, 00563 strerror( errno ) ); 00564 return ( PAPI_ESYS ); 00565 } 00566 /* get ownership of the descriptor */ 00567 ret = fcntl( ctx_fd, F_SETOWN, mygettid( ) ); 00568 if ( ret == -1 ) { 00569 PAPIERROR( "cannot fcntl(F_SETOWN) on %d: %s", ctx_fd, 00570 strerror( errno ) ); 00571 return ( PAPI_ESYS ); 00572 } 00573 /* 00574 * when you explicitely declare that you want a particular signal, 00575 * even with you use the default signal, the kernel will send more 00576 * information concerning the event to the signal handler. 00577 * 00578 * In particular, it will send the file descriptor from which the 00579 * event is originating which can be quite useful when monitoring 00580 * multiple tasks from a single thread. 
00581 */ 00582 ret = fcntl( ctx_fd, F_SETSIG, _papi_pfm_vector.cmp_info.hardware_intr_sig ); 00583 if ( ret == -1 ) { 00584 PAPIERROR( "cannot fcntl(F_SETSIG,%d) on %d: %s", 00585 _papi_pfm_vector.cmp_info.hardware_intr_sig, ctx_fd, 00586 strerror( errno ) ); 00587 return ( PAPI_ESYS ); 00588 } 00589 return ( PAPI_OK ); 00590 } 00591 00592 static int 00593 attach( hwd_control_state_t * ctl, unsigned long tid ) 00594 { 00595 pfarg_ctx_t *newctx = ( pfarg_ctx_t * ) malloc( sizeof ( pfarg_ctx_t ) ); 00596 pfarg_load_t *load_args = 00597 ( pfarg_load_t * ) malloc( sizeof ( pfarg_load_t ) ); 00598 int ret; 00599 00600 if ( ( newctx == NULL ) || ( load_args == NULL ) ) 00601 return ( PAPI_ENOMEM ); 00602 memset( newctx, 0x0, sizeof ( *newctx ) ); 00603 memset( load_args, 0, sizeof ( *load_args ) ); 00604 00605 /* Make sure the process exists and is being ptraced() */ 00606 00607 ret = ptrace( PTRACE_ATTACH, tid, NULL, NULL ); 00608 if ( ret == 0 ) { 00609 ptrace( PTRACE_DETACH, tid, NULL, NULL ); 00610 PAPIERROR( "Process/thread %d is not being ptraced", tid ); 00611 free( newctx ); 00612 free( load_args ); 00613 return ( PAPI_EINVAL ); 00614 } 00615 /* If we get here, then we should hope that the process is being 00616 ptraced, if not, then we probably can't attach to it. 
*/ 00617 00618 if ( ( ret == -1 ) && ( errno != EPERM ) ) { 00619 PAPIERROR( "Process/thread %d cannot be ptraced: %s", tid, 00620 strerror( errno ) ); 00621 free( newctx ); 00622 free( load_args ); 00623 return ( PAPI_EINVAL ); 00624 } 00625 00626 SUBDBG( "PFM_CREATE_CONTEXT(%p,%p,%p,%d)\n", newctx, NULL, NULL, 0 ); 00627 if ( ( ret = pfm_create_context( newctx, NULL, NULL, 0 ) ) == -1 ) { 00628 PAPIERROR( "attach:pfm_create_context(): %s", strerror( errno ) ); 00629 free( newctx ); 00630 free( load_args ); 00631 return ( PAPI_ESYS ); 00632 } 00633 SUBDBG( "PFM_CREATE_CONTEXT returned fd %d\n", ret ); 00634 tune_up_fd( ret ); 00635 00636 ( ( pfm_control_state_t * ) ctl )->ctx_fd = ret; 00637 ( ( pfm_control_state_t * ) ctl )->ctx = newctx; 00638 load_args->load_pid = tid; 00639 ( ( pfm_control_state_t * ) ctl )->load = load_args; 00640 00641 return ( PAPI_OK ); 00642 } 00643 00644 static int 00645 detach( hwd_context_t * ctx, hwd_control_state_t * ctl ) 00646 { 00647 int i; 00648 00649 i = close( ( ( pfm_control_state_t * ) ctl )->ctx_fd ); 00650 SUBDBG( "CLOSE fd %d returned %d\n", 00651 ( ( pfm_control_state_t * ) ctl )->ctx_fd, i ); 00652 (void) i; 00653 00654 /* Restore to main threads context */ 00655 free( ( ( pfm_control_state_t * ) ctl )->ctx ); 00656 ( ( pfm_control_state_t * ) ctl )->ctx = &( ( pfm_context_t * ) ctx )->ctx; 00657 ( ( pfm_control_state_t * ) ctl )->ctx_fd = 00658 ( ( pfm_context_t * ) ctx )->ctx_fd; 00659 free( ( ( pfm_control_state_t * ) ctl )->load ); 00660 ( ( pfm_control_state_t * ) ctl )->load = 00661 &( ( pfm_context_t * ) ctx )->load; 00662 00663 return ( PAPI_OK ); 00664 } 00665 00666 static inline int 00667 set_domain( hwd_control_state_t * ctl0, int domain ) 00668 { 00669 pfm_control_state_t *ctl = ( pfm_control_state_t * ) ctl0; 00670 int mode = 0, did = 0; 00671 pfmlib_input_param_t *inp = &ctl->in; 00672 00673 if ( domain & PAPI_DOM_USER ) { 00674 did = 1; 00675 mode |= PFM_PLM3; 00676 } 00677 00678 if ( domain & 
PAPI_DOM_KERNEL ) { 00679 did = 1; 00680 mode |= PFM_PLM0; 00681 } 00682 00683 if ( domain & PAPI_DOM_SUPERVISOR ) { 00684 did = 1; 00685 mode |= PFM_PLM1; 00686 } 00687 00688 if ( domain & PAPI_DOM_OTHER ) { 00689 did = 1; 00690 mode |= PFM_PLM2; 00691 } 00692 00693 if ( !did ) 00694 return ( PAPI_EINVAL ); 00695 00696 inp->pfp_dfl_plm = mode; 00697 00698 return ( compute_kernel_args( ctl ) ); 00699 } 00700 00701 static inline int 00702 set_granularity( hwd_control_state_t * this_state, int domain ) 00703 { 00704 ( void ) this_state; /*unused */ 00705 switch ( domain ) { 00706 case PAPI_GRN_PROCG: 00707 case PAPI_GRN_SYS: 00708 case PAPI_GRN_SYS_CPU: 00709 case PAPI_GRN_PROC: 00710 return PAPI_ECMP; 00711 case PAPI_GRN_THR: 00712 break; 00713 default: 00714 return PAPI_EINVAL; 00715 } 00716 return PAPI_OK; 00717 } 00718 00719 /* This function should tell your kernel extension that your children 00720 inherit performance register information and propagate the values up 00721 upon child exit and parent wait. */ 00722 00723 static inline int 00724 set_inherit( int arg ) 00725 { 00726 ( void ) arg; /*unused */ 00727 return PAPI_ECMP; 00728 } 00729 00730 static int 00731 get_string_from_file( char *file, char *str, int len ) 00732 { 00733 FILE *f = fopen( file, "r" ); 00734 char buf[PAPI_HUGE_STR_LEN]; 00735 if ( f == NULL ) { 00736 PAPIERROR( "fopen(%s): %s", file, strerror( errno ) ); 00737 return ( PAPI_ESYS ); 00738 } 00739 if ( fscanf( f, "%s\n", buf ) != 1 ) { 00740 PAPIERROR( "fscanf(%s, %%s\\n): Unable to scan 1 token", file ); 00741 fclose( f ); 00742 return PAPI_ESYS; 00743 } 00744 strncpy( str, buf, ( len > PAPI_HUGE_STR_LEN ? PAPI_HUGE_STR_LEN : len ) ); 00745 fclose( f ); 00746 return ( PAPI_OK ); 00747 } 00748 00749 int 00750 _papi_pfm_init_component( int cidx ) 00751 { 00752 int retval; 00753 char buf[PAPI_HUGE_STR_LEN]; 00754 00755 /* The following checks the PFMLIB version 00756 against the perfmon2 kernel version... 
*/ 00757 strncpy( _papi_pfm_vector.cmp_info.support_version, buf, 00758 sizeof ( _papi_pfm_vector.cmp_info.support_version ) ); 00759 00760 retval = get_string_from_file( "/sys/kernel/perfmon/version", 00761 _papi_pfm_vector.cmp_info.kernel_version, 00762 sizeof ( _papi_pfm_vector.cmp_info.kernel_version ) ); 00763 if ( retval != PAPI_OK ) { 00764 strncpy(_papi_pfm_vector.cmp_info.disabled_reason, 00765 "/sys/kernel/perfmon/version not found",PAPI_MAX_STR_LEN); 00766 return retval; 00767 } 00768 00769 #ifdef PFM_VERSION 00770 sprintf( buf, "%d.%d", PFM_VERSION_MAJOR( PFM_VERSION ), 00771 PFM_VERSION_MINOR( PFM_VERSION ) ); 00772 SUBDBG( "Perfmon2 library versions...kernel: %s library: %s\n", 00773 _papi_pfm_vector.cmp_info.kernel_version, buf ); 00774 if ( strcmp( _papi_pfm_vector.cmp_info.kernel_version, buf ) != 0 ) { 00775 /* do a little exception processing; 81 is compatible with 80 */ 00776 if ( !( ( PFM_VERSION_MINOR( PFM_VERSION ) == 81 ) && 00777 ( strncmp( _papi_pfm_vector.cmp_info.kernel_version, "2.8", 3 ) == 00778 0 ) ) ) { 00779 PAPIERROR( "Version mismatch of libpfm: compiled %s " 00780 "vs. 
installed %s\n", 00781 buf, _papi_pfm_vector.cmp_info.kernel_version ); 00782 return PAPI_ESYS; 00783 } 00784 } 00785 #endif 00786 00787 _papi_pfm_vector.cmp_info.hardware_intr_sig = SIGRTMIN + 2, 00788 00789 00790 /* Run the libpfm-specific setup */ 00791 retval=_papi_libpfm_init(&_papi_pfm_vector, cidx); 00792 if (retval) return retval; 00793 00794 /* Load the module, find out if any PMC's/PMD's are off limits */ 00795 00796 /* Perfmon2 timeouts are based on the clock tick, we need to check 00797 them otherwise it will complain at us when we multiplex */ 00798 00799 unsigned long min_timeout_ns; 00800 00801 struct timespec ts; 00802 00803 if ( syscall( __NR_clock_getres, CLOCK_REALTIME, &ts ) == -1 ) { 00804 PAPIERROR( "Could not detect proper HZ rate, multiplexing may fail\n" ); 00805 min_timeout_ns = 10000000; 00806 } else { 00807 min_timeout_ns = ts.tv_nsec; 00808 } 00809 00810 /* This will fail if we've done timeout detection wrong */ 00811 retval=detect_timeout_and_unavail_pmu_regs( &_perfmon2_pfm_unavailable_pmcs, 00812 &_perfmon2_pfm_unavailable_pmds, 00813 &min_timeout_ns ); 00814 if ( retval != PAPI_OK ) { 00815 return ( retval ); 00816 } 00817 00818 if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_IBM ) { 00819 /* powerpc */ 00820 _papi_pfm_vector.cmp_info.available_domains |= PAPI_DOM_KERNEL | 00821 PAPI_DOM_SUPERVISOR; 00822 if (strcmp(_papi_hwi_system_info.hw_info.model_string, "POWER6" ) == 0) { 00823 _papi_pfm_vector.cmp_info.default_domain = PAPI_DOM_USER | 00824 PAPI_DOM_KERNEL | 00825 PAPI_DOM_SUPERVISOR; 00826 } 00827 } else { 00828 _papi_pfm_vector.cmp_info.available_domains |= PAPI_DOM_KERNEL; 00829 } 00830 00831 if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_SUN ) { 00832 switch ( _perfmon2_pfm_pmu_type ) { 00833 #ifdef PFMLIB_SPARC_ULTRA12_PMU 00834 case PFMLIB_SPARC_ULTRA12_PMU: 00835 case PFMLIB_SPARC_ULTRA3_PMU: 00836 case PFMLIB_SPARC_ULTRA3I_PMU: 00837 case PFMLIB_SPARC_ULTRA3PLUS_PMU: 00838 case 
PFMLIB_SPARC_ULTRA4PLUS_PMU: 00839 break; 00840 #endif 00841 default: 00842 _papi_pfm_vector.cmp_info.available_domains |= 00843 PAPI_DOM_SUPERVISOR; 00844 break; 00845 } 00846 } 00847 00848 if ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_CRAY ) { 00849 _papi_pfm_vector.cmp_info.available_domains |= PAPI_DOM_OTHER; 00850 } 00851 00852 if ( ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_INTEL ) || 00853 ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_AMD ) ) { 00854 _papi_pfm_vector.cmp_info.fast_counter_read = 1; 00855 _papi_pfm_vector.cmp_info.fast_real_timer = 1; 00856 _papi_pfm_vector.cmp_info.cntr_umasks = 1; 00857 } 00858 00859 return PAPI_OK; 00860 } 00861 00862 int 00863 _papi_pfm_shutdown_component( ) 00864 { 00865 return PAPI_OK; 00866 } 00867 00868 static int 00869 _papi_pfm_init_thread( hwd_context_t * thr_ctx ) 00870 { 00871 pfarg_load_t load_args; 00872 pfarg_ctx_t newctx; 00873 int ret, ctx_fd; 00874 00875 #if defined(USE_PROC_PTTIMER) 00876 ret = init_proc_thread_timer( thr_ctx ); 00877 if ( ret != PAPI_OK ) 00878 return ( ret ); 00879 #endif 00880 00881 memset( &newctx, 0, sizeof ( newctx ) ); 00882 memset( &load_args, 0, sizeof ( load_args ) ); 00883 00884 if ( ( ret = pfm_create_context( &newctx, NULL, NULL, 0 ) ) == -1 ) { 00885 PAPIERROR( "pfm_create_context(): %s", 00886 strerror( errno ) ); 00887 return ( PAPI_ESYS ); 00888 } 00889 SUBDBG( "PFM_CREATE_CONTEXT returned fd %d\n", ret ); 00890 tune_up_fd( ret ); 00891 ctx_fd = ret; 00892 00893 memcpy( &( ( pfm_context_t * ) thr_ctx )->ctx, &newctx, sizeof ( newctx ) ); 00894 ( ( pfm_context_t * ) thr_ctx )->ctx_fd = ctx_fd; 00895 load_args.load_pid = mygettid( ); 00896 memcpy( &( ( pfm_context_t * ) thr_ctx )->load, &load_args, 00897 sizeof ( load_args ) ); 00898 00899 return ( PAPI_OK ); 00900 } 00901 00902 /* reset the hardware counters */ 00903 int 00904 _papi_pfm_reset( hwd_context_t * ctx, hwd_control_state_t * ctl ) 00905 { 00906 unsigned int i; 00907 int ret; 00908 
00909 /* Read could have clobbered the values */ 00910 for ( i = 0; i < ( ( pfm_control_state_t * ) ctl )->in.pfp_event_count; 00911 i++ ) { 00912 if ( ( ( pfm_control_state_t * ) ctl )->pd[i]. 00913 reg_flags & PFM_REGFL_OVFL_NOTIFY ) 00914 ( ( pfm_control_state_t * ) ctl )->pd[i].reg_value = 00915 ( ( pfm_control_state_t * ) ctl )->pd[i].reg_long_reset; 00916 else 00917 ( ( pfm_control_state_t * ) ctl )->pd[i].reg_value = 0ULL; 00918 } 00919 00920 ret = 00921 _papi_pfm_write_pmds( ( pfm_context_t * ) ctx, 00922 ( pfm_control_state_t * ) ctl ); 00923 if ( ret != PAPI_OK ) 00924 return PAPI_ESYS; 00925 00926 return ( PAPI_OK ); 00927 } 00928 00929 /* write(set) the hardware counters */ 00930 int 00931 _papi_pfm_write( hwd_context_t * ctx, hwd_control_state_t * ctl, 00932 long long *from ) 00933 { 00934 unsigned int i; 00935 int ret; 00936 00937 /* Read could have clobbered the values */ 00938 for ( i = 0; i < ( ( pfm_control_state_t * ) ctl )->in.pfp_event_count; 00939 i++ ) { 00940 if ( ( ( pfm_control_state_t * ) ctl )->pd[i]. 
00941 reg_flags & PFM_REGFL_OVFL_NOTIFY ) 00942 ( ( pfm_control_state_t * ) ctl )->pd[i].reg_value = 00943 from[i] + 00944 ( ( pfm_control_state_t * ) ctl )->pd[i].reg_long_reset; 00945 else 00946 ( ( pfm_control_state_t * ) ctl )->pd[i].reg_value = from[i]; 00947 } 00948 00949 ret = 00950 _papi_pfm_write_pmds( ( pfm_context_t * ) ctx, 00951 ( pfm_control_state_t * ) ctl ); 00952 if ( ret != PAPI_OK ) 00953 return PAPI_ESYS; 00954 00955 00956 return ( PAPI_OK ); 00957 } 00958 00959 int 00960 _papi_pfm_read( hwd_context_t * ctx0, hwd_control_state_t * ctl0, 00961 long long **events, int flags ) 00962 { 00963 ( void ) flags; /*unused */ 00964 unsigned int i; 00965 int ret; 00966 long long tot_runs = 0LL; 00967 pfm_control_state_t *ctl = ( pfm_control_state_t * ) ctl0; 00968 pfm_context_t *ctx = ( pfm_context_t * ) ctx0; 00969 00970 ret = _papi_pfm_read_pmds( ctx, ctl ); 00971 if ( ret != PAPI_OK ) 00972 return PAPI_ESYS; 00973 00974 /* Copy the values over */ 00975 00976 for ( i = 0; i < ctl->in.pfp_event_count; i++ ) { 00977 if ( ctl->pd[i].reg_flags & PFM_REGFL_OVFL_NOTIFY ) 00978 ctl->counts[i] = ctl->pd[i].reg_value - ctl->pd[i].reg_long_reset; 00979 else 00980 ctl->counts[i] = ctl->pd[i].reg_value; 00981 SUBDBG( "PMD[%d] = %lld (LLD),%llu (LLU)\n", i, 00982 ( unsigned long long ) ctl->counts[i], 00983 ( unsigned long long ) ctl->pd[i].reg_value ); 00984 } 00985 *events = ctl->counts; 00986 00987 /* If we're not multiplexing, bail now */ 00988 00989 if ( ctl->num_sets == 1 ) 00990 return ( PAPI_OK ); 00991 00992 /* If we're multiplexing, get the scaling information */ 00993 00994 SUBDBG( "PFM_GETINFO_EVTSETS(%d,%p,%d)\n", ctl->ctx_fd, ctl->setinfo, 00995 ctl->num_sets ); 00996 if ( ( ret = 00997 pfm_getinfo_evtsets( ctl->ctx_fd, ctl->setinfo, ctl->num_sets ) ) ) { 00998 DEBUGCALL( DEBUG_SUBSTRATE, 00999 dump_setinfo( ctl->setinfo, ctl->num_sets ) ); 01000 PAPIERROR( "pfm_getinfo_evtsets(%d,%p,%d): %s", ctl->ctx_fd, 01001 ctl->setinfo, ctl->num_sets, strerror( ret 
) ); 01002 *events = NULL; 01003 return ( PAPI_ESYS ); 01004 } 01005 DEBUGCALL( DEBUG_SUBSTRATE, dump_setinfo( ctl->setinfo, ctl->num_sets ) ); 01006 01007 /* Add up the number of total runs */ 01008 01009 for ( i = 0; i < ( unsigned int ) ctl->num_sets; i++ ) 01010 tot_runs += ctl->setinfo[i].set_runs; 01011 01012 /* Now scale the values */ 01013 01014 for ( i = 0; i < ctl->in.pfp_event_count; i++ ) { 01015 SUBDBG 01016 ( "Counter %d is in set %d ran %llu of %llu times, old count %lld.\n", 01017 i, ctl->pd[i].reg_set, 01018 ( unsigned long long ) ctl->setinfo[ctl->pd[i].reg_set].set_runs, 01019 ( unsigned long long ) tot_runs, ctl->counts[i] ); 01020 if ( ctl->setinfo[ctl->pd[i].reg_set].set_runs ) 01021 ctl->counts[i] = 01022 ( ctl->counts[i] * tot_runs ) / 01023 ctl->setinfo[ctl->pd[i].reg_set].set_runs; 01024 else { 01025 ctl->counts[i] = 0; 01026 SUBDBG( "Set %lld didn't run!!!!\n", 01027 ( unsigned long long ) ctl->pd[i].reg_set ); 01028 } 01029 SUBDBG( "Counter %d, new count %lld.\n", i, ctl->counts[i] ); 01030 } 01031 01032 return PAPI_OK; 01033 } 01034 01035 #if defined(__crayxt) 01036 int _papi_hwd_start_create_context = 0; /* CrayPat checkpoint support */ 01037 #endif /* XT */ 01038 01039 int 01040 _papi_pfm_start( hwd_context_t * ctx0, hwd_control_state_t * ctl0 ) 01041 { 01042 unsigned int i; 01043 int ret; 01044 pfm_control_state_t *ctl = ( pfm_control_state_t * ) ctl0; 01045 pfm_context_t *ctx = ( pfm_context_t * ) ctx0; 01046 01047 #if defined(__crayxt) 01048 if ( _papi_hwd_start_create_context ) { 01049 pfarg_ctx_t tmp; 01050 01051 memset( &tmp, 0, sizeof ( tmp ) ); 01052 if ( ( ret = pfm_create_context( &tmp, NULL, NULL, 0 ) ) == -1 ) { 01053 PAPIERROR( "_papi_hwd_init:pfm_create_context(): %s", 01054 strerror( errno ) ); 01055 return ( PAPI_ESYS ); 01056 } 01057 tune_up_fd( ret ); 01058 ctl->ctx_fd = ctx->ctx_fd = ret; 01059 } 01060 #endif /* XT */ 01061 01062 if ( ctl->num_sets > 1 ) { 01063 SUBDBG( "PFM_CREATE_EVTSETS(%d,%p,%d)\n", ctl->ctx_fd, 
ctl->set, 01064 ctl->num_sets ); 01065 if ( ( ret = 01066 pfm_create_evtsets( ctl->ctx_fd, ctl->set, 01067 ctl->num_sets ) ) != PFMLIB_SUCCESS ) { 01068 DEBUGCALL( DEBUG_SUBSTRATE, dump_sets( ctl->set, ctl->num_sets ) ); 01069 PAPIERROR( "pfm_create_evtsets(%d,%p,%d): errno=%d %s", 01070 ctl->ctx_fd, ctl->set, ctl->num_sets, errno, 01071 strerror( ret ) ); 01072 perror( "pfm_create_evtsets" ); 01073 return ( PAPI_ESYS ); 01074 } 01075 DEBUGCALL( DEBUG_SUBSTRATE, dump_sets( ctl->set, ctl->num_sets ) ); 01076 } 01077 01078 /* 01079 * Now program the registers 01080 * 01081 * We don't use the same variable to indicate the number of elements passed to 01082 * the kernel because, as we said earlier, pc may contain more elements than 01083 * the number of events (pmd) we specified, i.e., contains more than counting 01084 * monitors. 01085 */ 01086 01087 ret = _papi_pfm_write_pmcs( ctx, ctl ); 01088 if ( ret != PAPI_OK ) 01089 return PAPI_ESYS; 01090 01091 /* Set counters to zero as per PAPI_start man page, unless it is set to overflow */ 01092 01093 for ( i = 0; i < ctl->in.pfp_event_count; i++ ) 01094 if ( !( ctl->pd[i].reg_flags & PFM_REGFL_OVFL_NOTIFY ) ) 01095 ctl->pd[i].reg_value = 0ULL; 01096 01097 /* 01098 * To be read, each PMD must be either written or declared 01099 * as being part of a sample (reg_smpl_pmds) 01100 */ 01101 01102 ret = _papi_pfm_write_pmds( ctx, ctl ); 01103 if ( ret != PAPI_OK ) 01104 return PAPI_ESYS; 01105 01106 SUBDBG( "PFM_LOAD_CONTEXT(%d,%p(%u))\n", ctl->ctx_fd, ctl->load, 01107 ctl->load->load_pid ); 01108 if ( ( ret = pfm_load_context( ctl->ctx_fd, ctl->load ) ) ) { 01109 PAPIERROR( "pfm_load_context(%d,%p(%u)): %s", ctl->ctx_fd, ctl->load, 01110 ctl->load->load_pid, strerror( ret ) ); 01111 return PAPI_ESYS; 01112 } 01113 01114 SUBDBG( "PFM_START(%d,%p)\n", ctl->ctx_fd, NULL ); 01115 if ( ( ret = pfm_start( ctl->ctx_fd, NULL ) ) ) { 01116 PAPIERROR( "pfm_start(%d): %s", ctl->ctx_fd, strerror( ret ) ); 01117 return ( PAPI_ESYS ); 01118 
} 01119 return PAPI_OK; 01120 } 01121 01122 int 01123 _papi_pfm_stop( hwd_context_t * ctx0, hwd_control_state_t * ctl0 ) 01124 { 01125 ( void ) ctx0; /*unused */ 01126 int ret; 01127 pfm_control_state_t *ctl = ( pfm_control_state_t * ) ctl0; 01128 // pfm_context_t *ctx = (pfm_context_t *)ctx0; 01129 01130 SUBDBG( "PFM_STOP(%d)\n", ctl->ctx_fd ); 01131 if ( ( ret = pfm_stop( ctl->ctx_fd ) ) ) { 01132 /* If this thread is attached to another thread, and that thread 01133 has exited, we can safely discard the error here. */ 01134 01135 if ( ( ret == PFMLIB_ERR_NOTSUPP ) && 01136 ( ctl->load->load_pid != ( unsigned int ) mygettid( ) ) ) 01137 return ( PAPI_OK ); 01138 01139 PAPIERROR( "pfm_stop(%d): %s", ctl->ctx_fd, strerror( ret ) ); 01140 return ( PAPI_ESYS ); 01141 } 01142 01143 SUBDBG( "PFM_UNLOAD_CONTEXT(%d) (tid %u)\n", ctl->ctx_fd, 01144 ctl->load->load_pid ); 01145 if ( ( ret = pfm_unload_context( ctl->ctx_fd ) ) ) { 01146 PAPIERROR( "pfm_unload_context(%d): %s", ctl->ctx_fd, strerror( ret ) ); 01147 return PAPI_ESYS; 01148 } 01149 01150 if ( ctl->num_sets > 1 ) { 01151 static pfarg_setdesc_t set = { 0, 0, 0, 0, {0, 0, 0, 0, 0, 0} }; 01152 /* Delete the high sets */ 01153 SUBDBG( "PFM_DELETE_EVTSETS(%d,%p,%d)\n", ctl->ctx_fd, &ctl->set[1], 01154 ctl->num_sets - 1 ); 01155 if ( ( ret = 01156 pfm_delete_evtsets( ctl->ctx_fd, &ctl->set[1], 01157 ctl->num_sets - 1 ) ) != PFMLIB_SUCCESS ) { 01158 DEBUGCALL( DEBUG_SUBSTRATE, 01159 dump_sets( &ctl->set[1], ctl->num_sets - 1 ) ); 01160 PAPIERROR( "pfm_delete_evtsets(%d,%p,%d): %s", ctl->ctx_fd, 01161 &ctl->set[1], ctl->num_sets - 1, strerror( ret ) ); 01162 return ( PAPI_ESYS ); 01163 } 01164 DEBUGCALL( DEBUG_SUBSTRATE, 01165 dump_sets( &ctl->set[1], ctl->num_sets - 1 ) ); 01166 /* Reprogram the 0 set */ 01167 SUBDBG( "PFM_CREATE_EVTSETS(%d,%p,%d)\n", ctl->ctx_fd, &set, 1 ); 01168 if ( ( ret = 01169 pfm_create_evtsets( ctl->ctx_fd, &set, 01170 1 ) ) != PFMLIB_SUCCESS ) { 01171 DEBUGCALL( DEBUG_SUBSTRATE, dump_sets( 
&set, 1 ) ); 01172 PAPIERROR( "pfm_create_evtsets(%d,%p,%d): %s", ctl->ctx_fd, &set, 01173 ctl->num_sets, strerror( ret ) ); 01174 return ( PAPI_ESYS ); 01175 } 01176 DEBUGCALL( DEBUG_SUBSTRATE, dump_sets( &set, 1 ) ); 01177 } 01178 01179 return PAPI_OK; 01180 } 01181 01182 static inline int 01183 round_requested_ns( int ns ) 01184 { 01185 if ( ns <= _papi_os_info.itimer_res_ns ) { 01186 return _papi_os_info.itimer_res_ns; 01187 } else { 01188 int leftover_ns = ns % _papi_os_info.itimer_res_ns; 01189 return ( ns - leftover_ns + _papi_os_info.itimer_res_ns ); 01190 } 01191 } 01192 01193 int 01194 _papi_pfm_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) 01195 { 01196 switch ( code ) { 01197 case PAPI_MULTIPLEX: 01198 { 01199 option->multiplex.ns = round_requested_ns( option->multiplex.ns ); 01200 ( ( pfm_control_state_t * ) ( option->multiplex.ESI->ctl_state ) )-> 01201 multiplexed = option->multiplex.ns; 01202 return ( PAPI_OK ); 01203 } 01204 01205 case PAPI_ATTACH: 01206 return ( attach 01207 ( ( pfm_control_state_t * ) ( option->attach.ESI->ctl_state ), 01208 option->attach.tid ) ); 01209 case PAPI_DETACH: 01210 return ( detach 01211 ( ctx, 01212 ( pfm_control_state_t * ) ( option->attach.ESI-> 01213 ctl_state ) ) ); 01214 01215 case PAPI_DOMAIN: 01216 return ( set_domain 01217 ( ( pfm_control_state_t * ) ( option->domain.ESI->ctl_state ), 01218 option->domain.domain ) ); 01219 case PAPI_GRANUL: 01220 return ( set_granularity 01221 ( ( pfm_control_state_t * ) ( option->granularity.ESI-> 01222 ctl_state ), 01223 option->granularity.granularity ) ); 01224 #if 0 01225 case PAPI_DATA_ADDRESS: 01226 ret = 01227 set_default_domain( ( pfm_control_state_t * ) ( option-> 01228 address_range.ESI-> 01229 ctl_state ), 01230 option->address_range.domain ); 01231 if ( ret != PAPI_OK ) 01232 return ( ret ); 01233 set_drange( ctx, 01234 ( pfm_control_state_t * ) ( option->address_range.ESI-> 01235 ctl_state ), option ); 01236 return ( PAPI_OK ); 01237 case 
PAPI_INSTR_ADDRESS: 01238 ret = 01239 set_default_domain( ( pfm_control_state_t * ) ( option-> 01240 address_range.ESI-> 01241 ctl_state ), 01242 option->address_range.domain ); 01243 if ( ret != PAPI_OK ) 01244 return ( ret ); 01245 set_irange( ctx, 01246 ( pfm_control_state_t * ) ( option->address_range.ESI-> 01247 ctl_state ), option ); 01248 return ( PAPI_OK ); 01249 #endif 01250 01251 01252 case PAPI_DEF_ITIMER: 01253 { 01254 /* flags are currently ignored, eventually the flags will be able 01255 to specify whether or not we use POSIX itimers (clock_gettimer) */ 01256 if ( ( option->itimer.itimer_num == ITIMER_REAL ) && 01257 ( option->itimer.itimer_sig != SIGALRM ) ) 01258 return PAPI_EINVAL; 01259 if ( ( option->itimer.itimer_num == ITIMER_VIRTUAL ) && 01260 ( option->itimer.itimer_sig != SIGVTALRM ) ) 01261 return PAPI_EINVAL; 01262 if ( ( option->itimer.itimer_num == ITIMER_PROF ) && 01263 ( option->itimer.itimer_sig != SIGPROF ) ) 01264 return PAPI_EINVAL; 01265 if ( option->itimer.ns > 0 ) 01266 option->itimer.ns = round_requested_ns( option->itimer.ns ); 01267 /* At this point, we assume the user knows what he or 01268 she is doing, they maybe doing something arch specific */ 01269 return PAPI_OK; 01270 } 01271 01272 case PAPI_DEF_MPX_NS: 01273 { 01274 option->multiplex.ns = round_requested_ns( option->multiplex.ns ); 01275 return ( PAPI_OK ); 01276 } 01277 case PAPI_DEF_ITIMER_NS: 01278 { 01279 option->itimer.ns = round_requested_ns( option->itimer.ns ); 01280 return ( PAPI_OK ); 01281 } 01282 default: 01283 return ( PAPI_ENOSUPP ); 01284 } 01285 } 01286 01287 int 01288 _papi_pfm_shutdown( hwd_context_t * ctx0 ) 01289 { 01290 pfm_context_t *ctx = ( pfm_context_t * ) ctx0; 01291 int ret; 01292 #if defined(USE_PROC_PTTIMER) 01293 close( ctx->stat_fd ); 01294 #endif 01295 01296 01297 ret = close( ctx->ctx_fd ); 01298 SUBDBG( "CLOSE fd %d returned %d\n", ctx->ctx_fd, ret ); 01299 (void) ret; 01300 01301 return ( PAPI_OK ); 01302 } 01303 01304 /* This will 
need to be modified for the Pentium IV */ 01305 01306 static inline int 01307 find_profile_index( EventSetInfo_t * ESI, int pmd, int *flags, 01308 unsigned int *native_index, int *profile_index ) 01309 { 01310 int pos, esi_index, count; 01311 pfm_control_state_t *ctl = ( pfm_control_state_t * ) ESI->ctl_state; 01312 pfarg_pmd_t *pd; 01313 unsigned int i; 01314 01315 pd = ctl->pd; 01316 01317 /* Find virtual PMD index, the one we actually read from the physical PMD number that 01318 overflowed. This index is the one related to the profile buffer. */ 01319 01320 for ( i = 0; i < ctl->in.pfp_event_count; i++ ) { 01321 if ( pd[i].reg_num == pmd ) { 01322 SUBDBG( "Physical PMD %d is Virtual PMD %d\n", pmd, i ); 01323 pmd = i; 01324 break; 01325 } 01326 } 01327 01328 01329 SUBDBG( "(%p,%d,%p)\n", ESI, pmd, index ); 01330 01331 for ( count = 0; count < ESI->profile.event_counter; count++ ) { 01332 /* Find offset of PMD that gets read from the kernel */ 01333 esi_index = ESI->profile.EventIndex[count]; 01334 pos = ESI->EventInfoArray[esi_index].pos[0]; 01335 SUBDBG( "Examining event at ESI index %d, PMD position %d\n", esi_index, 01336 pos ); 01337 // PMU_FIRST_COUNTER 01338 if ( pos == pmd ) { 01339 *profile_index = count; 01340 *native_index = 01341 ESI->NativeInfoArray[pos].ni_event & PAPI_NATIVE_AND_MASK; 01342 *flags = ESI->profile.flags; 01343 SUBDBG( "Native event %d is at profile index %d, flags %d\n", 01344 *native_index, *profile_index, *flags ); 01345 return ( PAPI_OK ); 01346 } 01347 } 01348 01349 PAPIERROR( "wrong count: %d vs. 
ESI->profile.event_counter %d", count, 01350 ESI->profile.event_counter ); 01351 return ( PAPI_EBUG ); 01352 } 01353 01354 #if defined(__ia64__) 01355 static inline int 01356 is_montecito_and_dear( unsigned int native_index ) 01357 { 01358 if ( _perfmon2_pfm_pmu_type == PFMLIB_MONTECITO_PMU ) { 01359 if ( pfm_mont_is_dear( native_index ) ) 01360 return ( 1 ); 01361 } 01362 return ( 0 ); 01363 } 01364 static inline int 01365 is_montecito_and_iear( unsigned int native_index ) 01366 { 01367 if ( _perfmon2_pfm_pmu_type == PFMLIB_MONTECITO_PMU ) { 01368 if ( pfm_mont_is_iear( native_index ) ) 01369 return ( 1 ); 01370 } 01371 return ( 0 ); 01372 } 01373 static inline int 01374 is_itanium2_and_dear( unsigned int native_index ) 01375 { 01376 if ( _perfmon2_pfm_pmu_type == PFMLIB_ITANIUM2_PMU ) { 01377 if ( pfm_ita2_is_dear( native_index ) ) 01378 return ( 1 ); 01379 } 01380 return ( 0 ); 01381 } 01382 static inline int 01383 is_itanium2_and_iear( unsigned int native_index ) 01384 { 01385 if ( _perfmon2_pfm_pmu_type == PFMLIB_ITANIUM2_PMU ) { 01386 if ( pfm_ita2_is_iear( native_index ) ) 01387 return ( 1 ); 01388 } 01389 return ( 0 ); 01390 } 01391 #endif 01392 01393 #define BPL (sizeof(uint64_t)<<3) 01394 #define LBPL 6 01395 static inline void 01396 pfm_bv_set( uint64_t * bv, uint16_t rnum ) 01397 { 01398 bv[rnum >> LBPL] |= 1UL << ( rnum & ( BPL - 1 ) ); 01399 } 01400 01401 static inline int 01402 setup_ear_event( unsigned int native_index, pfarg_pmd_t * pd, int flags ) 01403 { 01404 ( void ) flags; /*unused */ 01405 #if defined(__ia64__) 01406 if ( _perfmon2_pfm_pmu_type == PFMLIB_MONTECITO_PMU ) { 01407 if ( pfm_mont_is_dear( native_index ) ) { /* 2,3,17 */ 01408 pfm_bv_set( pd[0].reg_smpl_pmds, 32 ); 01409 pfm_bv_set( pd[0].reg_smpl_pmds, 33 ); 01410 pfm_bv_set( pd[0].reg_smpl_pmds, 36 ); 01411 pfm_bv_set( pd[0].reg_reset_pmds, 36 ); 01412 return ( 1 ); 01413 } else if ( pfm_mont_is_iear( native_index ) ) { /* O,1 MK */ 01414 pfm_bv_set( pd[0].reg_smpl_pmds, 34 ); 
01415 pfm_bv_set( pd[0].reg_smpl_pmds, 35 ); 01416 pfm_bv_set( pd[0].reg_reset_pmds, 34 ); 01417 return ( 1 ); 01418 } 01419 return ( 0 ); 01420 } else if ( _perfmon2_pfm_pmu_type == PFMLIB_ITANIUM2_PMU ) { 01421 if ( pfm_mont_is_dear( native_index ) ) { /* 2,3,17 */ 01422 pfm_bv_set( pd[0].reg_smpl_pmds, 2 ); 01423 pfm_bv_set( pd[0].reg_smpl_pmds, 3 ); 01424 pfm_bv_set( pd[0].reg_smpl_pmds, 17 ); 01425 pfm_bv_set( pd[0].reg_reset_pmds, 17 ); 01426 return ( 1 ); 01427 } else if ( pfm_mont_is_iear( native_index ) ) { /* O,1 MK */ 01428 pfm_bv_set( pd[0].reg_smpl_pmds, 0 ); 01429 pfm_bv_set( pd[0].reg_smpl_pmds, 1 ); 01430 pfm_bv_set( pd[0].reg_reset_pmds, 0 ); 01431 return ( 1 ); 01432 } 01433 return ( 0 ); 01434 } 01435 #else 01436 ( void ) native_index; /*unused */ 01437 ( void ) pd; /*unused */ 01438 #endif 01439 return ( 0 ); 01440 } 01441 01442 static inline int 01443 process_smpl_entry( unsigned int native_pfm_index, int flags, 01444 pfm_dfl_smpl_entry_t ** ent, caddr_t * pc ) 01445 { 01446 #ifndef __ia64__ 01447 ( void ) native_pfm_index; /*unused */ 01448 ( void ) flags; /*unused */ 01449 #endif 01450 SUBDBG( "process_smpl_entry(%d,%d,%p,%p)\n", native_pfm_index, flags, ent, 01451 pc ); 01452 01453 #ifdef __ia64__ 01454 /* Fixup EAR stuff here */ 01455 if ( is_montecito_and_dear( native_pfm_index ) ) { 01456 pfm_mont_pmd_reg_t data_addr; 01457 pfm_mont_pmd_reg_t latency; 01458 pfm_mont_pmd_reg_t load_addr; 01459 unsigned long newent; 01460 01461 if ( ( flags & ( PAPI_PROFIL_DATA_EAR | PAPI_PROFIL_INST_EAR ) ) == 0 ) 01462 goto safety; 01463 01464 /* Skip the header */ 01465 ++( *ent ); 01466 01467 // PMD32 has data address on Montecito 01468 // PMD33 has latency on Montecito 01469 // PMD36 has instruction address on Montecito 01470 data_addr = *( pfm_mont_pmd_reg_t * ) * ent; 01471 latency = 01472 *( pfm_mont_pmd_reg_t * ) ( ( unsigned long ) *ent + 01473 sizeof ( data_addr ) ); 01474 load_addr = 01475 *( pfm_mont_pmd_reg_t * ) ( ( unsigned long ) *ent + 
01476 sizeof ( data_addr ) + 01477 sizeof ( latency ) ); 01478 01479 SUBDBG( "PMD[32]: 0x%016llx\n", 01480 ( unsigned long long ) data_addr.pmd_val ); 01481 SUBDBG( "PMD[33]: 0x%016llx\n", 01482 ( unsigned long long ) latency.pmd_val ); 01483 SUBDBG( "PMD[36]: 0x%016llx\n", 01484 ( unsigned long long ) load_addr.pmd_val ); 01485 01486 if ( ( !load_addr.pmd36_mont_reg.dear_vl ) || 01487 ( !load_addr.pmd33_mont_reg.dear_stat ) ) { 01488 SUBDBG 01489 ( "Invalid DEAR sample found, dear_vl = %d, dear_stat = 0x%x\n", 01490 load_addr.pmd36_mont_reg.dear_vl, 01491 load_addr.pmd33_mont_reg.dear_stat ); 01492 bail1: 01493 newent = ( unsigned long ) *ent; 01494 newent += 3 * sizeof ( pfm_mont_pmd_reg_t ); 01495 *ent = ( pfm_dfl_smpl_entry_t * ) newent; 01496 return 0; 01497 } 01498 01499 if ( flags & PAPI_PROFIL_DATA_EAR ) 01500 *pc = ( caddr_t ) data_addr.pmd_val; 01501 else if ( flags & PAPI_PROFIL_INST_EAR ) { 01502 unsigned long tmp = 01503 ( ( load_addr.pmd36_mont_reg.dear_iaddr + 01504 ( unsigned long ) load_addr.pmd36_mont_reg. 01505 dear_bn ) << 4 ) | ( unsigned long ) load_addr. 01506 pmd36_mont_reg.dear_slot; 01507 *pc = ( caddr_t ) tmp; 01508 } else { 01509 PAPIERROR( "BUG!" 
); 01510 goto bail1; 01511 } 01512 01513 newent = ( unsigned long ) *ent; 01514 newent += 3 * sizeof ( pfm_mont_pmd_reg_t ); 01515 *ent = ( pfm_dfl_smpl_entry_t * ) newent; 01516 return 0; 01517 } else if ( is_montecito_and_iear( native_pfm_index ) ) { 01518 pfm_mont_pmd_reg_t latency; 01519 pfm_mont_pmd_reg_t icache_line_addr; 01520 unsigned long newent; 01521 01522 if ( ( flags & PAPI_PROFIL_INST_EAR ) == 0 ) 01523 goto safety; 01524 01525 /* Skip the header */ 01526 ++( *ent ); 01527 01528 // PMD34 has data address on Montecito 01529 // PMD35 has latency on Montecito 01530 icache_line_addr = *( pfm_mont_pmd_reg_t * ) * ent; 01531 latency = 01532 *( pfm_mont_pmd_reg_t * ) ( ( unsigned long ) *ent + 01533 sizeof ( icache_line_addr ) ); 01534 01535 SUBDBG( "PMD[34]: 0x%016llx\n", 01536 ( unsigned long long ) icache_line_addr.pmd_val ); 01537 SUBDBG( "PMD[35]: 0x%016llx\n", 01538 ( unsigned long long ) latency.pmd_val ); 01539 01540 if ( ( icache_line_addr.pmd34_mont_reg.iear_stat & 0x1 ) == 0 ) { 01541 SUBDBG( "Invalid IEAR sample found, iear_stat = 0x%x\n", 01542 icache_line_addr.pmd34_mont_reg.iear_stat ); 01543 bail2: 01544 newent = ( unsigned long ) *ent; 01545 newent += 2 * sizeof ( pfm_mont_pmd_reg_t ); 01546 *ent = ( pfm_dfl_smpl_entry_t * ) newent; 01547 return ( 0 ); 01548 } 01549 01550 if ( flags & PAPI_PROFIL_INST_EAR ) { 01551 unsigned long tmp = icache_line_addr.pmd34_mont_reg.iear_iaddr << 5; 01552 *pc = ( caddr_t ) tmp; 01553 } else { 01554 PAPIERROR( "BUG!" 
); 01555 goto bail2; 01556 } 01557 01558 newent = ( unsigned long ) *ent; 01559 newent += 2 * sizeof ( pfm_mont_pmd_reg_t ); 01560 *ent = ( pfm_dfl_smpl_entry_t * ) newent; 01561 return 0; 01562 } else if ( is_itanium2_and_dear( native_pfm_index ) ) { 01563 pfm_ita2_pmd_reg_t data_addr; 01564 pfm_ita2_pmd_reg_t latency; 01565 pfm_ita2_pmd_reg_t load_addr; 01566 unsigned long newent; 01567 01568 if ( ( flags & ( PAPI_PROFIL_DATA_EAR | PAPI_PROFIL_INST_EAR ) ) == 0 ) 01569 goto safety; 01570 01571 /* Skip the header */ 01572 ++( *ent ); 01573 01574 // PMD2 has data address on Itanium 2 01575 // PMD3 has latency on Itanium 2 01576 // PMD17 has instruction address on Itanium 2 01577 data_addr = *( pfm_ita2_pmd_reg_t * ) * ent; 01578 latency = 01579 *( pfm_ita2_pmd_reg_t * ) ( ( unsigned long ) *ent + 01580 sizeof ( data_addr ) ); 01581 load_addr = 01582 *( pfm_ita2_pmd_reg_t * ) ( ( unsigned long ) *ent + 01583 sizeof ( data_addr ) + 01584 sizeof ( latency ) ); 01585 01586 SUBDBG( "PMD[2]: 0x%016llx\n", 01587 ( unsigned long long ) data_addr.pmd_val ); 01588 SUBDBG( "PMD[3]: 0x%016llx\n", ( unsigned long long ) latency.pmd_val ); 01589 SUBDBG( "PMD[17]: 0x%016llx\n", 01590 ( unsigned long long ) load_addr.pmd_val ); 01591 01592 if ( ( !load_addr.pmd17_ita2_reg.dear_vl ) || 01593 ( !load_addr.pmd3_ita2_reg.dear_stat ) ) { 01594 SUBDBG 01595 ( "Invalid DEAR sample found, dear_vl = %d, dear_stat = 0x%x\n", 01596 load_addr.pmd17_ita2_reg.dear_vl, 01597 load_addr.pmd3_ita2_reg.dear_stat ); 01598 bail3: 01599 newent = ( unsigned long ) *ent; 01600 newent += 3 * sizeof ( pfm_mont_pmd_reg_t ); 01601 *ent = ( pfm_dfl_smpl_entry_t * ) newent; 01602 return 0; 01603 } 01604 01605 if ( flags & PAPI_PROFIL_DATA_EAR ) 01606 *pc = ( caddr_t ) data_addr.pmd_val; 01607 else if ( flags & PAPI_PROFIL_INST_EAR ) { 01608 unsigned long tmp = 01609 ( ( load_addr.pmd17_ita2_reg.dear_iaddr + 01610 ( unsigned long ) load_addr.pmd17_ita2_reg. 01611 dear_bn ) << 4 ) | ( unsigned long ) load_addr. 
01612 pmd17_ita2_reg.dear_slot; 01613 *pc = ( caddr_t ) tmp; 01614 } else { 01615 PAPIERROR( "BUG!" ); 01616 goto bail3; 01617 } 01618 01619 newent = ( unsigned long ) *ent; 01620 newent += 3 * sizeof ( pfm_ita2_pmd_reg_t ); 01621 *ent = ( pfm_dfl_smpl_entry_t * ) newent; 01622 return 0; 01623 } else if ( is_itanium2_and_iear( native_pfm_index ) ) { 01624 pfm_ita2_pmd_reg_t latency; 01625 pfm_ita2_pmd_reg_t icache_line_addr; 01626 unsigned long newent; 01627 01628 if ( ( flags & PAPI_PROFIL_INST_EAR ) == 0 ) 01629 goto safety; 01630 01631 /* Skip the header */ 01632 ++( *ent ); 01633 01634 // PMD0 has address on Itanium 2 01635 // PMD1 has latency on Itanium 2 01636 icache_line_addr = *( pfm_ita2_pmd_reg_t * ) * ent; 01637 latency = 01638 *( pfm_ita2_pmd_reg_t * ) ( ( unsigned long ) *ent + 01639 sizeof ( icache_line_addr ) ); 01640 01641 SUBDBG( "PMD[0]: 0x%016llx\n", 01642 ( unsigned long long ) icache_line_addr.pmd_val ); 01643 SUBDBG( "PMD[1]: 0x%016llx\n", ( unsigned long long ) latency.pmd_val ); 01644 01645 if ( ( icache_line_addr.pmd0_ita2_reg.iear_stat & 0x1 ) == 0 ) { 01646 SUBDBG( "Invalid IEAR sample found, iear_stat = 0x%x\n", 01647 icache_line_addr.pmd0_ita2_reg.iear_stat ); 01648 bail4: 01649 newent = ( unsigned long ) *ent; 01650 newent += 2 * sizeof ( pfm_mont_pmd_reg_t ); 01651 *ent = ( pfm_dfl_smpl_entry_t * ) newent; 01652 return ( 0 ); 01653 } 01654 01655 if ( flags & PAPI_PROFIL_INST_EAR ) { 01656 unsigned long tmp = icache_line_addr.pmd0_ita2_reg.iear_iaddr << 5; 01657 *pc = ( caddr_t ) tmp; 01658 } else { 01659 PAPIERROR( "BUG!" 
); 01660 goto bail4; 01661 } 01662 01663 newent = ( unsigned long ) *ent; 01664 newent += 2 * sizeof ( pfm_ita2_pmd_reg_t ); 01665 *ent = ( pfm_dfl_smpl_entry_t * ) newent; 01666 return 0; 01667 } 01668 #if 0 01669 ( is_btb( native_pfm_index ) ) { 01670 // PMD48-63,39 on Montecito 01671 // PMD8-15,16 on Itanium 2 01672 } 01673 #endif 01674 else 01675 safety: 01676 #endif 01677 { 01678 *pc = ( caddr_t ) ( ( size_t ) ( ( *ent )->ip ) ); 01679 ++( *ent ); 01680 return ( 0 ); 01681 } 01682 } 01683 01684 static inline int 01685 process_smpl_buf( int num_smpl_pmds, int entry_size, ThreadInfo_t ** thr ) 01686 { 01687 ( void ) num_smpl_pmds; /*unused */ 01688 ( void ) entry_size; /*unused */ 01689 int cidx = _papi_pfm_vector.cmp_info.CmpIdx; 01690 pfm_dfl_smpl_entry_t *ent; 01691 uint64_t entry, count; 01692 pfm_dfl_smpl_hdr_t *hdr = 01693 ( ( pfm_context_t * ) ( *thr )->context[cidx] )->smpl_buf; 01694 int ret, profile_index, flags; 01695 unsigned int native_pfm_index; 01696 caddr_t pc = NULL; 01697 long long weight; 01698 01699 DEBUGCALL( DEBUG_SUBSTRATE, dump_smpl_hdr( hdr ) ); 01700 count = hdr->hdr_count; 01701 ent = ( pfm_dfl_smpl_entry_t * ) ( hdr + 1 ); 01702 entry = 0; 01703 01704 SUBDBG( "This buffer has %llu samples in it.\n", 01705 ( unsigned long long ) count ); 01706 while ( count-- ) { 01707 SUBDBG( "Processing sample entry %llu\n", 01708 ( unsigned long long ) entry ); 01709 DEBUGCALL( DEBUG_SUBSTRATE, dump_smpl( ent ) ); 01710 01711 /* Find the index of the profile buffers if we are profiling on many events */ 01712 01713 ret = 01714 find_profile_index( ( *thr )->running_eventset[cidx], ent->ovfl_pmd, 01715 &flags, &native_pfm_index, &profile_index ); 01716 if ( ret != PAPI_OK ) 01717 return ( ret ); 01718 01719 weight = process_smpl_entry( native_pfm_index, flags, &ent, &pc ); 01720 01721 _papi_hwi_dispatch_profile( ( *thr )->running_eventset[cidx], pc, 01722 weight, profile_index ); 01723 01724 entry++; 01725 } 01726 return ( PAPI_OK ); 01727 } 01728 
01729 01730 /* This function used when hardware overflows ARE working 01731 or when software overflows are forced */ 01732 01733 static void 01734 _papi_pfm_dispatch_timer( int n, hwd_siginfo_t * info, void *uc ) 01735 { 01736 _papi_hwi_context_t ctx; 01737 #ifdef HAVE_PFM_MSG_TYPE 01738 pfm_msg_t msg; 01739 #else 01740 pfarg_msg_t msg; 01741 #endif 01742 int ret, wanted_fd, fd = info->si_fd; 01743 caddr_t address; 01744 ThreadInfo_t *thread = _papi_hwi_lookup_thread( 0 ); 01745 int cidx = _papi_pfm_vector.cmp_info.CmpIdx; 01746 01747 if ( thread == NULL ) { 01748 PAPIERROR( "thread == NULL in _papi_pfm_dispatch_timer!" ); 01749 if ( n == _papi_pfm_vector.cmp_info.hardware_intr_sig ) { 01750 ret = read( fd, &msg, sizeof ( msg ) ); 01751 pfm_restart( fd ); 01752 } 01753 return; 01754 } 01755 01756 if ( thread->running_eventset[cidx] == NULL ) { 01757 PAPIERROR 01758 ( "thread->running_eventset == NULL in _papi_pfm_dispatch_timer!" ); 01759 if ( n == _papi_pfm_vector.cmp_info.hardware_intr_sig ) { 01760 ret = read( fd, &msg, sizeof ( msg ) ); 01761 pfm_restart( fd ); 01762 } 01763 return; 01764 } 01765 01766 if ( thread->running_eventset[cidx]->overflow.flags == 0 ) { 01767 PAPIERROR 01768 ( "thread->running_eventset->overflow.flags == 0 in _papi_pfm_dispatch_timer!" ); 01769 if ( n == _papi_pfm_vector.cmp_info.hardware_intr_sig ) { 01770 ret = read( fd, &msg, sizeof ( msg ) ); 01771 pfm_restart( fd ); 01772 } 01773 return; 01774 } 01775 01776 ctx.si = info; 01777 ctx.ucontext = ( hwd_ucontext_t * ) uc; 01778 01779 if ( thread->running_eventset[cidx]->overflow. 
01780 flags & PAPI_OVERFLOW_FORCE_SW ) { 01781 address = GET_OVERFLOW_ADDRESS( ctx ); 01782 _papi_hwi_dispatch_overflow_signal( ( void * ) &ctx, address, NULL, 01783 0, 0, &thread, cidx ); 01784 } else { 01785 if ( thread->running_eventset[cidx]->overflow.flags == 01786 PAPI_OVERFLOW_HARDWARE ) { 01787 wanted_fd = 01788 ( ( pfm_control_state_t * ) ( thread->running_eventset[cidx]-> 01789 ctl_state ) )->ctx_fd; 01790 } else { 01791 wanted_fd = ( ( pfm_context_t * ) thread->context[cidx] )->ctx_fd; 01792 } 01793 if ( wanted_fd != fd ) { 01794 SUBDBG( "expected fd %d, got %d in _papi_hwi_dispatch_timer!", 01795 wanted_fd, fd ); 01796 if ( n == _papi_pfm_vector.cmp_info.hardware_intr_sig ) { 01797 ret = read( fd, &msg, sizeof ( msg ) ); 01798 pfm_restart( fd ); 01799 } 01800 return; 01801 } 01802 retry: 01803 ret = read( fd, &msg, sizeof ( msg ) ); 01804 if ( ret == -1 ) { 01805 if ( errno == EINTR ) { 01806 SUBDBG( "read(%d) interrupted, retrying\n", fd ); 01807 goto retry; 01808 } else { 01809 PAPIERROR( "read(%d): errno %d", fd, errno ); 01810 } 01811 } else if ( ret != sizeof ( msg ) ) { 01812 PAPIERROR( "read(%d): short %d vs. %d bytes", fd, ret, 01813 sizeof ( msg ) ); 01814 ret = -1; 01815 } 01816 01817 if ( msg.type != PFM_MSG_OVFL ) { 01818 PAPIERROR( "unexpected msg type %d", msg.type ); 01819 ret = -1; 01820 } 01821 #if 0 01822 if ( msg.pfm_ovfl_msg.msg_ovfl_tid != mygettid( ) ) { 01823 PAPIERROR( "unmatched thread id %lx vs. %lx", 01824 msg.pfm_ovfl_msg.msg_ovfl_tid, mygettid( ) ); 01825 ret = -1; 01826 } 01827 #endif 01828 01829 if ( ret != -1 ) { 01830 if ( ( thread->running_eventset[cidx]->state & PAPI_PROFILING ) && 01831 !( thread->running_eventset[cidx]->profile. 01832 flags & PAPI_PROFIL_FORCE_SW ) ) 01833 process_smpl_buf( 0, sizeof ( pfm_dfl_smpl_entry_t ), &thread ); 01834 else { 01835 /* PAPI assumes that the overflow vector contains the register index of the 01836 overflowing native event. 
That is generally true, but Stephane used some 01837 tricks to offset the fixed counters on Core2 (Core? i7?) by 16. This hack 01838 corrects for that hack in a (hopefully) transparent manner */ 01839 unsigned long i, vector = msg.pfm_ovfl_msg.msg_ovfl_pmds[0]; 01840 pfm_control_state_t *ctl = 01841 ( pfm_control_state_t * ) thread->running_eventset[cidx]-> 01842 ctl_state; 01843 for ( i = 0; i < ctl->in.pfp_event_count; i++ ) { 01844 /* We're only comparing to pmds[0]. A more robust implementation would 01845 compare to pmds[0-3]. The bit mask must be converted to an index 01846 for the comparison to work */ 01847 if ( ctl->pd[i].reg_num == 01848 ffsl( msg.pfm_ovfl_msg.msg_ovfl_pmds[0] ) - 1 ) { 01849 /* if a match is found, convert the index back to a bitmask */ 01850 vector = 1 << i; 01851 break; 01852 } 01853 } 01854 _papi_hwi_dispatch_overflow_signal( ( void * ) &ctx, 01855 ( caddr_t ) ( ( size_t ) 01856 msg. 01857 pfm_ovfl_msg. 01858 msg_ovfl_ip ), 01859 NULL, vector, 0, &thread, 01860 cidx ); 01861 } 01862 } 01863 01864 if ( ( ret = pfm_restart( fd ) ) ) { 01865 PAPIERROR( "pfm_restart(%d): %s", fd, strerror( ret ) ); 01866 } 01867 } 01868 }

/*
 * Component hook called when profiling is stopped for a thread.
 *
 * Drains whatever is still sitting in the sample buffer by handing the
 * thread to process_smpl_buf() and returns that function's result.
 * ESI is accepted only to satisfy the vector signature.
 */
static int
_papi_pfm_stop_profiling( ThreadInfo_t * thread, EventSetInfo_t * ESI )
{
	( void ) ESI;				 /* unused */

	/* Process any remaining samples in the sample buffer */
	return process_smpl_buf( 0, sizeof ( pfm_dfl_smpl_entry_t ), &thread );
}

/*
 * Turn sample-buffer profiling on (threshold != 0) or off (threshold == 0)
 * for the event at EventIndex of ESI.
 *
 * Off: unmaps the sample buffer recorded in the thread context, closes the
 *      fd held by the control state, points the control state back at the
 *      thread's master context, disarms the overflow via
 *      _papi_pfm_set_overflow() and clears the PAPI_OVERFLOWING /
 *      PAPI_OVERFLOW_HARDWARE bits on the event set.
 *
 * On:  creates a new context with the PFM_DFL_SMPL_NAME sampling format and
 *      a buffer of two pages, maps that buffer read-only, checks the buffer
 *      header version, arms the overflow via _papi_pfm_set_overflow(),
 *      applies the optional EAR / randomization settings requested in
 *      ESI->profile.flags, and finally records the new fd, buffer argument
 *      and mapping in the control state and thread context.
 *
 * Returns PAPI_OK on success, PAPI_ESYS when pfm_create_context() or mmap()
 * fails or the buffer version is unsupported, otherwise the error returned
 * by _papi_pfm_set_overflow().
 */
static int
_papi_pfm_set_profile( EventSetInfo_t * ESI, int EventIndex, int threshold )
{
	int cidx = _papi_pfm_vector.cmp_info.CmpIdx;
	pfm_control_state_t *ctl = ( pfm_control_state_t * ) ( ESI->ctl_state );
	pfm_context_t *ctx = ( pfm_context_t * ) ( ESI->master->context[cidx] );
	pfarg_ctx_t newctx;
	void *buf_addr = NULL;
	pfm_dfl_smpl_arg_t buf_arg;
	pfm_dfl_smpl_hdr_t *hdr;
	int i, ret, ctx_fd;

	memset( &newctx, 0, sizeof ( newctx ) );

	if ( threshold == 0 ) {
		SUBDBG( "MUNMAP(%p,%lld)\n", ctx->smpl_buf,
				( unsigned long long ) ctx->smpl.buf_size );
		munmap( ctx->smpl_buf, ctx->smpl.buf_size );

		i = close( ctl->ctx_fd );
		SUBDBG( "CLOSE fd %d returned %d\n", ctl->ctx_fd, i );
		( void ) i;				 /* only consumed by the debug macro */

		/* Thread has master context */

		ctl->ctx_fd = ctx->ctx_fd;
		ctl->ctx = &ctx->ctx;
		/* Size the clear by the object being cleared, not by a local */
		memset( &ctx->smpl, 0, sizeof ( ctx->smpl ) );
		ctx->smpl_buf = NULL;
		ret = _papi_pfm_set_overflow( ESI, EventIndex, threshold );
		/* NOTE: the original author flagged that clearing these bits
		   should be handled somewhere else. */
		ESI->state &= ~( PAPI_OVERFLOWING );
		ESI->overflow.flags &= ~( PAPI_OVERFLOW_HARDWARE );

		return ( ret );
	}

	memset( &buf_arg, 0, sizeof ( buf_arg ) );
	buf_arg.buf_size = 2 * getpagesize(  );

	SUBDBG( "PFM_CREATE_CONTEXT(%p,%s,%p,%d)\n", &newctx, PFM_DFL_SMPL_NAME,
			&buf_arg, ( int ) sizeof ( buf_arg ) );
	if ( ( ret =
		   pfm_create_context( &newctx, PFM_DFL_SMPL_NAME, &buf_arg,
							   sizeof ( buf_arg ) ) ) == -1 ) {
		DEBUGCALL( DEBUG_SUBSTRATE, dump_smpl_arg( &buf_arg ) );
		PAPIERROR( "_papi_hwd_set_profile:pfm_create_context(): %s",
				   strerror( errno ) );
		return ( PAPI_ESYS );
	}
	ctx_fd = ret;
	SUBDBG( "PFM_CREATE_CONTEXT returned fd %d\n", ctx_fd );
	tune_up_fd( ret );

	SUBDBG( "MMAP(NULL,%lld,%d,%d,%d,0)\n",
			( unsigned long long ) buf_arg.buf_size, PROT_READ, MAP_PRIVATE,
			ctx_fd );
	buf_addr =
		mmap( NULL, ( size_t ) buf_arg.buf_size, PROT_READ, MAP_PRIVATE,
			  ctx_fd, 0 );
	if ( buf_addr == MAP_FAILED ) {
		/* buf_size is printed through an explicit cast everywhere else in
		   this file; do the same here so the conversion specifier and the
		   argument type agree. */
		PAPIERROR( "mmap(NULL,%llu,%d,%d,%d,0): %s",
				   ( unsigned long long ) buf_arg.buf_size, PROT_READ,
				   MAP_PRIVATE, ctx_fd, strerror( errno ) );
		close( ctx_fd );
		return ( PAPI_ESYS );
	}
	SUBDBG( "Sample buffer is located at %p\n", buf_addr );

	hdr = ( pfm_dfl_smpl_hdr_t * ) buf_addr;
	SUBDBG( "hdr_cur_offs=%llu version=%u.%u\n",
			( unsigned long long ) hdr->hdr_cur_offs,
			PFM_VERSION_MAJOR( hdr->hdr_version ),
			PFM_VERSION_MINOR( hdr->hdr_version ) );

	if ( PFM_VERSION_MAJOR( hdr->hdr_version ) < 1 ) {
		PAPIERROR( "invalid buffer format version %d",
				   PFM_VERSION_MAJOR( hdr->hdr_version ) );
		munmap( buf_addr, buf_arg.buf_size );
		close( ctx_fd );
		return PAPI_ESYS;
	}

	ret = _papi_pfm_set_overflow( ESI, EventIndex, threshold );
	if ( ret != PAPI_OK ) {
		munmap( buf_addr, buf_arg.buf_size );
		close( ctx_fd );
		return ( ret );
	}

	/* Look up the native event code */

	if ( ESI->profile.flags & ( PAPI_PROFIL_DATA_EAR | PAPI_PROFIL_INST_EAR ) ) {
		pfarg_pmd_t *pd;
		int pos, native_index;
		pd = ctl->pd;
		pos = ESI->EventInfoArray[EventIndex].pos[0];
		native_index =
			( ( pfm_register_t * ) ( ESI->NativeInfoArray[pos].ni_bits ) )->
			event;
		setup_ear_event( native_index, &pd[pos], ESI->profile.flags );
	}

	if ( ESI->profile.flags & PAPI_PROFIL_RANDOM ) {
		pfarg_pmd_t *pd;
		int pos;
		pd = ctl->pd;
		pos = ESI->EventInfoArray[EventIndex].pos[0];
		pd[pos].reg_random_seed = 5;
		pd[pos].reg_random_mask = 0xff;
	}

	/* The master context fd (ctx->ctx_fd) is deliberately left open here;
	   the disable path above switches the control state back to it. */

	/* Copy the new data to the threads context control block */

	ctl->ctx_fd = ctx_fd;
	memcpy( &ctx->smpl, &buf_arg, sizeof ( buf_arg ) );
	ctx->smpl_buf = buf_addr;

	return ( PAPI_OK );
}



/*
 * Arm (threshold != 0) or disarm (threshold == 0) overflow notification on
 * the PMD used by the event at EventIndex of ESI.
 *
 * Only pos[0] of the event is considered. As the original author noted,
 * an event (e.g. a derived one) may occupy more than one position, so this
 * may not cover every counter belonging to the event.
 *
 * Disarm: fails with PAPI_EINVAL if the PMD is not currently flagged with
 *         PFM_REGFL_OVFL_NOTIFY; otherwise stops the component's interrupt
 *         signal, clears the flag and zeroes the value, reset and
 *         randomization fields of the PMD.
 * Arm:    starts the component's interrupt signal, sets the flag and loads
 *         the value and both reset fields with
 *         -( unsigned long long ) threshold + 1.
 *
 * Returns PAPI_OK, PAPI_EINVAL (see above) or the error from
 * _papi_hwi_stop_signal() / _papi_hwi_start_signal().
 */
static int
_papi_pfm_set_overflow( EventSetInfo_t * ESI, int EventIndex, int threshold )
{
	pfm_control_state_t *this_state =
		( pfm_control_state_t * ) ( ESI->ctl_state );
	int j, retval = PAPI_OK, *pos;

	pos = ESI->EventInfoArray[EventIndex].pos;
	j = pos[0];
	SUBDBG( "Hardware counter %d used in overflow, threshold %d\n", j,
			threshold );

	if ( threshold == 0 ) {
		/* If this counter isn't set to overflow */

		if ( ( this_state->pd[j].reg_flags & PFM_REGFL_OVFL_NOTIFY ) == 0 )
			return ( PAPI_EINVAL );

		/* Remove the signal handler */

		retval =
			_papi_hwi_stop_signal( _papi_pfm_vector.cmp_info.
								   hardware_intr_sig );
		if ( retval != PAPI_OK )
			return ( retval );

		/* Disable overflow. The flag is known to be set at this point, so
		   clearing it explicitly has the same effect as toggling it, but
		   does not depend on the check above staying in place. */

		this_state->pd[j].reg_flags &= ~( PFM_REGFL_OVFL_NOTIFY );

		/*
		 * We may want to reset the other PMDs on every overflow
		 * (via reg_reset_pmds). Since that is not set up here, the
		 * non-overflowed counters are left untouched.
		 */

		/* Clear the overflow period */

		this_state->pd[j].reg_value = 0;
		this_state->pd[j].reg_long_reset = 0;
		this_state->pd[j].reg_short_reset = 0;
		this_state->pd[j].reg_random_seed = 0;
		this_state->pd[j].reg_random_mask = 0;
	} else {
		unsigned long long period = -( unsigned long long ) threshold + 1;

		/* Enable the signal handler */

		retval =
			_papi_hwi_start_signal( _papi_pfm_vector.cmp_info.
									hardware_intr_sig, 1,
									_papi_pfm_vector.cmp_info.CmpIdx );
		if ( retval != PAPI_OK )
			return ( retval );

		/* Set it to overflow */

		this_state->pd[j].reg_flags |= PFM_REGFL_OVFL_NOTIFY;

		/*
		 * We may want to reset the other PMDs on every overflow
		 * (via reg_reset_pmds). Since that is not set up here, the
		 * non-overflowed counters are left untouched.
		 */

		/* Set the overflow period */

		this_state->pd[j].reg_value = period;
		this_state->pd[j].reg_short_reset = period;
		this_state->pd[j].reg_long_reset = period;
	}
	return ( retval );
}

/*
 * Put a freshly allocated control state into its initial condition.
 *
 * Zeroes the libpfm input/output parameter blocks and the pc, pd, set and
 * setinfo arrays, marks the state as not yet bound to a context
 * (ctx == NULL, ctx_fd == -1, load == NULL; they are filled in later by
 * _papi_pfm_update_control_state) and applies the component's default
 * domain. Always returns PAPI_OK.
 */
static int
_papi_pfm_init_control_state( hwd_control_state_t * ctl0 )
{
	pfm_control_state_t *ctl = ( pfm_control_state_t * ) ctl0;
	pfmlib_input_param_t *inp = &ctl->in;
	pfmlib_output_param_t *outp = &ctl->out;
	pfarg_pmd_t *pd = ctl->pd;
	pfarg_pmc_t *pc = ctl->pc;
	pfarg_setdesc_t *set = ctl->set;
	pfarg_setinfo_t *setinfo = ctl->setinfo;

	memset( inp, 0, sizeof ( *inp ) );
	/* Each block must be cleared with its OWN size: using the input
	   block's size for the output block either leaves part of it
	   uninitialized or writes past its end. */
	memset( outp, 0, sizeof ( *outp ) );
	/* pc/pd/set/setinfo are pointers here, so take the sizes from the
	   member arrays themselves. */
	memset( pc, 0, sizeof ( ctl->pc ) );
	memset( pd, 0, sizeof ( ctl->pd ) );
	memset( set, 0, sizeof ( ctl->set ) );
	memset( setinfo, 0, sizeof ( ctl->setinfo ) );
	/* Will be filled by update now...until this gets another arg */
	ctl->ctx = NULL;
	ctl->ctx_fd = -1;
	ctl->load = NULL;
	set_domain( ctl, _papi_pfm_vector.cmp_info.default_domain );
	return ( PAPI_OK );
}

/*
 * Translate every native event code of ESI into its register bits.
 *
 * Calls _papi_libpfm_ntv_code_to_bits() for each of the ESI->NativeCount
 * entries. If one translation fails, the bits of all entries translated
 * before it are zeroed again and PAPI_ECNFLCT is returned; otherwise
 * PAPI_OK.
 */
static int
_papi_pfm_allocate_registers( EventSetInfo_t * ESI )
{
	int i, j;

	for ( i = 0; i < ESI->NativeCount; i++ ) {
		if ( _papi_libpfm_ntv_code_to_bits
			 ( ESI->NativeInfoArray[i].ni_event,
			   ESI->NativeInfoArray[i].ni_bits ) == PAPI_OK )
			continue;

		/* Roll back the entries that were already filled in */
		for ( j = 0; j < i; j++ )
			memset( ESI->NativeInfoArray[j].ni_bits, 0x0,
					sizeof ( pfm_register_t ) );
		return PAPI_ECNFLCT;
	}
	return PAPI_OK;
}

/*
 * This function clears the current contents of the control structure and
 * updates it with whatever resources are allocated for all the native
 * events in the native info structure array.
 *
 * ctl0   - control state to update
 * native - array of 'count' native events; ni_position of each entry is
 *          set to its index in this array
 * count  - number of entries in 'native'; 0 just empties the event list
 * ctx0   - thread context; used to bind the control state to a context the
 *          first time through (when ctl->ctx is still NULL)
 *
 * Returns PAPI_OK, or the error from compute_kernel_args(), in which case
 * the previous input/output parameter blocks are restored.
 */
static int
_papi_pfm_update_control_state( hwd_control_state_t * ctl0,
								NativeInfo_t * native, int count,
								hwd_context_t * ctx0 )
{
	pfm_control_state_t *ctl = ( pfm_control_state_t * ) ctl0;
	pfm_context_t *ctx = ( pfm_context_t * ) ctx0;
	int i, ret;
	pfmlib_input_param_t tmpin, *inp = &ctl->in;
	pfmlib_output_param_t tmpout, *outp = &ctl->out;

	if ( count == 0 ) {
		SUBDBG( "Called with count == 0\n" );
		inp->pfp_event_count = 0;
		outp->pfp_pmc_count = 0;
		memset( inp->pfp_events, 0x0, sizeof ( inp->pfp_events ) );
		return ( PAPI_OK );
	}

	/* Keep a copy so a failed computation can be rolled back */
	memcpy( &tmpin, inp, sizeof ( tmpin ) );
	memcpy( &tmpout, outp, sizeof ( tmpout ) );

	for ( i = 0; i < count; i++ ) {
		SUBDBG
			( "Stuffing native event index %d (code 0x%x) into input structure.\n",
			  i, ( ( pfm_register_t * ) native[i].ni_bits )->event );
		memcpy( inp->pfp_events + i, native[i].ni_bits,
				sizeof ( pfmlib_event_t ) );
	}
	inp->pfp_event_count = count;

	/* let the library figure out the values for the PMCS */

	ret = compute_kernel_args( ctl );
	if ( ret != PAPI_OK ) {
		/* Restore values */
		memcpy( inp, &tmpin, sizeof ( tmpin ) );
		memcpy( outp, &tmpout, sizeof ( tmpout ) );
		return ( ret );
	}

	/* Update the native structure, because the allocation is done here.
	   The position of each event is simply its index; the per-register-set
	   offset bookkeeping that used to sit in this loop never fed into the
	   result and has been dropped. */

	for ( i = 0; i < count; i++ ) {
		native[i].ni_position = i;
		SUBDBG( "native event index %d (code 0x%x) is at PMD offset %d\n", i,
				( ( pfm_register_t * ) native[i].ni_bits )->event,
				native[i].ni_position );
	}

	/* If structure has not yet been filled with a context, fill it
	   from the thread's context. This should happen in init_control_state
	   when we give that a *ctx argument */

	if ( ctl->ctx == NULL ) {
		ctl->ctx = &ctx->ctx;
		ctl->ctx_fd = ctx->ctx_fd;
		ctl->load = &ctx->load;
	}

	return ( PAPI_OK );
}


/*
 * Component descriptor for the perfmon2 component: static capability
 * information, the sizes of the component-private structures, and the
 * table of entry points.
 */
papi_vector_t _papi_pfm_vector = {
	.cmp_info = {
				 /* default component information (unspecified values initialized to 0) */
				 .name = "perfmon",
				 .description = "Linux perfmon2 CPU counters",
				 .version = "3.8",

				 .default_domain = PAPI_DOM_USER,
				 .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL,
				 .default_granularity = PAPI_GRN_THR,
				 .available_granularities = PAPI_GRN_THR,

				 .hardware_intr = 1,
				 .kernel_multiplex = 1,
				 .kernel_profile = 1,
				 .num_mpx_cntrs = PFMLIB_MAX_PMDS,

				 /* component specific cmp_info initializations */
				 .fast_real_timer = 1,
				 .fast_virtual_timer = 0,
				 .attach = 1,
				 .attach_must_ptrace = 1,
				 },

	/* sizes of framework-opaque component-private structures */
	.size = {
			 .context = sizeof ( pfm_context_t ),
			 .control_state = sizeof ( pfm_control_state_t ),
			 .reg_value = sizeof ( pfm_register_t ),
			 .reg_alloc = sizeof ( pfm_reg_alloc_t ),
			 },
	/* function pointers in this component */
	.init_control_state = _papi_pfm_init_control_state,
	.start = _papi_pfm_start,
	.stop = _papi_pfm_stop,
	.read = _papi_pfm_read,
	.shutdown_thread = _papi_pfm_shutdown,
	.shutdown_component = _papi_pfm_shutdown_component,
	.ctl = _papi_pfm_ctl,
	.update_control_state = _papi_pfm_update_control_state,
	.set_domain = set_domain,
	.reset = _papi_pfm_reset,
	.set_overflow = _papi_pfm_set_overflow,
	.set_profile = _papi_pfm_set_profile,
	.stop_profiling = _papi_pfm_stop_profiling,
	.init_component = _papi_pfm_init_component,
	.dispatch_timer = _papi_pfm_dispatch_timer,
	.init_thread = _papi_pfm_init_thread,
	.allocate_registers = _papi_pfm_allocate_registers,
	.write = _papi_pfm_write,

	/* from the counter name library */
	.ntv_enum_events = _papi_libpfm_ntv_enum_events,
	.ntv_name_to_code = _papi_libpfm_ntv_name_to_code,
	.ntv_code_to_name = _papi_libpfm_ntv_code_to_name,
	.ntv_code_to_descr = _papi_libpfm_ntv_code_to_descr,
	.ntv_code_to_bits = _papi_libpfm_ntv_code_to_bits,

};