|
PAPI
5.3.0.0
|

Go to the source code of this file.
| #define AMD_FPU "SPECULATIVE" |
Definition at line 72 of file perfctr-x86.c.
| #define P4_FPU " X87 SSE_DP" |
Definition at line 61 of file perfctr-x86.c.
| #define P4_REPLAY_REAL_MASK 0x00000003 |
Definition at line 903 of file perfctr-x86.c.
| #define P4_VEC "SSE" |
Definition at line 51 of file perfctr-x86.c.
| static int _bpt_map_avail | ( | hwd_reg_alloc_t * | dst, |
| int | ctr | ||
| ) | [static] |
Definition at line 266 of file perfctr-x86.c.
{
return ( int ) ( dst->ra_selector & ( 1 << ctr ) );
}
| static int _bpt_map_exclusive | ( | hwd_reg_alloc_t * | dst | ) | [static] |
Definition at line 295 of file perfctr-x86.c.
{
return ( dst->ra_rank == 1 );
}
| static void _bpt_map_preempt | ( | hwd_reg_alloc_t * | dst,
| hwd_reg_alloc_t * | src |	||
| ) | [static] |
Definition at line 343 of file perfctr-x86.c.
{
/* Bipartite-allocator callback: 'src' has been granted its resources, so
   strip from 'dst' everything it can no longer use (shared counters and,
   on Pentium 4, shared escrs/pebs registers), then recompute
   dst->ra_rank, the number of counters dst may still map to. */
int i;
unsigned shared;
if ( is_pentium4() ) {
#ifdef DEBUG
SUBDBG( "src, dst\n" );
print_alloc( src );
print_alloc( dst );
#endif
/* check for a pebs conflict */
/* pebs enables must both be non-zero */
i = ( ( ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable ) &&
/* and not equal to each other */
( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) ||
/* same for pebs_matrix_vert */
( ( dst->ra_bits.pebs_matrix_vert &&
src->ra_bits.pebs_matrix_vert )
&& ( dst->ra_bits.pebs_matrix_vert !=
src->ra_bits.pebs_matrix_vert ) ) );
if ( i ) {
/* the pebs registers are shared across the whole eventset; two
   different non-zero values cannot coexist, so dst becomes
   unschedulable (selector cleared to 0). */
SUBDBG( "pebs conflict! clearing selector\n" );
dst->ra_selector = 0;
return;
} else {
/* remove counters referenced by any shared escrs */
if ( ( dst->ra_escr[0] == src->ra_escr[0] ) &&
( ( int ) dst->ra_escr[0] != -1 ) ) {
dst->ra_selector &= ~dst->ra_bits.counter[0];
dst->ra_escr[0] = -1;
}
if ( ( dst->ra_escr[1] == src->ra_escr[1] ) &&
( ( int ) dst->ra_escr[1] != -1 ) ) {
dst->ra_selector &= ~dst->ra_bits.counter[1];
dst->ra_escr[1] = -1;
}
/* remove any remaining shared counters */
shared = ( dst->ra_selector & src->ra_selector );
if ( shared )
dst->ra_selector ^= shared;
}
/* recompute rank */
for ( i = 0, dst->ra_rank = 0; i < MAX_COUNTERS; i++ )
if ( dst->ra_selector & ( 1 << i ) )
dst->ra_rank++;
#ifdef DEBUG
SUBDBG( "new dst\n" );
print_alloc( dst );
#endif
} else {
/* non-P4: only plain counter overlap matters; drop the shared
   counters and recount the remaining ones. */
shared = dst->ra_selector & src->ra_selector;
if ( shared )
dst->ra_selector ^= shared;
for ( i = 0, dst->ra_rank = 0; i < MAX_COUNTERS; i++ )
if ( dst->ra_selector & ( 1 << i ) )
dst->ra_rank++;
}
}

| static void _bpt_map_set | ( | hwd_reg_alloc_t * | dst, |
| int | ctr | ||
| ) | [static] |
Definition at line 275 of file perfctr-x86.c.
{
dst->ra_selector = ( unsigned int ) ( 1 << ctr );
dst->ra_rank = 1;
if ( is_pentium4() ) {
/* Pentium 4 requires that both an escr and a counter are selected.
Find which counter mask contains this counter.
Set the opposite escr to empty (-1) */
if ( dst->ra_bits.counter[0] & dst->ra_selector )
dst->ra_escr[1] = -1;
else
dst->ra_escr[0] = -1;
}
}

| static int _bpt_map_shared | ( | hwd_reg_alloc_t * | dst, |
| hwd_reg_alloc_t * | src | ||
| ) | [static] |
Definition at line 305 of file perfctr-x86.c.
{
if ( is_pentium4() ) {
int retval1, retval2;
/* Pentium 4 needs to check for conflict of both counters and esc registers */
/* selectors must share bits */
retval1 = ( ( dst->ra_selector & src->ra_selector ) ||
/* or escrs must equal each other and not be set to -1 */
( ( dst->ra_escr[0] == src->ra_escr[0] ) &&
( ( int ) dst->ra_escr[0] != -1 ) ) ||
( ( dst->ra_escr[1] == src->ra_escr[1] ) &&
( ( int ) dst->ra_escr[1] != -1 ) ) );
/* Pentium 4 also needs to check for conflict on pebs registers */
/* pebs enables must both be non-zero */
retval2 =
( ( ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable ) &&
/* and not equal to each other */
( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) ||
/* same for pebs_matrix_vert */
( ( dst->ra_bits.pebs_matrix_vert &&
src->ra_bits.pebs_matrix_vert ) &&
( dst->ra_bits.pebs_matrix_vert !=
src->ra_bits.pebs_matrix_vert ) ) );
if ( retval2 ) {
SUBDBG( "pebs conflict!\n" );
}
return ( retval1 | retval2 );
}
return ( int ) ( dst->ra_selector & src->ra_selector );
}

| static void _bpt_map_update | ( | hwd_reg_alloc_t * | dst, |
| hwd_reg_alloc_t * | src | ||
| ) | [static] |
Definition at line 406 of file perfctr-x86.c.
{
dst->ra_selector = src->ra_selector;
if ( is_pentium4() ) {
dst->ra_escr[0] = src->ra_escr[0];
dst->ra_escr[1] = src->ra_escr[1];
}
}

| int _papi_libpfm_ntv_code_to_bits_perfctr | ( | unsigned int | EventCode,
| hwd_register_t * | newbits |	||
| ) |
Definition at line 1015 of file perfctr-x86.c.
{
/* Translate a PAPI native event code into the hardware register settings
   stored in 'newbits': on Pentium 4 the escr/cccr values, counter masks
   and pebs flags; on other x86 CPUs the counter selector and evntsel
   command word.  Returns PAPI_OK, PAPI_ENOEVNT when the code cannot be
   decoded, or the error from _pfm_get_counter_info(). */
unsigned int event, umask;
X86_register_t *bits = (X86_register_t *)newbits;
if ( is_pentium4() ) {
pentium4_escr_value_t escr_value;
pentium4_cccr_value_t cccr_value;
unsigned int num_masks, replay_mask, unit_masks[12];
unsigned int event_mask;
unsigned int tag_value, tag_enable;
unsigned int i;
int j, escr, cccr, pmd;
if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK )
return PAPI_ENOEVNT;
/* for each allowed escr (1 or 2) find the allowed cccrs.
for each allowed cccr find the pmd index
convert to an intel counter number; or it into bits->counter */
for ( i = 0; i < MAX_ESCRS_PER_EVENT; i++ ) {
bits->counter[i] = 0;
escr = pentium4_events[event].allowed_escrs[i];
if ( escr < 0 ) {
continue;
}
bits->escr[i] = escr;
for ( j = 0; j < MAX_CCCRS_PER_ESCR; j++ ) {
cccr = pentium4_escrs[escr].allowed_cccrs[j];
if ( cccr < 0 ) {
continue;
}
pmd = pentium4_cccrs[cccr].pmd;
bits->counter[i] |= ( 1 << pfm2intel[pmd] );
}
}
/* if there's only one valid escr, copy the values */
/* NOTE(review): 'escr' here is the value left over from the LAST loop
   iteration, so this test only means "slot 1 was invalid" if
   MAX_ESCRS_PER_EVENT == 2 -- confirm against the header. */
if ( escr < 0 ) {
bits->escr[1] = bits->escr[0];
bits->counter[1] = bits->counter[0];
}
/* Calculate the event-mask value. Invalid masks
* specified by the caller are ignored. */
tag_value = 0;
tag_enable = 0;
event_mask = _pfm_convert_umask( event, umask );
/* bits above the low 16 encode a tag; split them out */
if ( event_mask & 0xF0000 ) {
tag_enable = 1;
tag_value = ( ( event_mask & 0xF0000 ) >> EVENT_MASK_BITS );
}
event_mask &= 0x0FFFF; /* mask off possible tag bits */
/* Set up the ESCR and CCCR register values. */
escr_value.val = 0;
escr_value.bits.t1_usr = 0; /* controlled by kernel */
escr_value.bits.t1_os = 0; /* controlled by kernel */
// escr_value.bits.t0_usr = (plm & PFM_PLM3) ? 1 : 0;
// escr_value.bits.t0_os = (plm & PFM_PLM0) ? 1 : 0;
escr_value.bits.tag_enable = tag_enable;
escr_value.bits.tag_value = tag_value;
escr_value.bits.event_mask = event_mask;
escr_value.bits.event_select = pentium4_events[event].event_select;
escr_value.bits.reserved = 0;
/* initialize the proper bits in the cccr register */
cccr_value.val = 0;
cccr_value.bits.reserved1 = 0;
cccr_value.bits.enable = 1;
cccr_value.bits.escr_select = pentium4_events[event].escr_select;
cccr_value.bits.active_thread = 3;
/* FIXME: This is set to count when either logical
* CPU is active. Need a way to distinguish
* between logical CPUs when HT is enabled.
* the docs say these bits should always
* be set. */
cccr_value.bits.compare = 0;
/* FIXME: What do we do with "threshold" settings? */
cccr_value.bits.complement = 0;
/* FIXME: What do we do with "threshold" settings? */
cccr_value.bits.threshold = 0;
/* FIXME: What do we do with "threshold" settings? */
cccr_value.bits.force_ovf = 0;
/* FIXME: Do we want to allow "forcing" overflow
* interrupts on all counter increments? */
cccr_value.bits.ovf_pmi_t0 = 0;
cccr_value.bits.ovf_pmi_t1 = 0;
/* PMI taken care of by kernel typically */
cccr_value.bits.reserved2 = 0;
cccr_value.bits.cascade = 0;
/* FIXME: How do we handle "cascading" counters? */
cccr_value.bits.overflow = 0;
/* these flags are always zero, from what I can tell... */
bits->pebs_enable = 0; /* flag for PEBS counting */
bits->pebs_matrix_vert = 0;
/* flag for PEBS_MATRIX_VERT, whatever that is */
/* ...unless the event is replay_event */
if ( !strcmp( pentium4_events[event].name, "replay_event" ) ) {
escr_value.bits.event_mask = event_mask & P4_REPLAY_REAL_MASK;
num_masks = prepare_umask( umask, unit_masks );
for ( i = 0; i < num_masks; i++ ) {
replay_mask = unit_masks[i];
/* only masks 2..10 map to pebs enable / matrix-vert pairs */
if ( replay_mask > 1 && replay_mask < 11 ) {
/* process each valid mask we find */
bits->pebs_enable |= p4_replay_regs[replay_mask].enb;
bits->pebs_matrix_vert |= p4_replay_regs[replay_mask].mat_vert;
}
}
}
/* store the escr and cccr values */
bits->event = escr_value.val;
bits->cccr = cccr_value.val;
bits->ireset = 0; /* I don't really know what this does */
SUBDBG( "escr: 0x%lx; cccr: 0x%lx\n", escr_value.val, cccr_value.val );
} else {
/* non-P4: libpfm supplies the counter mask and base event code; the
   evntsel command word is code | (umask << 8). */
int ret, code;
if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK )
return PAPI_ENOEVNT;
if ( ( ret = _pfm_get_counter_info( event, &bits->selector,
&code ) ) != PAPI_OK )
return ret;
bits->counter_cmd=(int) (code | ((_pfm_convert_umask(event,umask))<< 8) );
SUBDBG( "selector: %#x\n", bits->selector );
SUBDBG( "event: %#x; umask: %#x; code: %#x; cmd: %#x\n", event,
umask, code, ( ( hwd_register_t * ) bits )->counter_cmd );
}
return PAPI_OK;
}

| int _perfctr_ctl | ( | hwd_context_t * | ctx,
| int | code, |	||
| _papi_int_option_t * | option |	||
| ) |
Definition at line 289 of file perfctr.c.
{
/* Component control entry point: dispatch a PAPI option 'code' to the
   matching perfctr operation, reading/writing fields of 'option'.
   Returns a PAPI status code (PAPI_ENOSUPP for unknown codes). */
( void ) ctx; /*unused */
switch ( code ) {
case PAPI_DOMAIN:
case PAPI_DEFDOM:
#if defined(PPC64)
return ( _perfctr_vector.
set_domain( option->domain.ESI, option->domain.domain ) );
#else
return ( _perfctr_vector.
set_domain( option->domain.ESI->ctl_state,
option->domain.domain ) );
#endif
case PAPI_GRANUL:
case PAPI_DEFGRN:
/* granularity changes are not supported by this component */
return PAPI_ECMP;
case PAPI_ATTACH:
return ( attach( option->attach.ESI->ctl_state, option->attach.tid ) );
case PAPI_DETACH:
return ( detach( option->attach.ESI->ctl_state ) );
case PAPI_DEF_ITIMER:
{
/* flags are currently ignored, eventually the flags will be able
to specify whether or not we use POSIX itimers (clock_gettimer) */
/* each itimer is only accepted with its conventional signal */
if ( ( option->itimer.itimer_num == ITIMER_REAL ) &&
( option->itimer.itimer_sig != SIGALRM ) )
return PAPI_EINVAL;
if ( ( option->itimer.itimer_num == ITIMER_VIRTUAL ) &&
( option->itimer.itimer_sig != SIGVTALRM ) )
return PAPI_EINVAL;
if ( ( option->itimer.itimer_num == ITIMER_PROF ) &&
( option->itimer.itimer_sig != SIGPROF ) )
return PAPI_EINVAL;
if ( option->itimer.ns > 0 )
option->itimer.ns = round_requested_ns( option->itimer.ns );
/* At this point, we assume the user knows what he or
she is doing, they maybe doing something arch specific */
return PAPI_OK;
}
case PAPI_DEF_MPX_NS:
{
option->multiplex.ns =
( unsigned long ) round_requested_ns( ( int ) option->multiplex.
ns );
return ( PAPI_OK );
}
case PAPI_DEF_ITIMER_NS:
{
option->itimer.ns = round_requested_ns( option->itimer.ns );
return ( PAPI_OK );
}
default:
return ( PAPI_ENOSUPP );
}
}

| void _perfctr_dispatch_timer | ( | int | signal, |
| hwd_siginfo_t * | si, | ||
| void * | context | ||
| ) |
| int _perfctr_init_component | ( | int | ) |
Definition at line 107 of file perfctr.c.
{
/* One-time component initialization: query the perfctr kernel driver,
   fill in _perfctr_vector.cmp_info and _papi_hwi_system_info, then set
   up the native/preset event tables.  On failure a human-readable
   reason is left in cmp_info.disabled_reason.  Returns PAPI_OK or a
   PAPI error code. */
int retval;
struct perfctr_info info;
char abiv[PAPI_MIN_STR_LEN];
#if defined(PERFCTR26)
int fd;
#else
struct vperfctr *dev;
#endif
#if defined(PERFCTR26)
/* Get info from the kernel */
/* Use lower level calls per Mikael to get the perfctr info
without actually creating a new kernel-side state.
Also, close the fd immediately after retrieving the info.
This is much lighter weight and doesn't reserve the counter
resources. Also compatible with perfctr 2.6.14.
*/
fd = _vperfctr_open( 0 );
if ( fd < 0 ) {
strncpy(_perfctr_vector.cmp_info.disabled_reason,
VOPEN_ERROR,PAPI_MAX_STR_LEN);
return PAPI_ESYS;
}
retval = perfctr_info( fd, &info );
close( fd );
if ( retval < 0 ) {
strncpy(_perfctr_vector.cmp_info.disabled_reason,
VINFO_ERROR,PAPI_MAX_STR_LEN);
return PAPI_ESYS;
}
/* copy tsc multiplier to local variable */
/* this field appears in perfctr 2.6 and higher */
tb_scale_factor = ( long long ) info.tsc_to_cpu_mult;
#else
/* Opened once for all threads. */
if ( ( dev = vperfctr_open( ) ) == NULL ) {
strncpy(_perfctr_vector.cmp_info.disabled_reason,
VOPEN_ERROR,PAPI_MAX_STR_LEN);
return PAPI_ESYS;
}
SUBDBG( "_perfctr_init_component vperfctr_open = %p\n", dev );
/* Get info from the kernel */
retval = vperfctr_info( dev, &info );
if ( retval < 0 ) {
strncpy(_perfctr_vector.cmp_info.disabled_reason,
VINFO_ERROR,PAPI_MAX_STR_LEN);
return ( PAPI_ESYS );
}
vperfctr_close( dev );
#endif
/* Fill in what we can of the papi_system_info. */
retval = _papi_os_vector.get_system_info( &_papi_hwi_system_info );
if ( retval != PAPI_OK )
return ( retval );
/* Setup memory info */
retval = _papi_os_vector.get_memory_info( &_papi_hwi_system_info.hw_info,
( int ) info.cpu_type );
if ( retval )
return ( retval );
/* describe this component and the driver ABI it talked to */
strcpy( _perfctr_vector.cmp_info.name,"perfctr.c" );
strcpy( _perfctr_vector.cmp_info.version, "$Revision$" );
sprintf( abiv, "0x%08X", info.abi_version );
strcpy( _perfctr_vector.cmp_info.support_version, abiv );
strcpy( _perfctr_vector.cmp_info.kernel_version, info.driver_version );
_perfctr_vector.cmp_info.CmpIdx = cidx;
_perfctr_vector.cmp_info.num_cntrs = ( int ) PERFCTR_CPU_NRCTRS( &info );
_perfctr_vector.cmp_info.num_mpx_cntrs=_perfctr_vector.cmp_info.num_cntrs;
/* rdpmc lets user space read counters without a syscall */
if ( info.cpu_features & PERFCTR_FEATURE_RDPMC )
_perfctr_vector.cmp_info.fast_counter_read = 1;
else
_perfctr_vector.cmp_info.fast_counter_read = 0;
_perfctr_vector.cmp_info.fast_real_timer = 1;
_perfctr_vector.cmp_info.fast_virtual_timer = 1;
_perfctr_vector.cmp_info.attach = 1;
_perfctr_vector.cmp_info.attach_must_ptrace = 1;
_perfctr_vector.cmp_info.default_domain = PAPI_DOM_USER;
#if !defined(PPC64)
/* AMD and Intel ia386 processors all support unit mask bits */
_perfctr_vector.cmp_info.cntr_umasks = 1;
#endif
#if defined(PPC64)
_perfctr_vector.cmp_info.available_domains =
PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR;
#else
_perfctr_vector.cmp_info.available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL;
#endif
_perfctr_vector.cmp_info.default_granularity = PAPI_GRN_THR;
_perfctr_vector.cmp_info.available_granularities = PAPI_GRN_THR;
if ( info.cpu_features & PERFCTR_FEATURE_PCINT )
_perfctr_vector.cmp_info.hardware_intr = 1;
else
_perfctr_vector.cmp_info.hardware_intr = 0;
SUBDBG( "Hardware/OS %s support counter generated interrupts\n",
_perfctr_vector.cmp_info.hardware_intr ? "does" : "does not" );
strcpy( _papi_hwi_system_info.hw_info.model_string,
PERFCTR_CPU_NAME( &info ) );
_papi_hwi_system_info.hw_info.model = ( int ) info.cpu_type;
#if defined(PPC64)
_papi_hwi_system_info.hw_info.vendor = PAPI_VENDOR_IBM;
if ( strlen( _papi_hwi_system_info.hw_info.vendor_string ) == 0 )
strcpy( _papi_hwi_system_info.hw_info.vendor_string, "IBM" );
#else
_papi_hwi_system_info.hw_info.vendor =
xlate_cpu_type_to_vendor( info.cpu_type );
#endif
/* Setup presets last. Some platforms depend on earlier info */
#if !defined(PPC64)
// retval = setup_p3_vector_table(vtable);
if ( !retval )
retval = _papi_libpfm_init(&_perfctr_vector, cidx );
#else
/* Setup native and preset events */
// retval = ppc64_setup_vector_table(vtable);
if ( !retval )
retval = perfctr_ppc64_setup_native_table( );
if ( !retval )
retval = setup_ppc64_presets( info.cpu_type, cidx );
#endif
if ( retval )
return ( retval );
return ( PAPI_OK );
}

| int _perfctr_init_thread | ( | hwd_context_t * | ctx | ) |
Definition at line 380 of file perfctr.c.
{
/* Per-thread initialization: open a virtual perfctr for this
   thread/process and start its virtualized TSC.  Returns PAPI_OK or
   PAPI_ESYS when the kernel interface cannot be opened/controlled. */
struct vperfctr_control tmp;
int error;
/* Initialize our thread/process pointer. */
if ( ( ctx->perfctr = vperfctr_open( ) ) == NULL ) {
#ifdef VPERFCTR_OPEN_CREAT_EXCL
/* New versions of perfctr have this, which allows us to
get a previously created context, i.e. one created after
a fork and now we're inside a new process that has been exec'd */
if ( errno ) {
if ( ( ctx->perfctr = vperfctr_open_mode( 0 ) ) == NULL ) {
return PAPI_ESYS;
}
} else {
return PAPI_ESYS;
}
#else
return PAPI_ESYS;
#endif
}
SUBDBG( "_papi_hwd_init vperfctr_open() = %p\n", ctx->perfctr );
/* Initialize the per thread/process virtualized TSC */
memset( &tmp, 0x0, sizeof ( tmp ) );
tmp.cpu_control.tsc_on = 1;
#ifdef VPERFCTR_CONTROL_CLOEXEC
tmp.flags = VPERFCTR_CONTROL_CLOEXEC;
SUBDBG( "close on exec\t\t\t%u\n", tmp.flags );
#endif
/* Start the per thread/process virtualized TSC */
error = vperfctr_control( ctx->perfctr, &tmp );
if ( error < 0 ) {
SUBDBG( "starting virtualized TSC; vperfctr_control returns %d\n",
error );
return PAPI_ESYS;
}
return PAPI_OK;
}
| int _perfctr_shutdown_thread | ( | hwd_context_t * | ctx | ) |
Definition at line 428 of file perfctr.c.
{
/* Per-thread teardown: unlink and close this thread's virtual perfctr,
   then zero the context so stale handles cannot be reused. */
#ifdef DEBUG
int retval = vperfctr_unlink( ctx->perfctr );
SUBDBG( "_papi_hwd_shutdown vperfctr_unlink(%p) = %d\n", ctx->perfctr,
retval );
#else
vperfctr_unlink( ctx->perfctr );
#endif
vperfctr_close( ctx->perfctr );
SUBDBG( "_perfctr_shutdown vperfctr_close(%p)\n", ctx->perfctr );
memset( ctx, 0x0, sizeof ( hwd_context_t ) );
return ( PAPI_OK );
}
| static int _pfm_get_counter_info | ( | unsigned int | event, |
| unsigned int * | selector, | ||
| int * | code | ||
| ) | [static] |
Definition at line 970 of file perfctr-x86.c.
{
pfmlib_regmask_t cnt, impl;
unsigned int num;
unsigned int i, first = 1;
int ret;
if ( ( ret = pfm_get_event_counters( event, &cnt ) ) != PFMLIB_SUCCESS ) {
PAPIERROR( "pfm_get_event_counters(%d,%p): %s", event, &cnt,
pfm_strerror( ret ) );
return PAPI_ESYS;
}
if ( ( ret = pfm_get_num_counters( &num ) ) != PFMLIB_SUCCESS ) {
PAPIERROR( "pfm_get_num_counters(%p): %s", num, pfm_strerror( ret ) );
return PAPI_ESYS;
}
if ( ( ret = pfm_get_impl_counters( &impl ) ) != PFMLIB_SUCCESS ) {
PAPIERROR( "pfm_get_impl_counters(%p): %s", &impl,
pfm_strerror( ret ) );
return PAPI_ESYS;
}
*selector = 0;
for ( i = 0; num; i++ ) {
if ( pfm_regmask_isset( &impl, i ) )
num--;
if ( pfm_regmask_isset( &cnt, i ) ) {
if ( first ) {
if ( ( ret =
pfm_get_event_code_counter( event, i,
code ) ) !=
PFMLIB_SUCCESS ) {
PAPIERROR( "pfm_get_event_code_counter(%d, %d, %p): %s",
event, i, code, pfm_strerror( ret ) );
return PAPI_ESYS;
}
first = 0;
}
*selector |= 1 << i;
}
}
return PAPI_OK;
}


| static int _x86_allocate_registers | ( | EventSetInfo_t * | ESI | ) | [static] |
Definition at line 418 of file perfctr-x86.c.
{
/* Map every native event in the eventset onto a hardware counter using
   the bipartite allocator.  Returns PAPI_OK on a successful mapping or
   PAPI_ECNFLCT when the events cannot coexist. */
int i, j, natNum;
hwd_reg_alloc_t event_list[MAX_COUNTERS];
hwd_register_t *ptr;
/* Initialize the local structure needed
for counter allocation and optimization. */
natNum = ESI->NativeCount;
if ( is_pentium4() ) {
SUBDBG( "native event count: %d\n", natNum );
}
for ( i = 0; i < natNum; i++ ) {
/* retrieve the mapping information about this native event */
_papi_libpfm_ntv_code_to_bits( ( unsigned int ) ESI->NativeInfoArray[i].
ni_event, &event_list[i].ra_bits );
if ( is_pentium4() ) {
/* combine counter bit masks for both esc registers into selector */
event_list[i].ra_selector =
event_list[i].ra_bits.counter[0] | event_list[i].ra_bits.
counter[1];
} else {
/* make sure register allocator only looks at legal registers */
event_list[i].ra_selector =
event_list[i].ra_bits.selector & ALLCNTRS;
#ifdef PERFCTR_X86_INTEL_CORE2
/* Core2 fixed-function counters live in the selector's upper
   bits; fold them into the general-purpose range. */
if ( _papi_hwi_system_info.hw_info.model ==
PERFCTR_X86_INTEL_CORE2 )
event_list[i].ra_selector |=
( ( event_list[i].ra_bits.
selector >> 16 ) << 2 ) & ALLCNTRS;
#endif
}
/* calculate native event rank, which is no. of counters it can live on */
event_list[i].ra_rank = 0;
for ( j = 0; j < MAX_COUNTERS; j++ ) {
if ( event_list[i].ra_selector & ( 1 << j ) ) {
event_list[i].ra_rank++;
}
}
if ( is_pentium4() ) {
event_list[i].ra_escr[0] = event_list[i].ra_bits.escr[0];
event_list[i].ra_escr[1] = event_list[i].ra_bits.escr[1];
#ifdef DEBUG
SUBDBG( "i: %d\n", i );
print_alloc( &event_list[i] );
#endif
}
}
if ( _papi_bipartite_alloc( event_list, natNum, ESI->CmpIdx ) ) { /* successfully mapped */
for ( i = 0; i < natNum; i++ ) {
#ifdef PERFCTR_X86_INTEL_CORE2
if ( _papi_hwi_system_info.hw_info.model ==
PERFCTR_X86_INTEL_CORE2 )
event_list[i].ra_bits.selector = event_list[i].ra_selector;
#endif
#ifdef DEBUG
if ( is_pentium4() ) {
SUBDBG( "i: %d\n", i );
print_alloc( &event_list[i] );
}
#endif
/* Copy all info about this native event to the NativeInfo struct */
ptr = ESI->NativeInfoArray[i].ni_bits;
*ptr = event_list[i].ra_bits;
if ( is_pentium4() ) {
/* The selector contains the counter bit position. Turn it into a number
and store it in the first counter value, zeroing the second. */
ptr->counter[0] = ffs( event_list[i].ra_selector ) - 1;
ptr->counter[1] = 0;
}
/* Array order on perfctr is event ADD order, not counter #... */
ESI->NativeInfoArray[i].ni_position = i;
}
return PAPI_OK;
} else
return PAPI_ECNFLCT;
}

| static int _x86_init_control_state | ( | hwd_control_state_t * | ptr | ) | [static] |
Definition at line 119 of file perfctr-x86.c.
{
/* Initialize a fresh control state: apply the component's default
   counting domain (user/kernel) to every counter's select register,
   set up the per-model pmc mapping, and keep the TSC running. */
int i, def_mode = 0;
if ( is_pentium4() ) {
/* P4 encodes the domain in the escr auxiliary registers */
if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_USER )
def_mode |= ESCR_T0_USR;
if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_KERNEL )
def_mode |= ESCR_T0_OS;
for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
ptr->control.cpu_control.evntsel_aux[i] |= def_mode;
}
ptr->control.cpu_control.tsc_on = 1;
ptr->control.cpu_control.nractrs = 0;
ptr->control.cpu_control.nrictrs = 0;
#ifdef VPERFCTR_CONTROL_CLOEXEC
ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags );
#endif
} else {
if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_USER )
def_mode |= PERF_USR;
if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_KERNEL )
def_mode |= PERF_OS;
ptr->allocated_registers.selector = 0;
switch ( _papi_hwi_system_info.hw_info.model ) {
/* P5/P6-class parts: a single global enable bit lives in
   evntsel[0]; counters map 1:1 to pmc slots. */
case PERFCTR_X86_GENERIC:
case PERFCTR_X86_WINCHIP_C6:
case PERFCTR_X86_WINCHIP_2:
case PERFCTR_X86_VIA_C3:
case PERFCTR_X86_INTEL_P5:
case PERFCTR_X86_INTEL_P5MMX:
case PERFCTR_X86_INTEL_PII:
case PERFCTR_X86_INTEL_P6:
case PERFCTR_X86_INTEL_PIII:
#ifdef PERFCTR_X86_INTEL_CORE
case PERFCTR_X86_INTEL_CORE:
#endif
#ifdef PERFCTR_X86_INTEL_PENTM
case PERFCTR_X86_INTEL_PENTM:
#endif
ptr->control.cpu_control.evntsel[0] |= PERF_ENABLE;
for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
ptr->control.cpu_control.evntsel[i] |= def_mode;
ptr->control.cpu_control.pmc_map[i] = ( unsigned int ) i;
}
break;
/* Core2/Atom/Nehalem/Westmere and AMD parts: every counter has
   its own enable bit. */
#ifdef PERFCTR_X86_INTEL_CORE2
case PERFCTR_X86_INTEL_CORE2:
#endif
#ifdef PERFCTR_X86_INTEL_ATOM
case PERFCTR_X86_INTEL_ATOM:
#endif
#ifdef PERFCTR_X86_INTEL_NHLM
case PERFCTR_X86_INTEL_NHLM:
#endif
#ifdef PERFCTR_X86_INTEL_WSTMR
case PERFCTR_X86_INTEL_WSTMR:
#endif
#ifdef PERFCTR_X86_AMD_K8
case PERFCTR_X86_AMD_K8:
#endif
#ifdef PERFCTR_X86_AMD_K8C
case PERFCTR_X86_AMD_K8C:
#endif
#ifdef PERFCTR_X86_AMD_FAM10H /* this is defined in perfctr 2.6.29 */
case PERFCTR_X86_AMD_FAM10H:
#endif
case PERFCTR_X86_AMD_K7:
for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
ptr->control.cpu_control.evntsel[i] |= PERF_ENABLE | def_mode;
ptr->control.cpu_control.pmc_map[i] = ( unsigned int ) i;
}
break;
}
#ifdef VPERFCTR_CONTROL_CLOEXEC
ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags );
#endif
/* Make sure the TSC is always on */
ptr->control.cpu_control.tsc_on = 1;
}
return ( PAPI_OK );
}

| static int _x86_read | ( | hwd_context_t * | ctx,
| hwd_control_state_t * | spc, |	||
| long long ** | dp, |	||
| int | flags |	||
| ) | [static] |
Definition at line 701 of file perfctr-x86.c.
{
/* Read the current counter values into spc->state and point *dp at the
   pmc array.  When PAPI_PAUSED, only the saved state is fetched; when
   attached (rvperfctr set), the remote process's counters are read. */
if ( flags & PAPI_PAUSED ) {
vperfctr_read_state( ctx->perfctr, &spc->state, NULL );
if ( !is_pentium4() ) {
unsigned int i = 0;
for ( i = 0;
i <
spc->control.cpu_control.nractrs +
spc->control.cpu_control.nrictrs; i++ ) {
SUBDBG( "vperfctr_read_state: counter %d = %lld\n", i,
spc->state.pmc[i] );
}
}
} else {
SUBDBG( "vperfctr_read_ctrs\n" );
if ( spc->rvperfctr != NULL ) {
rvperfctr_read_ctrs( spc->rvperfctr, &spc->state );
} else {
vperfctr_read_ctrs( ctx->perfctr, &spc->state );
}
}
/* hand the raw pmc array straight back to the caller */
*dp = ( long long * ) spc->state.pmc;
#ifdef DEBUG
{
if ( ISLEVEL( DEBUG_SUBSTRATE ) ) {
unsigned int i;
if ( is_pentium4() ) {
for ( i = 0; i < spc->control.cpu_control.nractrs; i++ ) {
SUBDBG( "raw val hardware index %d is %lld\n", i,
( long long ) spc->state.pmc[i] );
}
} else {
for ( i = 0;
i <
spc->control.cpu_control.nractrs +
spc->control.cpu_control.nrictrs; i++ ) {
SUBDBG( "raw val hardware index %d is %lld\n", i,
( long long ) spc->state.pmc[i] );
}
}
}
}
#endif
return ( PAPI_OK );
}

| static int _x86_reset | ( | hwd_context_t * | ctx, |
| hwd_control_state_t * | cntrl | ||
| ) | [static] |
Definition at line 750 of file perfctr-x86.c.
{
return ( _x86_start( ctx, cntrl ) );
}

| int _x86_set_domain | ( | hwd_control_state_t * | cntrl, |
| int | domain | ||
| ) |
Definition at line 210 of file perfctr-x86.c.
{
int i, did = 0;
int num_cntrs = _perfctr_vector.cmp_info.num_cntrs;
/* Clear the current domain set for this event set */
/* We don't touch the Enable bit in this code */
if ( is_pentium4() ) {
for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
cntrl->control.cpu_control.evntsel_aux[i] &=
~( ESCR_T0_OS | ESCR_T0_USR );
}
if ( domain & PAPI_DOM_USER ) {
did = 1;
for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
cntrl->control.cpu_control.evntsel_aux[i] |= ESCR_T0_USR;
}
}
if ( domain & PAPI_DOM_KERNEL ) {
did = 1;
for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
cntrl->control.cpu_control.evntsel_aux[i] |= ESCR_T0_OS;
}
}
} else {
for ( i = 0; i < num_cntrs; i++ ) {
cntrl->control.cpu_control.evntsel[i] &= ~( PERF_OS | PERF_USR );
}
if ( domain & PAPI_DOM_USER ) {
did = 1;
for ( i = 0; i < num_cntrs; i++ ) {
cntrl->control.cpu_control.evntsel[i] |= PERF_USR;
}
}
if ( domain & PAPI_DOM_KERNEL ) {
did = 1;
for ( i = 0; i < num_cntrs; i++ ) {
cntrl->control.cpu_control.evntsel[i] |= PERF_OS;
}
}
}
if ( !did )
return ( PAPI_EINVAL );
else
return ( PAPI_OK );
}

| static int _x86_set_overflow | ( | EventSetInfo_t * | ESI,
| int | EventIndex, |	||
| int | threshold |	||
| ) | [static] |
Definition at line 805 of file perfctr-x86.c.
{
/* Enable (threshold != 0) or disable (threshold == 0) overflow
   interrupts for the event at EventIndex, moving it between the
   accumulating (nractrs) and interrupting (nrictrs) partitions of the
   counter list.  Returns a PAPI status code. */
hwd_control_state_t *ctl = ( hwd_control_state_t * ) ( ESI->ctl_state );
struct hwd_pmc_control *contr = &(ctl->control);
int i, ncntrs, nricntrs = 0, nracntrs = 0, retval = 0;
OVFDBG( "EventIndex=%d\n", EventIndex );
#ifdef DEBUG
if ( is_pentium4() )
print_control( &(contr->cpu_control) );
#endif
/* The correct event to overflow is EventIndex */
ncntrs = _perfctr_vector.cmp_info.num_cntrs;
i = ESI->EventInfoArray[EventIndex].pos[0];
if ( i >= ncntrs ) {
PAPIERROR( "Selector id %d is larger than ncntrs %d", i, ncntrs );
return PAPI_EINVAL;
}
if ( threshold != 0 ) { /* Set an overflow threshold */
retval = _papi_hwi_start_signal( _perfctr_vector.cmp_info.hardware_intr_sig,
NEED_CONTEXT,
_perfctr_vector.cmp_info.CmpIdx );
if ( retval != PAPI_OK )
return ( retval );
/* overflow interrupt occurs on the NEXT event after overflow occurs
thus we subtract 1 from the threshold. */
contr->cpu_control.ireset[i] = ( -threshold + 1 );
/* P4 and non-P4 parts arm the interrupt through different bits */
if ( is_pentium4() )
contr->cpu_control.evntsel[i] |= CCCR_OVF_PMI_T0;
else
contr->cpu_control.evntsel[i] |= PERF_INT_ENABLE;
contr->cpu_control.nrictrs++;
contr->cpu_control.nractrs--;
nricntrs = ( int ) contr->cpu_control.nrictrs;
nracntrs = ( int ) contr->cpu_control.nractrs;
contr->si_signo = _perfctr_vector.cmp_info.hardware_intr_sig;
/* move this event to the bottom part of the list if needed */
if ( i < nracntrs )
swap_events( ESI, contr, i, nracntrs );
OVFDBG( "Modified event set\n" );
} else {
/* disable: clear the interrupt bit only if it was actually set */
if ( is_pentium4() && contr->cpu_control.evntsel[i] & CCCR_OVF_PMI_T0 ) {
contr->cpu_control.ireset[i] = 0;
contr->cpu_control.evntsel[i] &= ( ~CCCR_OVF_PMI_T0 );
contr->cpu_control.nrictrs--;
contr->cpu_control.nractrs++;
} else if ( !is_pentium4() &&
contr->cpu_control.evntsel[i] & PERF_INT_ENABLE ) {
contr->cpu_control.ireset[i] = 0;
contr->cpu_control.evntsel[i] &= ( ~PERF_INT_ENABLE );
contr->cpu_control.nrictrs--;
contr->cpu_control.nractrs++;
}
nricntrs = ( int ) contr->cpu_control.nrictrs;
nracntrs = ( int ) contr->cpu_control.nractrs;
/* move this event to the top part of the list if needed */
if ( i >= nracntrs )
swap_events( ESI, contr, i, nracntrs - 1 );
/* no interrupting counters left means no signal to deliver */
if ( !nricntrs )
contr->si_signo = 0;
OVFDBG( "Modified event set\n" );
retval = _papi_hwi_stop_signal( _perfctr_vector.cmp_info.hardware_intr_sig );
}
#ifdef DEBUG
if ( is_pentium4() )
print_control( &(contr->cpu_control) );
#endif
OVFDBG( "End of call. Exit code: %d\n", retval );
return ( retval );
}

| static int _x86_start | ( | hwd_context_t * | ctx, |
| hwd_control_state_t * | state | ||
| ) | [static] |
Definition at line 653 of file perfctr-x86.c.
{
int error;
#ifdef DEBUG
print_control( &state->control.cpu_control );
#endif
if ( state->rvperfctr != NULL ) {
if ( ( error =
rvperfctr_control( state->rvperfctr, &state->control ) ) < 0 ) {
SUBDBG( "rvperfctr_control returns: %d\n", error );
PAPIERROR( RCNTRL_ERROR );
return ( PAPI_ESYS );
}
return ( PAPI_OK );
}
if ( ( error = vperfctr_control( ctx->perfctr, &state->control ) ) < 0 ) {
SUBDBG( "vperfctr_control returns: %d\n", error );
PAPIERROR( VCNTRL_ERROR );
return ( PAPI_ESYS );
}
return ( PAPI_OK );
}


| static int _x86_stop | ( | hwd_context_t * | ctx, |
| hwd_control_state_t * | state | ||
| ) | [static] |
Definition at line 679 of file perfctr-x86.c.
{
int error;
if ( state->rvperfctr != NULL ) {
if ( rvperfctr_stop( ( struct rvperfctr * ) ctx->perfctr ) < 0 ) {
PAPIERROR( RCNTRL_ERROR );
return ( PAPI_ESYS );
}
return ( PAPI_OK );
}
error = vperfctr_stop( ctx->perfctr );
if ( error < 0 ) {
SUBDBG( "vperfctr_stop returns: %d\n", error );
PAPIERROR( VCNTRL_ERROR );
return ( PAPI_ESYS );
}
return ( PAPI_OK );
}

| static int _x86_stop_profiling | ( | ThreadInfo_t * | master, |
| EventSetInfo_t * | ESI | ||
| ) | [static] |
Definition at line 890 of file perfctr-x86.c.
{
( void ) master; /*unused */
( void ) ESI; /*unused */
return ( PAPI_OK );
}
| static int _x86_update_control_state | ( | hwd_control_state_t * | this_state,
| NativeInfo_t * | native, |	||
| int | count, |	||
| hwd_context_t * | ctx |	||
| ) | [static] |
Definition at line 550 of file perfctr-x86.c.
{
/* Rebuild the perfctr cpu_control block from the 'count' native events
   in 'native': clear the old configuration, then program each event's
   pmc slot, select registers and (on P4) the shared pebs registers.
   Returns PAPI_OK or PAPI_ECNFLCT on a pebs register conflict. */
( void ) ctx; /*unused */
unsigned int i, k, retval = PAPI_OK;
hwd_register_t *bits,*bits2;
struct perfctr_cpu_control *cpu_control = &this_state->control.cpu_control;
/* clear out the events from the control state */
clear_cs_events( this_state );
if ( is_pentium4() ) {
/* fill the counters we're using */
for ( i = 0; i < ( unsigned int ) count; i++ ) {
/* dereference the mapping information about this native event */
bits = native[i].ni_bits;
/* Add counter control command values to eventset */
cpu_control->pmc_map[i] = bits->counter[0];
cpu_control->evntsel[i] = bits->cccr;
cpu_control->ireset[i] = bits->ireset;
cpu_control->pmc_map[i] |= FAST_RDPMC;
cpu_control->evntsel_aux[i] |= bits->event;
/* pebs_enable and pebs_matrix_vert are shared registers used for replay_events.
Replay_events count L1 and L2 cache events. There is only one of each for
the entire eventset. Therefore, there can be only one unique replay_event
per eventset. This means L1 and L2 can't be counted together. Which stinks.
This conflict should be trapped in the allocation scheme, but we'll test for it
here too, just in case. */
if ( bits->pebs_enable ) {
/* if pebs_enable isn't set, just copy */
if ( cpu_control->p4.pebs_enable == 0 ) {
cpu_control->p4.pebs_enable = bits->pebs_enable;
/* if pebs_enable conflicts, flag an error */
} else if ( cpu_control->p4.pebs_enable != bits->pebs_enable ) {
SUBDBG
( "WARNING: P4_update_control_state -- pebs_enable conflict!" );
retval = PAPI_ECNFLCT;
}
/* if pebs_enable == bits->pebs_enable, do nothing */
}
if ( bits->pebs_matrix_vert ) {
/* if pebs_matrix_vert isn't set, just copy */
if ( cpu_control->p4.pebs_matrix_vert == 0 ) {
cpu_control->p4.pebs_matrix_vert = bits->pebs_matrix_vert;
/* if pebs_matrix_vert conflicts, flag an error */
} else if ( cpu_control->p4.pebs_matrix_vert !=
bits->pebs_matrix_vert ) {
SUBDBG
( "WARNING: P4_update_control_state -- pebs_matrix_vert conflict!" );
retval = PAPI_ECNFLCT;
}
/* if pebs_matrix_vert == bits->pebs_matrix_vert, do nothing */
}
}
this_state->control.cpu_control.nractrs = count;
/* Make sure the TSC is always on */
this_state->control.cpu_control.tsc_on = 1;
#ifdef DEBUG
print_control( &this_state->control.cpu_control );
#endif
} else {
switch ( _papi_hwi_system_info.hw_info.model ) {
#ifdef PERFCTR_X86_INTEL_CORE2
case PERFCTR_X86_INTEL_CORE2:
/* fill the counters we're using */
for ( i = 0; i < ( unsigned int ) count; i++ ) {
bits2 = native[i].ni_bits;
/* find the first counter bit the allocator picked */
for ( k = 0; k < MAX_COUNTERS; k++ )
if ( bits2->selector & ( 1 << k ) ) {
break;
}
/* bits 2+ denote fixed-function counters; 0x40000000 marks
   a fixed pmc in the perfctr mapping */
if ( k > 1 )
this_state->control.cpu_control.pmc_map[i] =
( k - 2 ) | 0x40000000;
else
this_state->control.cpu_control.pmc_map[i] = k;
/* Add counter control command values to eventset */
this_state->control.cpu_control.evntsel[i] |=
bits2->counter_cmd;
}
break;
#endif
default:
/* fill the counters we're using */
for ( i = 0; i < ( unsigned int ) count; i++ ) {
/* Add counter control command values to eventset */
bits2 = native[i].ni_bits;
this_state->control.cpu_control.evntsel[i] |=
bits2->counter_cmd;
}
}
this_state->control.cpu_control.nractrs = ( unsigned int ) count;
}
return retval;
}

| static void clear_cs_events | ( | hwd_control_state_t * | this_state | ) | [static] |
Definition at line 504 of file perfctr-x86.c.
{
	unsigned int idx;
	unsigned int total;

	/* Total configured counters: accumulating (nractrs) plus
	   interrupting (nrictrs). */
	total = this_state->control.cpu_control.nractrs +
		this_state->control.cpu_control.nrictrs;

	/* Strip the per-counter control command values out of the eventset. */
	for ( idx = 0; idx < total; idx++ ) {
		SUBDBG( "Clearing pmc event entry %d\n", idx );
		if ( is_pentium4() ) {
			/* P4: zero the counter map and event select, but keep the
			   OS/USR privilege bits in the aux (ESCR) select word. */
			this_state->control.cpu_control.pmc_map[idx] = 0;
			this_state->control.cpu_control.evntsel[idx] = 0;
			this_state->control.cpu_control.evntsel_aux[idx] &=
				( ESCR_T0_OS | ESCR_T0_USR );
		} else {
			/* Non-P4: restore the identity pmc mapping and keep only
			   the enable and privilege bits of the event select. */
			this_state->control.cpu_control.pmc_map[idx] = idx;
			this_state->control.cpu_control.evntsel[idx] &=
				( PERF_ENABLE | PERF_OS | PERF_USR );
		}
		this_state->control.cpu_control.ireset[idx] = 0;
	}

	if ( is_pentium4() ) {
		/* Reset the shared PEBS (replay-event) registers. */
		this_state->control.cpu_control.p4.pebs_enable = 0;
		this_state->control.cpu_control.p4.pebs_matrix_vert = 0;
	}

	/* No counters of either kind remain configured. */
	this_state->control.cpu_control.nractrs = 0;
	this_state->control.cpu_control.nrictrs = 0;
#ifdef DEBUG
	if ( is_pentium4() )
		print_control( &this_state->control.cpu_control );
#endif
}


| static int is_pentium4 | ( | void | ) | [inline, static] |
Definition at line 75 of file perfctr-x86.c.
{
	/* Pentium 4 (NetBurst) is the Intel cpuid family 15 line. */
	return ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_INTEL &&
			 _papi_hwi_system_info.hw_info.cpuid_family == 15 ) ? 1 : 0;
}

| static void print_alloc | ( | X86_reg_alloc_t * | a | ) | [static] |
Definition at line 87 of file perfctr-x86.c.
{
	/* Dump the fields of an x86 register-allocation descriptor via the
	   SUBDBG debug-logging macro: the counter selector bitmap, its
	   allocation rank, and the two ESCR slots. */
	SUBDBG( "X86_reg_alloc:\n" );
	SUBDBG( " selector: %#x\n", a->ra_selector );
	SUBDBG( " rank: %#x\n", a->ra_rank );
	SUBDBG( " escr: %#x %#x\n", a->ra_escr[0], a->ra_escr[1] );
}

| void print_control | ( | const struct perfctr_cpu_control * | control | ) |
Definition at line 96 of file perfctr-x86.c.
{
	unsigned int idx;
	/* Total configured counters = accumulating + interrupting;
	   hoisted out of the loop condition (control is const). */
	const unsigned int total = control->nractrs + control->nrictrs;

	SUBDBG( "Control used:\n" );
	SUBDBG( "tsc_on\t\t\t%u\n", control->tsc_on );
	SUBDBG( "nractrs\t\t\t%u\n", control->nractrs );
	SUBDBG( "nrictrs\t\t\t%u\n", control->nrictrs );

	for ( idx = 0; idx < total; ++idx ) {
		/* Large map values presumably carry flag bits (cf. FAST_RDPMC
		   elsewhere in this file), so print those in hex. */
		if ( control->pmc_map[idx] >= 18 ) {
			SUBDBG( "pmc_map[%u]\t\t0x%08X\n", idx, control->pmc_map[idx] );
		} else {
			SUBDBG( "pmc_map[%u]\t\t%u\n", idx, control->pmc_map[idx] );
		}
		SUBDBG( "evntsel[%u]\t\t0x%08X\n", idx, control->evntsel[idx] );
		if ( control->ireset[idx] ) {
			SUBDBG( "ireset[%u]\t%d\n", idx, control->ireset[idx] );
		}
	}
}

| static void swap_events | ( | EventSetInfo_t * | ESI, |
| struct hwd_pmc_control * | contr, | ||
| int | cntr1, | ||
| int | cntr2 | ||
| ) | [static] |
Definition at line 762 of file perfctr-x86.c.
{
	unsigned int utmp;
	int itmp;
	int ev, pos;

	/* Exchange the two counter positions in the native-event table. */
	for ( ev = 0; ev < ESI->NativeCount; ev++ ) {
		int *where = &ESI->NativeInfoArray[ev].ni_position;
		if ( *where == cntr1 ) {
			*where = cntr2;
		} else if ( *where == cntr2 ) {
			*where = cntr1;
		}
	}

	/* Exchange the positions recorded for each event's pos[] list
	   (terminated by a negative entry). */
	for ( ev = 0; ev < ESI->NumberOfEvents; ev++ ) {
		for ( pos = 0; ESI->EventInfoArray[ev].pos[pos] >= 0; pos++ ) {
			if ( ESI->EventInfoArray[ev].pos[pos] == cntr1 ) {
				ESI->EventInfoArray[ev].pos[pos] = cntr2;
			} else if ( ESI->EventInfoArray[ev].pos[pos] == cntr2 ) {
				ESI->EventInfoArray[ev].pos[pos] = cntr1;
			}
		}
	}

	/* Swap the per-counter control register contents themselves. */
	utmp = contr->cpu_control.pmc_map[cntr1];
	contr->cpu_control.pmc_map[cntr1] = contr->cpu_control.pmc_map[cntr2];
	contr->cpu_control.pmc_map[cntr2] = utmp;

	utmp = contr->cpu_control.evntsel[cntr1];
	contr->cpu_control.evntsel[cntr1] = contr->cpu_control.evntsel[cntr2];
	contr->cpu_control.evntsel[cntr2] = utmp;

	if ( is_pentium4() ) {
		/* P4 keeps a second (aux/ESCR) select word per counter. */
		utmp = contr->cpu_control.evntsel_aux[cntr1];
		contr->cpu_control.evntsel_aux[cntr1] =
			contr->cpu_control.evntsel_aux[cntr2];
		contr->cpu_control.evntsel_aux[cntr2] = utmp;
	}

	/* ireset is signed, so swap through the signed temporary. */
	itmp = contr->cpu_control.ireset[cntr1];
	contr->cpu_control.ireset[cntr1] = contr->cpu_control.ireset[cntr2];
	contr->cpu_control.ireset[cntr2] = itmp;
}


Definition at line 57 of file papi_internal.c.
Definition at line 1163 of file perfctr-x86.c.
pentium4_replay_regs_t p4_replay_regs[] [static] |
Definition at line 910 of file perfctr-x86.c.
| pentium4_cccr_reg_t pentium4_cccrs[] |
| pentium4_escr_reg_t pentium4_escrs[] |
| pentium4_event_t pentium4_events[] |
int pfm2intel[] [static] | = { 0, 1, 4, 5, 8, 9, 12, 13, 16, 2, 3, 6, 7, 10, 11, 14, 15, 17 }
Definition at line 958 of file perfctr-x86.c.