PAPI  5.3.0.0
perfctr-x86.c
Go to the documentation of this file.
00001 /* 
00002 * File:    perfctr-x86.c
00003 * Author:  Brian Sheely
00004 *          bsheely@eecs.utk.edu
00005 * Mods:    <your name here>
00006 *          <your email address>
00007 */
00008 
00009 #include <string.h>
00010 #include <linux/unistd.h>
00011 
00012 #include "papi.h"
00013 #include "papi_memory.h"
00014 #include "papi_internal.h"
00015 #include "perfctr-x86.h"
00016 #include "perfmon/pfmlib.h"
00017 #include "extras.h"
00018 #include "papi_vector.h"
00019 #include "papi_libpfm_events.h"
00020 
00021 #include "papi_preset.h"
00022 #include "linux-memory.h"
00023 
00024 /* Contains source for the Modified Bipartite Allocation scheme */
00025 #include "papi_bipartite.h"
00026 
00027 /* Prototypes for entry points found in perfctr.c */
00028 extern int _perfctr_init_component( int );
00029 extern int _perfctr_ctl( hwd_context_t * ctx, int code,
00030                        _papi_int_option_t * option );
00031 extern void _perfctr_dispatch_timer( int signal, hwd_siginfo_t * si,
00032                                    void *context );
00033 
00034 extern int _perfctr_init_thread( hwd_context_t * ctx );
00035 extern int _perfctr_shutdown_thread( hwd_context_t * ctx );
00036 
00037 #include "linux-common.h"
00038 #include "linux-timer.h"
00039 
00040 extern papi_mdi_t _papi_hwi_system_info;
00041 
00042 extern papi_vector_t _perfctr_vector;
00043 
00044 #if defined(PERFCTR26)
00045 #define evntsel_aux p4.escr
00046 #endif
00047 
00048 #if defined(PAPI_PENTIUM4_VEC_MMX)
00049 #define P4_VEC "MMX"
00050 #else
00051 #define P4_VEC "SSE"
00052 #endif
00053 
00054 #if defined(PAPI_PENTIUM4_FP_X87)
00055 #define P4_FPU " X87"
00056 #elif defined(PAPI_PENTIUM4_FP_X87_SSE_SP)
00057 #define P4_FPU " X87 SSE_SP"
00058 #elif defined(PAPI_PENTIUM4_FP_SSE_SP_DP)
00059 #define P4_FPU " SSE_SP SSE_DP"
00060 #else
00061 #define P4_FPU " X87 SSE_DP"
00062 #endif
00063 
00064 /* CODE TO SUPPORT CUSTOMIZABLE FP COUNTS ON OPTERON */
00065 #if defined(PAPI_OPTERON_FP_RETIRED)
00066 #define AMD_FPU "RETIRED"
00067 #elif defined(PAPI_OPTERON_FP_SSE_SP)
00068 #define AMD_FPU "SSE_SP"
00069 #elif defined(PAPI_OPTERON_FP_SSE_DP)
00070 #define AMD_FPU "SSE_DP"
00071 #else
00072 #define AMD_FPU "SPECULATIVE"
00073 #endif
00074 
00075 static inline int is_pentium4(void) {
00076   if ( ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_INTEL ) &&
00077        ( _papi_hwi_system_info.hw_info.cpuid_family == 15 )) {
00078     return 1;
00079   }
00080 
00081   return 0;
00082 
00083 }
00084 
#ifdef DEBUG
/* Dump the fields of an X86_reg_alloc_t through the SUBDBG debug macro. */
static void
print_alloc( X86_reg_alloc_t * a )
{
    SUBDBG( "X86_reg_alloc:\n" );
    SUBDBG( "  selector: %#x\n", a->ra_selector );
    SUBDBG( "  rank: %#x\n", a->ra_rank );
    SUBDBG( "  escr: %#x %#x\n", a->ra_escr[0], a->ra_escr[1] );
}

/* Dump a perfctr cpu_control block: TSC flag, accumulating/interrupting
   counter counts, and each counter's pmc_map/evntsel/ireset entries. */
void
print_control( const struct perfctr_cpu_control *control )
{
    unsigned int i;
    SUBDBG( "Control used:\n" );
    SUBDBG( "tsc_on\t\t\t%u\n", control->tsc_on );
    SUBDBG( "nractrs\t\t\t%u\n", control->nractrs );
    SUBDBG( "nrictrs\t\t\t%u\n", control->nrictrs );

    for ( i = 0; i < ( control->nractrs + control->nrictrs ); ++i ) {
        /* Values >= 18 presumably carry flag bits (e.g. FAST_RDPMC is
           OR'd into pmc_map elsewhere in this file), so print in hex;
           small values are plain counter indices.  NOTE(review): the
           threshold 18 is not explained here — confirm against perfctr. */
        if ( control->pmc_map[i] >= 18 ) {
            SUBDBG( "pmc_map[%u]\t\t0x%08X\n", i, control->pmc_map[i] );
        } else {
            SUBDBG( "pmc_map[%u]\t\t%u\n", i, control->pmc_map[i] );
        }
        SUBDBG( "evntsel[%u]\t\t0x%08X\n", i, control->evntsel[i] );
        if ( control->ireset[i] ) {
            SUBDBG( "ireset[%u]\t%d\n", i, control->ireset[i] );
        }
    }
}
#endif
00117 
/* Initialize a freshly-created control state: apply the component's
   default counting domain (user/kernel) to every counter's select
   register, map counters 1:1, keep the TSC enabled, and zero the
   accumulating/interrupting counter counts.  The register layout
   differs between Pentium 4 (ESCR/CCCR model) and all other supported
   Intel/AMD models.  Always returns PAPI_OK. */
static int
_x86_init_control_state( hwd_control_state_t *ptr )
{
    int i, def_mode = 0;

    if ( is_pentium4() ) {
        /* Translate the default domain into ESCR user/kernel mode bits. */
        if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_USER )
            def_mode |= ESCR_T0_USR;
        if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_KERNEL )
            def_mode |= ESCR_T0_OS;

        for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
            ptr->control.cpu_control.evntsel_aux[i] |= def_mode;
        }
        ptr->control.cpu_control.tsc_on = 1;
        ptr->control.cpu_control.nractrs = 0;
        ptr->control.cpu_control.nrictrs = 0;

#ifdef VPERFCTR_CONTROL_CLOEXEC
        /* Ask the perfctr driver to close its fd across exec(). */
        ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
        SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags );
#endif
    } else {

        /* Non-P4 models: domain bits live directly in evntsel. */
        if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_USER )
            def_mode |= PERF_USR;
        if ( _perfctr_vector.cmp_info.default_domain & PAPI_DOM_KERNEL )
            def_mode |= PERF_OS;

        ptr->allocated_registers.selector = 0;
        switch ( _papi_hwi_system_info.hw_info.model ) {
        case PERFCTR_X86_GENERIC:
        case PERFCTR_X86_WINCHIP_C6:
        case PERFCTR_X86_WINCHIP_2:
        case PERFCTR_X86_VIA_C3:
        case PERFCTR_X86_INTEL_P5:
        case PERFCTR_X86_INTEL_P5MMX:
        case PERFCTR_X86_INTEL_PII:
        case PERFCTR_X86_INTEL_P6:
        case PERFCTR_X86_INTEL_PIII:
#ifdef PERFCTR_X86_INTEL_CORE
        case PERFCTR_X86_INTEL_CORE:
#endif
#ifdef PERFCTR_X86_INTEL_PENTM
        case PERFCTR_X86_INTEL_PENTM:
#endif
            /* P6-family parts: PERF_ENABLE is set only on counter 0
               (presumably a global enable on these models — confirm). */
            ptr->control.cpu_control.evntsel[0] |= PERF_ENABLE;
            for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
                ptr->control.cpu_control.evntsel[i] |= def_mode;
                ptr->control.cpu_control.pmc_map[i] = ( unsigned int ) i;
            }
            break;
#ifdef PERFCTR_X86_INTEL_CORE2
        case PERFCTR_X86_INTEL_CORE2:
#endif
#ifdef PERFCTR_X86_INTEL_ATOM
        case PERFCTR_X86_INTEL_ATOM:
#endif
#ifdef PERFCTR_X86_INTEL_NHLM
        case PERFCTR_X86_INTEL_NHLM:
#endif
#ifdef PERFCTR_X86_INTEL_WSTMR
        case PERFCTR_X86_INTEL_WSTMR:
#endif
#ifdef PERFCTR_X86_AMD_K8
        case PERFCTR_X86_AMD_K8:
#endif
#ifdef PERFCTR_X86_AMD_K8C
        case PERFCTR_X86_AMD_K8C:
#endif
#ifdef PERFCTR_X86_AMD_FAM10H   /* this is defined in perfctr 2.6.29 */
        case PERFCTR_X86_AMD_FAM10H:
#endif
        case PERFCTR_X86_AMD_K7:
            /* K7/K8-style parts: per-counter enable bit. */
            for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
                ptr->control.cpu_control.evntsel[i] |= PERF_ENABLE | def_mode;
                ptr->control.cpu_control.pmc_map[i] = ( unsigned int ) i;
            }
            break;
        }
#ifdef VPERFCTR_CONTROL_CLOEXEC
        ptr->control.flags = VPERFCTR_CONTROL_CLOEXEC;
        SUBDBG( "close on exec\t\t\t%u\n", ptr->control.flags );
#endif

        /* Make sure the TSC is always on */
        ptr->control.cpu_control.tsc_on = 1;
    }
    return ( PAPI_OK );
}
00208 
00209 int
00210 _x86_set_domain( hwd_control_state_t * cntrl, int domain )
00211 {
00212     int i, did = 0;
00213     int num_cntrs = _perfctr_vector.cmp_info.num_cntrs;
00214 
00215     /* Clear the current domain set for this event set */
00216     /* We don't touch the Enable bit in this code */
00217     if ( is_pentium4() ) {
00218         for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
00219             cntrl->control.cpu_control.evntsel_aux[i] &=
00220                 ~( ESCR_T0_OS | ESCR_T0_USR );
00221         }
00222 
00223         if ( domain & PAPI_DOM_USER ) {
00224             did = 1;
00225             for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
00226                 cntrl->control.cpu_control.evntsel_aux[i] |= ESCR_T0_USR;
00227             }
00228         }
00229 
00230         if ( domain & PAPI_DOM_KERNEL ) {
00231             did = 1;
00232             for ( i = 0; i < _perfctr_vector.cmp_info.num_cntrs; i++ ) {
00233                 cntrl->control.cpu_control.evntsel_aux[i] |= ESCR_T0_OS;
00234             }
00235         }
00236     } else {
00237         for ( i = 0; i < num_cntrs; i++ ) {
00238             cntrl->control.cpu_control.evntsel[i] &= ~( PERF_OS | PERF_USR );
00239         }
00240 
00241         if ( domain & PAPI_DOM_USER ) {
00242             did = 1;
00243             for ( i = 0; i < num_cntrs; i++ ) {
00244                 cntrl->control.cpu_control.evntsel[i] |= PERF_USR;
00245             }
00246         }
00247 
00248         if ( domain & PAPI_DOM_KERNEL ) {
00249             did = 1;
00250             for ( i = 0; i < num_cntrs; i++ ) {
00251                 cntrl->control.cpu_control.evntsel[i] |= PERF_OS;
00252             }
00253         }
00254     }
00255 
00256     if ( !did )
00257         return ( PAPI_EINVAL );
00258     else
00259         return ( PAPI_OK );
00260 }
00261 
00262 /* This function examines the event to determine
00263     if it can be mapped to counter ctr.
00264     Returns true if it can, false if it can't. */
00265 static int
00266 _bpt_map_avail( hwd_reg_alloc_t * dst, int ctr )
00267 {
00268     return ( int ) ( dst->ra_selector & ( 1 << ctr ) );
00269 }
00270 
00271 /* This function forces the event to
00272     be mapped to only counter ctr.
00273     Returns nothing.  */
00274 static void
00275 _bpt_map_set( hwd_reg_alloc_t * dst, int ctr )
00276 {
00277     dst->ra_selector = ( unsigned int ) ( 1 << ctr );
00278     dst->ra_rank = 1;
00279 
00280     if ( is_pentium4() ) {
00281         /* Pentium 4 requires that both an escr and a counter are selected.
00282            Find which counter mask contains this counter.
00283            Set the opposite escr to empty (-1) */
00284         if ( dst->ra_bits.counter[0] & dst->ra_selector )
00285             dst->ra_escr[1] = -1;
00286         else
00287             dst->ra_escr[0] = -1;
00288     }
00289 }
00290 
00291 /* This function examines the event to determine
00292    if it has a single exclusive mapping.
00293    Returns true if exlusive, false if non-exclusive.  */
00294 static int
00295 _bpt_map_exclusive( hwd_reg_alloc_t * dst )
00296 {
00297     return ( dst->ra_rank == 1 );
00298 }
00299 
/* This function compares the dst and src events
    to determine if any resources are shared. Typically the src event
    is exclusive, so this detects a conflict if true.
    Returns true if conflict, false if no conflict.
    On Pentium 4 a conflict can arise from three distinct resources:
    the counter selectors, the ESCRs, and the shared PEBS registers. */
static int
_bpt_map_shared( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src )
{
  if ( is_pentium4() ) {
        int retval1, retval2;
        /* Pentium 4 needs to check for conflict of both counters and esc registers */
        /* selectors must share bits */
        retval1 = ( ( dst->ra_selector & src->ra_selector ) ||
                    /* or escrs must equal each other and not be set to -1 */
                    ( ( dst->ra_escr[0] == src->ra_escr[0] ) &&
                      ( ( int ) dst->ra_escr[0] != -1 ) ) ||
                    ( ( dst->ra_escr[1] == src->ra_escr[1] ) &&
                      ( ( int ) dst->ra_escr[1] != -1 ) ) );
        /* Pentium 4 also needs to check for conflict on pebs registers */
        /* pebs enables must both be non-zero */
        retval2 =
            ( ( ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable ) &&
                /* and not equal to each other */
                ( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) ||
              /* same for pebs_matrix_vert */
              ( ( dst->ra_bits.pebs_matrix_vert &&
                  src->ra_bits.pebs_matrix_vert ) &&
                ( dst->ra_bits.pebs_matrix_vert !=
                  src->ra_bits.pebs_matrix_vert ) ) );
        if ( retval2 ) {
            SUBDBG( "pebs conflict!\n" );
        }
        return ( retval1 | retval2 );
    }

    /* Non-P4: a conflict is simply any overlap in the selector masks. */
    return ( int ) ( dst->ra_selector & src->ra_selector );
}
00336 
/* This function removes shared resources available to the src event
    from the resources available to the dst event,
    and reduces the rank of the dst event accordingly. Typically,
    the src event will be exclusive, but the code shouldn't assume it.
    Returns nothing.  */
static void
_bpt_map_preempt( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src )
{
    int i;
    unsigned shared;

    if ( is_pentium4() ) {
#ifdef DEBUG
        SUBDBG( "src, dst\n" );
        print_alloc( src );
        print_alloc( dst );
#endif

        /* check for a pebs conflict */
        /* pebs enables must both be non-zero */
        i = ( ( ( dst->ra_bits.pebs_enable && src->ra_bits.pebs_enable ) &&
                /* and not equal to each other */
                ( dst->ra_bits.pebs_enable != src->ra_bits.pebs_enable ) ) ||
              /* same for pebs_matrix_vert */
              ( ( dst->ra_bits.pebs_matrix_vert &&
                  src->ra_bits.pebs_matrix_vert )
                && ( dst->ra_bits.pebs_matrix_vert !=
                     src->ra_bits.pebs_matrix_vert ) ) );
        if ( i ) {
            /* PEBS registers are shared per eventset, so a PEBS clash
               makes dst unplaceable: clear its selector entirely. */
            SUBDBG( "pebs conflict! clearing selector\n" );
            dst->ra_selector = 0;
            return;
        } else {
            /* remove counters referenced by any shared escrs */
            if ( ( dst->ra_escr[0] == src->ra_escr[0] ) &&
                 ( ( int ) dst->ra_escr[0] != -1 ) ) {
                dst->ra_selector &= ~dst->ra_bits.counter[0];
                dst->ra_escr[0] = -1;
            }
            if ( ( dst->ra_escr[1] == src->ra_escr[1] ) &&
                 ( ( int ) dst->ra_escr[1] != -1 ) ) {
                dst->ra_selector &= ~dst->ra_bits.counter[1];
                dst->ra_escr[1] = -1;
            }

            /* remove any remaining shared counters */
            shared = ( dst->ra_selector & src->ra_selector );
            if ( shared )
                dst->ra_selector ^= shared;
        }
        /* recompute rank: number of set bits left in the selector */
        for ( i = 0, dst->ra_rank = 0; i < MAX_COUNTERS; i++ )
            if ( dst->ra_selector & ( 1 << i ) )
                dst->ra_rank++;
#ifdef DEBUG
        SUBDBG( "new dst\n" );
        print_alloc( dst );
#endif
    } else {
        /* Non-P4: drop the overlapping counters and recount the rank. */
        shared = dst->ra_selector & src->ra_selector;
        if ( shared )
            dst->ra_selector ^= shared;
        for ( i = 0, dst->ra_rank = 0; i < MAX_COUNTERS; i++ )
            if ( dst->ra_selector & ( 1 << i ) )
                dst->ra_rank++;
    }
}
00404 
00405 static void
00406 _bpt_map_update( hwd_reg_alloc_t * dst, hwd_reg_alloc_t * src )
00407 {
00408     dst->ra_selector = src->ra_selector;
00409 
00410     if ( is_pentium4() ) {
00411         dst->ra_escr[0] = src->ra_escr[0];
00412         dst->ra_escr[1] = src->ra_escr[1];
00413     }
00414 }
00415 
/* Register allocation.
   Build a hwd_reg_alloc_t work array for every native event in the
   eventset, run the Modified Bipartite Allocation scheme over it, and
   on success copy the resolved mapping back into ESI->NativeInfoArray.
   Returns PAPI_OK on a successful mapping, PAPI_ECNFLCT otherwise. */
static int
_x86_allocate_registers( EventSetInfo_t * ESI )
{
    int i, j, natNum;
    hwd_reg_alloc_t event_list[MAX_COUNTERS];
    hwd_register_t *ptr;

    /* Initialize the local structure needed
       for counter allocation and optimization. */
    natNum = ESI->NativeCount;

    if ( is_pentium4() ) {
        SUBDBG( "native event count: %d\n", natNum );
    }

    for ( i = 0; i < natNum; i++ ) {
        /* retrieve the mapping information about this native event */
        _papi_libpfm_ntv_code_to_bits( ( unsigned int ) ESI->NativeInfoArray[i].
                               ni_event, &event_list[i].ra_bits );

        if ( is_pentium4() ) {
            /* combine counter bit masks for both esc registers into selector */
            event_list[i].ra_selector =
                event_list[i].ra_bits.counter[0] | event_list[i].ra_bits.
                counter[1];
        } else {
            /* make sure register allocator only looks at legal registers */
            event_list[i].ra_selector =
                event_list[i].ra_bits.selector & ALLCNTRS;
#ifdef PERFCTR_X86_INTEL_CORE2
            /* Core2 fixed-function counters: bits above 16 are shifted
               down into positions 2+ of the selector (see also the
               pmc_map encoding in _x86_update_control_state). */
            if ( _papi_hwi_system_info.hw_info.model ==
                 PERFCTR_X86_INTEL_CORE2 )
                event_list[i].ra_selector |=
                    ( ( event_list[i].ra_bits.
                        selector >> 16 ) << 2 ) & ALLCNTRS;
#endif
        }
        /* calculate native event rank, which is no. of counters it can live on */
        event_list[i].ra_rank = 0;
        for ( j = 0; j < MAX_COUNTERS; j++ ) {
            if ( event_list[i].ra_selector & ( 1 << j ) ) {
                event_list[i].ra_rank++;
            }
        }

        if ( is_pentium4() ) {
            event_list[i].ra_escr[0] = event_list[i].ra_bits.escr[0];
            event_list[i].ra_escr[1] = event_list[i].ra_bits.escr[1];
#ifdef DEBUG
            SUBDBG( "i: %d\n", i );
            print_alloc( &event_list[i] );
#endif
        }
    }
    if ( _papi_bipartite_alloc( event_list, natNum, ESI->CmpIdx ) ) {   /* successfully mapped */
        for ( i = 0; i < natNum; i++ ) {
#ifdef PERFCTR_X86_INTEL_CORE2
            if ( _papi_hwi_system_info.hw_info.model ==
                 PERFCTR_X86_INTEL_CORE2 )
                event_list[i].ra_bits.selector = event_list[i].ra_selector;
#endif
#ifdef DEBUG
            if ( is_pentium4() ) {
                SUBDBG( "i: %d\n", i );
                print_alloc( &event_list[i] );
            }
#endif
            /* Copy all info about this native event to the NativeInfo struct */
            ptr = ESI->NativeInfoArray[i].ni_bits;
            *ptr = event_list[i].ra_bits;

            if ( is_pentium4() ) {
                /* The selector contains the counter bit position. Turn it into a number
                   and store it in the first counter value, zeroing the second. */
                ptr->counter[0] = ffs( event_list[i].ra_selector ) - 1;
                ptr->counter[1] = 0;
            }

            /* Array order on perfctr is event ADD order, not counter #... */
            ESI->NativeInfoArray[i].ni_position = i;
        }
        return PAPI_OK;
    } else
        return PAPI_ECNFLCT;
}
00502 
/* Strip all event programming out of a control state, keeping only
   the mode bits (domain and, on non-P4, the enable bit) so the state
   can be repopulated by _x86_update_control_state().  The TSC flag is
   left alone. */
static void
clear_cs_events( hwd_control_state_t * this_state )
{
    unsigned int i, j;

    /* total counters is sum of accumulating (nractrs) and interrupting (nrictrs) */
    j = this_state->control.cpu_control.nractrs +
        this_state->control.cpu_control.nrictrs;

    /* Remove all counter control command values from eventset. */
    for ( i = 0; i < j; i++ ) {
        SUBDBG( "Clearing pmc event entry %d\n", i );
        if ( is_pentium4() ) {
            this_state->control.cpu_control.pmc_map[i] = 0;
            this_state->control.cpu_control.evntsel[i] = 0;
            /* Keep only the user/kernel domain bits in the ESCR. */
            this_state->control.cpu_control.evntsel_aux[i] =
                this_state->control.cpu_control.
                evntsel_aux[i] & ( ESCR_T0_OS | ESCR_T0_USR );
        } else {
            /* Restore the identity counter mapping; keep enable+domain. */
            this_state->control.cpu_control.pmc_map[i] = i;
            this_state->control.cpu_control.evntsel[i]
                = this_state->control.cpu_control.
                evntsel[i] & ( PERF_ENABLE | PERF_OS | PERF_USR );
        }
        this_state->control.cpu_control.ireset[i] = 0;
    }

    if ( is_pentium4() ) {
        /* Clear pebs stuff */
        this_state->control.cpu_control.p4.pebs_enable = 0;
        this_state->control.cpu_control.p4.pebs_matrix_vert = 0;
    }

    /* clear both a and i counter counts */
    this_state->control.cpu_control.nractrs = 0;
    this_state->control.cpu_control.nrictrs = 0;

#ifdef DEBUG
    if ( is_pentium4() )
        print_control( &this_state->control.cpu_control );
#endif
}
00545 
/* This function clears the current contents of the control structure and
   updates it with whatever resources are allocated for all the native events
   in the native info structure array.
   Returns PAPI_OK, or PAPI_ECNFLCT when two events need conflicting
   PEBS register values on Pentium 4.
   NOTE(review): retval is declared unsigned int but holds PAPI error
   codes, which are negative ints; the value round-trips through the
   int return type, but a signed declaration would be cleaner. */
static int
_x86_update_control_state( hwd_control_state_t * this_state,
                           NativeInfo_t * native, int count,
                           hwd_context_t * ctx )
{
    ( void ) ctx;            /*unused */
    unsigned int i, k, retval = PAPI_OK;
    hwd_register_t *bits,*bits2;
    struct perfctr_cpu_control *cpu_control = &this_state->control.cpu_control;

    /* clear out the events from the control state */
    clear_cs_events( this_state );

    if ( is_pentium4() ) {
        /* fill the counters we're using */
        for ( i = 0; i < ( unsigned int ) count; i++ ) {
            /* dereference the mapping information about this native event */
            bits = native[i].ni_bits;

            /* Add counter control command values to eventset */
            cpu_control->pmc_map[i] = bits->counter[0];
            cpu_control->evntsel[i] = bits->cccr;
            cpu_control->ireset[i] = bits->ireset;
            cpu_control->pmc_map[i] |= FAST_RDPMC;
            cpu_control->evntsel_aux[i] |= bits->event;

            /* pebs_enable and pebs_matrix_vert are shared registers used for replay_events.
               Replay_events count L1 and L2 cache events. There is only one of each for 
               the entire eventset. Therefore, there can be only one unique replay_event 
               per eventset. This means L1 and L2 can't be counted together. Which stinks.
               This conflict should be trapped in the allocation scheme, but we'll test for it
               here too, just in case. */
            if ( bits->pebs_enable ) {
                /* if pebs_enable isn't set, just copy */
                if ( cpu_control->p4.pebs_enable == 0 ) {
                    cpu_control->p4.pebs_enable = bits->pebs_enable;
                    /* if pebs_enable conflicts, flag an error */
                } else if ( cpu_control->p4.pebs_enable != bits->pebs_enable ) {
                    SUBDBG
                        ( "WARNING: P4_update_control_state -- pebs_enable conflict!" );
                    retval = PAPI_ECNFLCT;
                }
                /* if pebs_enable == bits->pebs_enable, do nothing */
            }
            if ( bits->pebs_matrix_vert ) {
                /* if pebs_matrix_vert isn't set, just copy */
                if ( cpu_control->p4.pebs_matrix_vert == 0 ) {
                    cpu_control->p4.pebs_matrix_vert = bits->pebs_matrix_vert;
                    /* if pebs_matrix_vert conflicts, flag an error */
                } else if ( cpu_control->p4.pebs_matrix_vert !=
                            bits->pebs_matrix_vert ) {
                    SUBDBG
                        ( "WARNING: P4_update_control_state -- pebs_matrix_vert conflict!" );
                    retval = PAPI_ECNFLCT;
                }
                /* if pebs_matrix_vert == bits->pebs_matrix_vert, do nothing */
            }
        }
        this_state->control.cpu_control.nractrs = count;

        /* Make sure the TSC is always on */
        this_state->control.cpu_control.tsc_on = 1;

#ifdef DEBUG
        print_control( &this_state->control.cpu_control );
#endif
    } else {
        switch ( _papi_hwi_system_info.hw_info.model ) {
#ifdef PERFCTR_X86_INTEL_CORE2
        case PERFCTR_X86_INTEL_CORE2:
            /* fill the counters we're using */
            for ( i = 0; i < ( unsigned int ) count; i++ ) {
                bits2 = native[i].ni_bits;
                /* Find the first counter this event's selector allows. */
                for ( k = 0; k < MAX_COUNTERS; k++ )
                    if ( bits2->selector & ( 1 << k ) ) {
                        break;
                    }
                /* Selector bits above 1 denote fixed-function counters,
                   encoded in pmc_map as (k-2) | 0x40000000 — presumably
                   the perfctr fixed-counter flag; confirm against the
                   perfctr driver headers. */
                if ( k > 1 )
                    this_state->control.cpu_control.pmc_map[i] =
                        ( k - 2 ) | 0x40000000;
                else
                    this_state->control.cpu_control.pmc_map[i] = k;

                /* Add counter control command values to eventset */
                this_state->control.cpu_control.evntsel[i] |=
                    bits2->counter_cmd;
            }
            break;
#endif
        default:
            /* fill the counters we're using */
            for ( i = 0; i < ( unsigned int ) count; i++ ) {
                /* Add counter control command values to eventset */
                 bits2 = native[i].ni_bits;
                this_state->control.cpu_control.evntsel[i] |=
                    bits2->counter_cmd;
            }
        }
        this_state->control.cpu_control.nractrs = ( unsigned int ) count;
    }
    return retval;
}
00651 
00652 static int
00653 _x86_start( hwd_context_t * ctx, hwd_control_state_t * state )
00654 {
00655     int error;
00656 #ifdef DEBUG
00657     print_control( &state->control.cpu_control );
00658 #endif
00659 
00660     if ( state->rvperfctr != NULL ) {
00661         if ( ( error =
00662                rvperfctr_control( state->rvperfctr, &state->control ) ) < 0 ) {
00663             SUBDBG( "rvperfctr_control returns: %d\n", error );
00664             PAPIERROR( RCNTRL_ERROR );
00665             return ( PAPI_ESYS );
00666         }
00667         return ( PAPI_OK );
00668     }
00669 
00670     if ( ( error = vperfctr_control( ctx->perfctr, &state->control ) ) < 0 ) {
00671         SUBDBG( "vperfctr_control returns: %d\n", error );
00672         PAPIERROR( VCNTRL_ERROR );
00673         return ( PAPI_ESYS );
00674     }
00675     return ( PAPI_OK );
00676 }
00677 
00678 static int
00679 _x86_stop( hwd_context_t * ctx, hwd_control_state_t * state )
00680 {
00681     int error;
00682 
00683     if ( state->rvperfctr != NULL ) {
00684         if ( rvperfctr_stop( ( struct rvperfctr * ) ctx->perfctr ) < 0 ) {
00685             PAPIERROR( RCNTRL_ERROR );
00686             return ( PAPI_ESYS );
00687         }
00688         return ( PAPI_OK );
00689     }
00690 
00691     error = vperfctr_stop( ctx->perfctr );
00692     if ( error < 0 ) {
00693         SUBDBG( "vperfctr_stop returns: %d\n", error );
00694         PAPIERROR( VCNTRL_ERROR );
00695         return ( PAPI_ESYS );
00696     }
00697     return ( PAPI_OK );
00698 }
00699 
/* Read the current counter values into *dp.  When the eventset is
   PAPI_PAUSED the cached kernel state is fetched with
   vperfctr_read_state(); otherwise the live counters are read (from
   the attached rvperfctr if present, else the virtual perfctr).
   *dp is pointed directly at the pmc array inside spc->state — the
   caller must not free it.  Always returns PAPI_OK; the return values
   of the vperfctr read calls are not checked here. */
static int
_x86_read( hwd_context_t * ctx, hwd_control_state_t * spc, long long **dp,
           int flags )
{
    if ( flags & PAPI_PAUSED ) {
        vperfctr_read_state( ctx->perfctr, &spc->state, NULL );
        if ( !is_pentium4() ) {
            unsigned int i = 0;
            for ( i = 0;
                  i <
                  spc->control.cpu_control.nractrs +
                  spc->control.cpu_control.nrictrs; i++ ) {
                SUBDBG( "vperfctr_read_state: counter %d =  %lld\n", i,
                        spc->state.pmc[i] );
            }
        }
    } else {
        SUBDBG( "vperfctr_read_ctrs\n" );
        if ( spc->rvperfctr != NULL ) {
            rvperfctr_read_ctrs( spc->rvperfctr, &spc->state );
        } else {
            vperfctr_read_ctrs( ctx->perfctr, &spc->state );
        }
    }
    /* Hand back a pointer into the state's pmc array. */
    *dp = ( long long * ) spc->state.pmc;
#ifdef DEBUG
    {
        if ( ISLEVEL( DEBUG_SUBSTRATE ) ) {
            unsigned int i;
            if ( is_pentium4() ) {
                for ( i = 0; i < spc->control.cpu_control.nractrs; i++ ) {
                    SUBDBG( "raw val hardware index %d is %lld\n", i,
                            ( long long ) spc->state.pmc[i] );
                }
            } else {
                for ( i = 0;
                      i <
                      spc->control.cpu_control.nractrs +
                      spc->control.cpu_control.nrictrs; i++ ) {
                    SUBDBG( "raw val hardware index %d is %lld\n", i,
                            ( long long ) spc->state.pmc[i] );
                }
            }
        }
    }
#endif
    return ( PAPI_OK );
}
00748 
00749 static int
00750 _x86_reset( hwd_context_t * ctx, hwd_control_state_t * cntrl )
00751 {
00752     return ( _x86_start( ctx, cntrl ) );
00753 }
00754 
00755 /* Perfctr requires that interrupting counters appear at the end of the pmc list
00756    In the case a user wants to interrupt on a counter in an evntset that is not
00757    among the last events, we need to move the perfctr virtual events around to
00758    make it last. This function swaps two perfctr events, and then adjust the
00759    position entries in both the NativeInfoArray and the EventInfoArray to keep
00760    everything consistent. */
00761 static void
00762 swap_events( EventSetInfo_t * ESI, struct hwd_pmc_control *contr, int cntr1,
00763              int cntr2 )
00764 {
00765     unsigned int ui;
00766     int si, i, j;
00767 
00768     for ( i = 0; i < ESI->NativeCount; i++ ) {
00769         if ( ESI->NativeInfoArray[i].ni_position == cntr1 )
00770             ESI->NativeInfoArray[i].ni_position = cntr2;
00771         else if ( ESI->NativeInfoArray[i].ni_position == cntr2 )
00772             ESI->NativeInfoArray[i].ni_position = cntr1;
00773     }
00774 
00775     for ( i = 0; i < ESI->NumberOfEvents; i++ ) {
00776         for ( j = 0; ESI->EventInfoArray[i].pos[j] >= 0; j++ ) {
00777             if ( ESI->EventInfoArray[i].pos[j] == cntr1 )
00778                 ESI->EventInfoArray[i].pos[j] = cntr2;
00779             else if ( ESI->EventInfoArray[i].pos[j] == cntr2 )
00780                 ESI->EventInfoArray[i].pos[j] = cntr1;
00781         }
00782     }
00783 
00784     ui = contr->cpu_control.pmc_map[cntr1];
00785     contr->cpu_control.pmc_map[cntr1] = contr->cpu_control.pmc_map[cntr2];
00786     contr->cpu_control.pmc_map[cntr2] = ui;
00787 
00788     ui = contr->cpu_control.evntsel[cntr1];
00789     contr->cpu_control.evntsel[cntr1] = contr->cpu_control.evntsel[cntr2];
00790     contr->cpu_control.evntsel[cntr2] = ui;
00791 
00792     if ( is_pentium4() ) {
00793         ui = contr->cpu_control.evntsel_aux[cntr1];
00794         contr->cpu_control.evntsel_aux[cntr1] =
00795             contr->cpu_control.evntsel_aux[cntr2];
00796         contr->cpu_control.evntsel_aux[cntr2] = ui;
00797     }
00798 
00799     si = contr->cpu_control.ireset[cntr1];
00800     contr->cpu_control.ireset[cntr1] = contr->cpu_control.ireset[cntr2];
00801     contr->cpu_control.ireset[cntr2] = si;
00802 }
00803 
/* Install (threshold != 0) or remove (threshold == 0) an overflow interrupt
   on the event at EventIndex in this eventset.  Perfctr requires that all
   interrupting counters sit at the end of the pmc list, so after flipping a
   counter between "accumulating" (nractrs) and "interrupting" (nrictrs) we
   may have to call swap_events() to restore that ordering.
   Returns PAPI_OK on success, PAPI_EINVAL for a bad counter position, or an
   error from the signal (un)installation helpers. */
static int
_x86_set_overflow( EventSetInfo_t *ESI, int EventIndex, int threshold )
{
       hwd_control_state_t *ctl = ( hwd_control_state_t * ) ( ESI->ctl_state );
       struct hwd_pmc_control *contr = &(ctl->control);
    int i, ncntrs, nricntrs = 0, nracntrs = 0, retval = 0;
    OVFDBG( "EventIndex=%d\n", EventIndex );

#ifdef DEBUG
    if ( is_pentium4() )
      print_control( &(contr->cpu_control) );
#endif

    /* The correct event to overflow is EventIndex */
    ncntrs = _perfctr_vector.cmp_info.num_cntrs;
    i = ESI->EventInfoArray[EventIndex].pos[0];

    /* Sanity-check that the event's hardware position is a real counter. */
    if ( i >= ncntrs ) {
        PAPIERROR( "Selector id %d is larger than ncntrs %d", i, ncntrs );
        return PAPI_EINVAL;
    }

    if ( threshold != 0 ) {  /* Set an overflow threshold */
        /* Arm the PAPI overflow signal before touching the counter config. */
        retval = _papi_hwi_start_signal( _perfctr_vector.cmp_info.hardware_intr_sig,
                                         NEED_CONTEXT,
                                         _perfctr_vector.cmp_info.CmpIdx );
        if ( retval != PAPI_OK )
            return ( retval );

        /* overflow interrupt occurs on the NEXT event after overflow occurs
           thus we subtract 1 from the threshold. */
        contr->cpu_control.ireset[i] = ( -threshold + 1 );

        /* Enable the per-counter interrupt bit; P4 uses a CCCR bit,
           everything else uses the generic INT-enable bit. */
        if ( is_pentium4() )
            contr->cpu_control.evntsel[i] |= CCCR_OVF_PMI_T0;
        else
            contr->cpu_control.evntsel[i] |= PERF_INT_ENABLE;

        /* One counter moves from the accumulating to the interrupting set. */
        contr->cpu_control.nrictrs++;
        contr->cpu_control.nractrs--;
        nricntrs = ( int ) contr->cpu_control.nrictrs;
        nracntrs = ( int ) contr->cpu_control.nractrs;
        contr->si_signo = _perfctr_vector.cmp_info.hardware_intr_sig;

        /* move this event to the bottom part of the list if needed */
        if ( i < nracntrs )
            swap_events( ESI, contr, i, nracntrs );
        OVFDBG( "Modified event set\n" );
    } else {
      /* threshold == 0: clear a previously-set overflow, but only if the
         interrupt bit is actually set on this counter. */
      if ( is_pentium4() && contr->cpu_control.evntsel[i] & CCCR_OVF_PMI_T0 ) {
            contr->cpu_control.ireset[i] = 0;
            contr->cpu_control.evntsel[i] &= ( ~CCCR_OVF_PMI_T0 );
            contr->cpu_control.nrictrs--;
            contr->cpu_control.nractrs++;
      } else if ( !is_pentium4() &&
                    contr->cpu_control.evntsel[i] & PERF_INT_ENABLE ) {
            contr->cpu_control.ireset[i] = 0;
            contr->cpu_control.evntsel[i] &= ( ~PERF_INT_ENABLE );
            contr->cpu_control.nrictrs--;
            contr->cpu_control.nractrs++;
        }

        nricntrs = ( int ) contr->cpu_control.nrictrs;
        nracntrs = ( int ) contr->cpu_control.nractrs;

        /* move this event to the top part of the list if needed */
        if ( i >= nracntrs )
            swap_events( ESI, contr, i, nracntrs - 1 );

        /* No interrupting counters left: drop the signal number too. */
        if ( !nricntrs )
            contr->si_signo = 0;

        OVFDBG( "Modified event set\n" );

        retval = _papi_hwi_stop_signal( _perfctr_vector.cmp_info.hardware_intr_sig );
    }

#ifdef DEBUG
    if ( is_pentium4() )
      print_control( &(contr->cpu_control) );
#endif
    OVFDBG( "End of call. Exit code: %d\n", retval );
    return ( retval );
}
00888 
00889 static int
00890 _x86_stop_profiling( ThreadInfo_t * master, EventSetInfo_t * ESI )
00891 {
00892     ( void ) master;         /*unused */
00893     ( void ) ESI;            /*unused */
00894     return ( PAPI_OK );
00895 }
00896 
00897 
00898 
00899 /* these define cccr and escr register bits, and the p4 event structure */
00900 #include "perfmon/pfmlib_pentium4.h"
00901 #include "../lib/pfmlib_pentium4_priv.h"
00902 
00903 #define P4_REPLAY_REAL_MASK 0x00000003
00904 
00905 extern pentium4_escr_reg_t pentium4_escrs[];
00906 extern pentium4_cccr_reg_t pentium4_cccrs[];
00907 extern pentium4_event_t pentium4_events[];
00908 
00909 
/* Table indexed by a replay_event unit-mask position; each entry supplies
   the PEBS_ENABLE value (.enb) and PEBS_MATRIX_VERT value (.mat_vert) that
   select the corresponding replay metric.  Entries 0 and 1 are unused
   placeholders (see the replay_mask > 1 && < 11 check in
   _papi_libpfm_ntv_code_to_bits_perfctr). */
static pentium4_replay_regs_t p4_replay_regs[] = {
    /* 0 */ {.enb = 0,
             /* dummy */
             .mat_vert = 0,
             },
    /* 1 */ {.enb = 0,
             /* dummy */
             .mat_vert = 0,
             },
    /* 2 */ {.enb = 0x01000001,
             /* 1stL_cache_load_miss_retired */
             .mat_vert = 0x00000001,
             },
    /* 3 */ {.enb = 0x01000002,
             /* 2ndL_cache_load_miss_retired */
             .mat_vert = 0x00000001,
             },
    /* 4 */ {.enb = 0x01000004,
             /* DTLB_load_miss_retired */
             .mat_vert = 0x00000001,
             },
    /* 5 */ {.enb = 0x01000004,
             /* DTLB_store_miss_retired */
             .mat_vert = 0x00000002,
             },
    /* 6 */ {.enb = 0x01000004,
             /* DTLB_all_miss_retired */
             .mat_vert = 0x00000003,
             },
    /* 7 */ {.enb = 0x01018001,
             /* Tagged_mispred_branch */
             .mat_vert = 0x00000010,
             },
    /* 8 */ {.enb = 0x01000200,
             /* MOB_load_replay_retired */
             .mat_vert = 0x00000001,
             },
    /* 9 */ {.enb = 0x01000400,
             /* split_load_retired */
             .mat_vert = 0x00000001,
             },
    /* 10 */ {.enb = 0x01000400,
              /* split_store_retired */
              .mat_vert = 0x00000002,
              },
};
00956 
/* Maps the arbitrary pmd index used in libpfm/pentium4_events.h to the
   counter numbering used in the Intel documentation.  Indexed by pmd;
   the value is the Intel counter number. */
static int pfm2intel[] =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 2, 3, 6, 7, 10, 11, 14, 15, 17 };
00960 
00961 
00962 
00963 
00964 /* This call is broken. Selector can be much bigger than 32 bits. It should be a pfmlib_regmask_t - pjm */
00965 /* Also, libpfm assumes events can live on different counters with different codes. This call only returns
00966     the first occurence found. */
00967 /* Right now its only called by ntv_code_to_bits in perfctr-p3, so we're ok. But for it to be
00968     generally useful it should be fixed. - dkt */
00969 static int
00970 _pfm_get_counter_info( unsigned int event, unsigned int *selector, int *code )
00971 {
00972     pfmlib_regmask_t cnt, impl;
00973     unsigned int num;
00974     unsigned int i, first = 1;
00975     int ret;
00976 
00977     if ( ( ret = pfm_get_event_counters( event, &cnt ) ) != PFMLIB_SUCCESS ) {
00978         PAPIERROR( "pfm_get_event_counters(%d,%p): %s", event, &cnt,
00979                    pfm_strerror( ret ) );
00980         return PAPI_ESYS;
00981     }
00982     if ( ( ret = pfm_get_num_counters( &num ) ) != PFMLIB_SUCCESS ) {
00983         PAPIERROR( "pfm_get_num_counters(%p): %s", num, pfm_strerror( ret ) );
00984         return PAPI_ESYS;
00985     }
00986     if ( ( ret = pfm_get_impl_counters( &impl ) ) != PFMLIB_SUCCESS ) {
00987         PAPIERROR( "pfm_get_impl_counters(%p): %s", &impl,
00988                    pfm_strerror( ret ) );
00989         return PAPI_ESYS;
00990     }
00991 
00992     *selector = 0;
00993     for ( i = 0; num; i++ ) {
00994         if ( pfm_regmask_isset( &impl, i ) )
00995             num--;
00996         if ( pfm_regmask_isset( &cnt, i ) ) {
00997             if ( first ) {
00998                 if ( ( ret =
00999                        pfm_get_event_code_counter( event, i,
01000                                                    code ) ) !=
01001                      PFMLIB_SUCCESS ) {
01002                     PAPIERROR( "pfm_get_event_code_counter(%d, %d, %p): %s",
01003                            event, i, code, pfm_strerror( ret ) );
01004                     return PAPI_ESYS;
01005                 }
01006                 first = 0;
01007             }
01008             *selector |= 1 << i;
01009         }
01010     }
01011     return PAPI_OK;
01012 }
01013 
/* Translate a PAPI native EventCode into the hardware register bits this
   component programs.  On Pentium 4 this fills in the ESCR/CCCR values and
   the counter/escr assignment arrays; on all other x86 CPUs it fills in the
   counter selector mask and the evntsel command word.
   Returns PAPI_OK, PAPI_ENOEVNT for an undecodable code, or an error from
   _pfm_get_counter_info(). */
int
_papi_libpfm_ntv_code_to_bits_perfctr( unsigned int EventCode, 
                       hwd_register_t *newbits )
{
    unsigned int event, umask;

    X86_register_t *bits = (X86_register_t *)newbits;

    if ( is_pentium4() ) {
       pentium4_escr_value_t escr_value;
       pentium4_cccr_value_t cccr_value;
       unsigned int num_masks, replay_mask, unit_masks[12];
       unsigned int event_mask;
       unsigned int tag_value, tag_enable;
       unsigned int i;
       int j, escr, cccr, pmd;

       if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK )
      return PAPI_ENOEVNT;

       /* for each allowed escr (1 or 2) find the allowed cccrs.
      for each allowed cccr find the pmd index
      convert to an intel counter number; or it into bits->counter */
       for ( i = 0; i < MAX_ESCRS_PER_EVENT; i++ ) {
      bits->counter[i] = 0;
      escr = pentium4_events[event].allowed_escrs[i];
      if ( escr < 0 ) {
         continue;
      }

      bits->escr[i] = escr;

      for ( j = 0; j < MAX_CCCRS_PER_ESCR; j++ ) {
         cccr = pentium4_escrs[escr].allowed_cccrs[j];
         if ( cccr < 0 ) {
        continue;
         }

         pmd = pentium4_cccrs[cccr].pmd;
         bits->counter[i] |= ( 1 << pfm2intel[pmd] );
      }
       }

       /* if there's only one valid escr, copy the values */
       /* NOTE(review): relies on 'escr' holding the loop's LAST
          allowed_escrs entry — correct for MAX_ESCRS_PER_EVENT == 2,
          but fragile if that constant ever changes; verify. */
       if ( escr < 0 ) {
      bits->escr[1] = bits->escr[0];
      bits->counter[1] = bits->counter[0];
       }

       /* Calculate the event-mask value. Invalid masks
    * specified by the caller are ignored. */
       tag_value = 0;
       tag_enable = 0;
       event_mask = _pfm_convert_umask( event, umask );

       /* Upper nibble (0xF0000) carries tag bits; split them out. */
       if ( event_mask & 0xF0000 ) {
      tag_enable = 1;
      tag_value = ( ( event_mask & 0xF0000 ) >> EVENT_MASK_BITS );
       }

       event_mask &= 0x0FFFF;   /* mask off possible tag bits */

       /* Set up the ESCR and CCCR register values. */
       escr_value.val = 0;
       escr_value.bits.t1_usr = 0;  /* controlled by kernel */
       escr_value.bits.t1_os = 0;   /* controlled by kernel */
//    escr_value.bits.t0_usr       = (plm & PFM_PLM3) ? 1 : 0;
//    escr_value.bits.t0_os        = (plm & PFM_PLM0) ? 1 : 0;
       escr_value.bits.tag_enable = tag_enable;
       escr_value.bits.tag_value = tag_value;
       escr_value.bits.event_mask = event_mask;
       escr_value.bits.event_select = pentium4_events[event].event_select;
       escr_value.bits.reserved = 0;

       /* initialize the proper bits in the cccr register */
       cccr_value.val = 0;
       cccr_value.bits.reserved1 = 0;
       cccr_value.bits.enable = 1;
       cccr_value.bits.escr_select = pentium4_events[event].escr_select;
       cccr_value.bits.active_thread = 3;   
       /* FIXME: This is set to count when either logical
    *        CPU is active. Need a way to distinguish
    *        between logical CPUs when HT is enabled.
        *        the docs say these bits should always 
    *        be set.                                  */
       cccr_value.bits.compare = 0; 
       /* FIXME: What do we do with "threshold" settings? */
       cccr_value.bits.complement = 0;  
       /* FIXME: What do we do with "threshold" settings? */
       cccr_value.bits.threshold = 0;   
       /* FIXME: What do we do with "threshold" settings? */
       cccr_value.bits.force_ovf = 0;   
       /* FIXME: Do we want to allow "forcing" overflow
        *        interrupts on all counter increments? */
       cccr_value.bits.ovf_pmi_t0 = 0;
       cccr_value.bits.ovf_pmi_t1 = 0;  
       /* PMI taken care of by kernel typically */
       cccr_value.bits.reserved2 = 0;
       cccr_value.bits.cascade = 0; 
       /* FIXME: How do we handle "cascading" counters? */
       cccr_value.bits.overflow = 0;

       /* these flags are always zero, from what I can tell... */
       bits->pebs_enable = 0;   /* flag for PEBS counting */
       bits->pebs_matrix_vert = 0;  
       /* flag for PEBS_MATRIX_VERT, whatever that is */

       /* ...unless the event is replay_event */
       if ( !strcmp( pentium4_events[event].name, "replay_event" ) ) {
      escr_value.bits.event_mask = event_mask & P4_REPLAY_REAL_MASK;
      num_masks = prepare_umask( umask, unit_masks );
      for ( i = 0; i < num_masks; i++ ) {
         replay_mask = unit_masks[i];
         /* Only positions 2..10 are real entries in p4_replay_regs. */
         if ( replay_mask > 1 && replay_mask < 11 ) {
            /* process each valid mask we find */
        bits->pebs_enable |= p4_replay_regs[replay_mask].enb;
        bits->pebs_matrix_vert |= p4_replay_regs[replay_mask].mat_vert;
         }
      }
       }

       /* store the escr and cccr values */
       bits->event = escr_value.val;
       bits->cccr = cccr_value.val;
       bits->ireset = 0;     /* I don't really know what this does */
       SUBDBG( "escr: 0x%lx; cccr:  0x%lx\n", escr_value.val, cccr_value.val );
    } else {

       /* Non-P4 path: selector mask + evntsel command word. */
       int ret, code;

       if ( _pfm_decode_native_event( EventCode, &event, &umask ) != PAPI_OK )
      return PAPI_ENOEVNT;

       if ( ( ret = _pfm_get_counter_info( event, &bits->selector,
                          &code ) ) != PAPI_OK )
      return ret;

       /* evntsel layout: unit mask in bits 8-15, event code in bits 0-7. */
       bits->counter_cmd=(int) (code | ((_pfm_convert_umask(event,umask))<< 8) );

       SUBDBG( "selector: %#x\n", bits->selector );
       SUBDBG( "event: %#x; umask: %#x; code: %#x; cmd: %#x\n", event,
           umask, code, ( ( hwd_register_t * ) bits )->counter_cmd );
    }

    return PAPI_OK;
}
01160 
01161 
01162 
/* Component descriptor for the Linux perfctr x86 component: metadata,
   opaque-structure sizes, and the function-pointer table the PAPI
   framework dispatches through. */
papi_vector_t _perfctr_vector = {
    .cmp_info = {
                 /* default component information (unspecified values are initialized to 0) */
                                 .name = "perfctr",
                 .description = "Linux perfctr CPU counters",
                 .default_domain = PAPI_DOM_USER,
                 .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL,
                 .default_granularity = PAPI_GRN_THR,
                 .available_granularities = PAPI_GRN_THR,
                 .hardware_intr_sig = PAPI_INT_SIGNAL,

                 /* component specific cmp_info initializations */
                 .fast_real_timer = 1,
                 .fast_virtual_timer = 1,
                 .attach = 1,
                 .attach_must_ptrace = 1,
                 .cntr_umasks = 1,
                 }
    ,

    /* sizes of framework-opaque component-private structures */
    .size = {
             .context = sizeof ( X86_perfctr_context_t ),
             .control_state = sizeof ( X86_perfctr_control_t ),
             .reg_value = sizeof ( X86_register_t ),
             .reg_alloc = sizeof ( X86_reg_alloc_t ),
             }
    ,

    /* function pointers in this component */
    .init_control_state =   _x86_init_control_state,
    .start =                _x86_start,
    .stop =                 _x86_stop,
    .read =                 _x86_read,
    .allocate_registers =   _x86_allocate_registers,
    .update_control_state = _x86_update_control_state,
    .set_domain =           _x86_set_domain,
    .reset =                _x86_reset,
    .set_overflow =         _x86_set_overflow,
    .stop_profiling =       _x86_stop_profiling,

    /* entry points shared with the generic perfctr layer (perfctr.c) */
    .init_component =  _perfctr_init_component,
    .ctl =             _perfctr_ctl,
    .dispatch_timer =  _perfctr_dispatch_timer,
    .init_thread =     _perfctr_init_thread,
    .shutdown_thread = _perfctr_shutdown_thread,

    /* from libpfm */
    .ntv_enum_events   = _papi_libpfm_ntv_enum_events,
    .ntv_name_to_code  = _papi_libpfm_ntv_name_to_code,
    .ntv_code_to_name  = _papi_libpfm_ntv_code_to_name,
    .ntv_code_to_descr = _papi_libpfm_ntv_code_to_descr,
    .ntv_code_to_bits  = _papi_libpfm_ntv_code_to_bits_perfctr,

};
01218 
01219 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines