PAPI  5.3.0.0
perf_event_uncore.c
Go to the documentation of this file.
00001 /*
00002 * File:    perf_event_uncore.c
00003 *
00004 * Author:  Vince Weaver
00005 *          vincent.weaver@maine.edu
00006 */
00007 
00008 #include <fcntl.h>
00009 #include <string.h>
00010 #include <errno.h>
00011 #include <signal.h>
00012 #include <syscall.h>
00013 #include <sys/utsname.h>
00014 #include <sys/mman.h>
00015 #include <sys/ioctl.h>
00016 
00017 /* PAPI-specific includes */
00018 #include "papi.h"
00019 #include "papi_memory.h"
00020 #include "papi_internal.h"
00021 #include "papi_vector.h"
00022 #include "extras.h"
00023 
00024 /* libpfm4 includes */
00025 #include "papi_libpfm4_events.h"
00026 #include "peu_libpfm4_events.h"
00027 #include "perfmon/pfmlib.h"
00028 #include PEINCLUDE
00029 
00030 /* Linux-specific includes */
00031 #include "mb.h"
00032 #include "linux-memory.h"
00033 #include "linux-timer.h"
00034 #include "linux-common.h"
00035 #include "linux-context.h"
00036 
00037 #include "components/perf_event/perf_event_lib.h"
00038 
/* Forward declaration */
papi_vector_t _perf_event_uncore_vector;

/* Globals */
/* Table of uncore native events; filled in by _peu_libpfm4_init() */
struct native_event_table_t uncore_native_event_table;
/* Component index passed to _peu_init_component(); cached here so */
/* thread and control-state init can stamp it into each context.   */
static int our_cidx;

/* Defines for ctx->state */
#define PERF_EVENTS_OPENED  0x01
#define PERF_EVENTS_RUNNING 0x02
00049 
00050 
/* The layout returned by read() on a perf_event fd depends on which */
/* read_format flags were set at open time.  Centralizing the flag   */
/* computation here keeps open_pe_events() and _peu_read() in sync.  */
static unsigned int
get_read_format( unsigned int multiplex,
         unsigned int inherit,
         int format_group )
{
   unsigned int bits = 0;

   /* Multiplexed events need enabled/running times so counts */
   /* can be scaled at read time.                             */
   if ( multiplex ) {
      bits |= PERF_FORMAT_TOTAL_TIME_ENABLED |
              PERF_FORMAT_TOTAL_TIME_RUNNING;
   }

   /* Group reads are only usable when inherit is not in effect. */
   if ( ( !inherit ) && format_group ) {
      bits |= PERF_FORMAT_GROUP;
   }

   SUBDBG("multiplex: %d, inherit: %d, group_leader: %d, format: %#x\n",
      multiplex, inherit, format_group, bits);

   return bits;
}
00079 
00080 /********************************************************************/
00081 /* Low-level perf_event calls                                       */
00082 /********************************************************************/
00083 
/* In case headers aren't new enough to have __NR_perf_event_open */
#ifndef __NR_perf_event_open

#ifdef __powerpc__
#define __NR_perf_event_open    319
#elif defined(__x86_64__)
#define __NR_perf_event_open    298
#elif defined(__i386__)
#define __NR_perf_event_open    336
#elif defined(__arm__)
/* NOTE(review): the value had drifted onto the #elif line, leaving  */
/* the macro defined as empty on ARM.  0x900000 is the ARM OABI      */
/* syscall base offset -- confirm against asm/unistd.h for the       */
/* targeted ABI (EABI uses the bare syscall number).                 */
#define __NR_perf_event_open    (366+0x900000)
#endif

#endif
00098 
00099 static long
00100 sys_perf_event_open( struct perf_event_attr *hw_event, pid_t pid, int cpu,
00101                        int group_fd, unsigned long flags )
00102 {
00103    int ret;
00104 
00105    SUBDBG("sys_perf_event_open(%p,%d,%d,%d,%lx\n",hw_event,pid,cpu,group_fd,flags);
00106    SUBDBG("   type: %d\n",hw_event->type);
00107    SUBDBG("   size: %d\n",hw_event->size);
00108    SUBDBG("   config: %"PRIx64" (%"PRIu64")\n",hw_event->config,
00109       hw_event->config);
00110    SUBDBG("   sample_period: %"PRIu64"\n",hw_event->sample_period);
00111    SUBDBG("   sample_type: %"PRIu64"\n",hw_event->sample_type);
00112    SUBDBG("   read_format: %"PRIu64"\n",hw_event->read_format);
00113    SUBDBG("   disabled: %d\n",hw_event->disabled);
00114    SUBDBG("   inherit: %d\n",hw_event->inherit);
00115    SUBDBG("   pinned: %d\n",hw_event->pinned);
00116    SUBDBG("   exclusive: %d\n",hw_event->exclusive);
00117    SUBDBG("   exclude_user: %d\n",hw_event->exclude_user);
00118    SUBDBG("   exclude_kernel: %d\n",hw_event->exclude_kernel);
00119    SUBDBG("   exclude_hv: %d\n",hw_event->exclude_hv);
00120    SUBDBG("   exclude_idle: %d\n",hw_event->exclude_idle);
00121    SUBDBG("   mmap: %d\n",hw_event->mmap);
00122    SUBDBG("   comm: %d\n",hw_event->comm);
00123    SUBDBG("   freq: %d\n",hw_event->freq);
00124    SUBDBG("   inherit_stat: %d\n",hw_event->inherit_stat);
00125    SUBDBG("   enable_on_exec: %d\n",hw_event->enable_on_exec);
00126    SUBDBG("   task: %d\n",hw_event->task);
00127    SUBDBG("   watermark: %d\n",hw_event->watermark);
00128 
00129     ret =
00130         syscall( __NR_perf_event_open, hw_event, pid, cpu, group_fd, flags );
00131     SUBDBG("Returned %d %d %s\n",ret,
00132            ret<0?errno:0,
00133            ret<0?strerror(errno):" ");
00134     return ret;
00135 }
00136 
00137 
00138 static int map_perf_event_errors_to_papi(int perf_event_error) {
00139 
00140    int ret;
00141 
00142    /* These mappings are approximate.
00143       EINVAL in particular can mean lots of different things */
00144    switch(perf_event_error) {
00145       case EPERM:
00146       case EACCES:
00147            ret = PAPI_EPERM;
00148        break;
00149       case ENODEV:
00150       case EOPNOTSUPP:
00151        ret = PAPI_ENOSUPP;
00152            break;
00153       case ENOENT:
00154        ret = PAPI_ENOEVNT;
00155            break;
00156       case ENOSYS:
00157       case EAGAIN:
00158       case EBUSY:
00159       case E2BIG:
00160        ret = PAPI_ESYS;
00161        break;
00162       case ENOMEM:
00163        ret = PAPI_ENOMEM;
00164        break;
00165       case EINVAL:
00166       default:
00167        ret = PAPI_EINVAL;
00168            break;
00169    }
00170    return ret;
00171 }
00172 
00173 /* Maximum size we ever expect to read from a perf_event fd   */
00174 /*  (this is the number of 64-bit values)                     */
00175 /* We use this to size the read buffers                       */
00176 /* The three is for event count, time_enabled, time_running   */
00177 /*  and the counter term is count value and count id for each */
00178 /*  possible counter value.                                   */
00179 #define READ_BUFFER_SIZE (3 + (2 * PERF_EVENT_MAX_MPX_COUNTERS))
00180 
00181 /* Open all events in the control state */
00182 static int
00183 open_pe_events( pe_context_t *ctx, pe_control_t *ctl )
00184 {
00185 
00186    int i, ret = PAPI_OK;
00187    long pid;
00188 
00189    if (ctl->granularity==PAPI_GRN_SYS) {
00190       pid = -1;
00191    }
00192    else {
00193       pid = ctl->tid;
00194    }
00195 
00196    for( i = 0; i < ctl->num_events; i++ ) {
00197 
00198       ctl->events[i].event_opened=0;
00199 
00200       /* set up the attr structure.  We don't set up all fields here */
00201       /* as some have already been set up previously.                */
00202 
00203       /* group leader (event 0) is special                */
00204       /* If we're multiplexed, everyone is a group leader */
00205       if (( i == 0 ) || (ctl->multiplexed)) {
00206          ctl->events[i].attr.pinned = !ctl->multiplexed;
00207      ctl->events[i].attr.disabled = 1;
00208      ctl->events[i].group_leader_fd=-1;
00209          ctl->events[i].attr.read_format = get_read_format(ctl->multiplexed,
00210                                ctl->inherit,
00211                                !ctl->multiplexed );
00212       } else {
00213      ctl->events[i].attr.pinned=0;
00214      ctl->events[i].attr.disabled = 0;
00215      ctl->events[i].group_leader_fd=ctl->events[0].event_fd,
00216          ctl->events[i].attr.read_format = get_read_format(ctl->multiplexed,
00217                                ctl->inherit,
00218                                0 );
00219       }
00220 
00221 
00222       /* try to open */
00223       ctl->events[i].event_fd = sys_perf_event_open( &ctl->events[i].attr,
00224                              pid,
00225                              ctl->cpu,
00226                    ctl->events[i].group_leader_fd,
00227                              0 /* flags */
00228                              );
00229 
00230       /* Try to match Linux errors to PAPI errors */
00231       if ( ctl->events[i].event_fd == -1 ) {
00232      SUBDBG("sys_perf_event_open returned error on event #%d."
00233         "  Error: %s\n",
00234         i, strerror( errno ) );
00235          ret=map_perf_event_errors_to_papi(errno);
00236 
00237      goto open_pe_cleanup;
00238       }
00239 
00240       SUBDBG ("sys_perf_event_open: tid: %ld, cpu_num: %d,"
00241               " group_leader/fd: %d, event_fd: %d,"
00242               " read_format: 0x%"PRIu64"\n",
00243           pid, ctl->cpu, ctl->events[i].group_leader_fd, 
00244           ctl->events[i].event_fd, ctl->events[i].attr.read_format);
00245 
00246       ctl->events[i].event_opened=1;
00247    }
00248 
00249    /* Now that we've successfully opened all of the events, do whatever  */
00250    /* "tune-up" is needed to attach the mmap'd buffers, signal handlers, */
00251    /* and so on.                                                         */
00252    for ( i = 0; i < ctl->num_events; i++ ) {
00253 
00254       /* No sampling if uncore */
00255       ctl->events[i].mmap_buf = NULL;
00256    }
00257 
00258    /* Set num_evts only if completely successful */
00259    ctx->state |= PERF_EVENTS_OPENED;
00260 
00261    return PAPI_OK;
00262 
00263 open_pe_cleanup:
00264    /* We encountered an error, close up the fds we successfully opened.  */
00265    /* We go backward in an attempt to close group leaders last, although */
00266    /* That's probably not strictly necessary.                            */
00267    while ( i > 0 ) {
00268       i--;
00269       if (ctl->events[i].event_fd>=0) {
00270      close( ctl->events[i].event_fd );
00271      ctl->events[i].event_opened=0;
00272       }
00273    }
00274 
00275    return ret;
00276 }
00277 
00278 /* Close all of the opened events */
00279 static int
00280 close_pe_events( pe_context_t *ctx, pe_control_t *ctl )
00281 {
00282    int i;
00283    int num_closed=0;
00284    int events_not_opened=0;
00285 
00286    /* should this be a more serious error? */
00287    if ( ctx->state & PERF_EVENTS_RUNNING ) {
00288       SUBDBG("Closing without stopping first\n");
00289    }
00290 
00291    /* Close child events first */
00292    for( i=0; i<ctl->num_events; i++ ) {
00293 
00294       if (ctl->events[i].event_opened) {
00295 
00296          if (ctl->events[i].group_leader_fd!=-1) {
00297             if ( ctl->events[i].mmap_buf ) {
00298            if ( munmap ( ctl->events[i].mmap_buf,
00299                      ctl->events[i].nr_mmap_pages * getpagesize() ) ) {
00300               PAPIERROR( "munmap of fd = %d returned error: %s",
00301                  ctl->events[i].event_fd, strerror( errno ) );
00302               return PAPI_ESYS;
00303            }
00304         }
00305 
00306             if ( close( ctl->events[i].event_fd ) ) {
00307            PAPIERROR( "close of fd = %d returned error: %s",
00308                ctl->events[i].event_fd, strerror( errno ) );
00309            return PAPI_ESYS;
00310         } else {
00311            num_closed++;
00312         }
00313         ctl->events[i].event_opened=0;
00314      }
00315       }
00316       else {
00317     events_not_opened++;
00318       }
00319    }
00320 
00321    /* Close the group leaders last */
00322    for( i=0; i<ctl->num_events; i++ ) {
00323 
00324       if (ctl->events[i].event_opened) {
00325 
00326          if (ctl->events[i].group_leader_fd==-1) {
00327             if ( ctl->events[i].mmap_buf ) {
00328            if ( munmap ( ctl->events[i].mmap_buf,
00329                      ctl->events[i].nr_mmap_pages * getpagesize() ) ) {
00330               PAPIERROR( "munmap of fd = %d returned error: %s",
00331                  ctl->events[i].event_fd, strerror( errno ) );
00332               return PAPI_ESYS;
00333            }
00334         }
00335 
00336 
00337             if ( close( ctl->events[i].event_fd ) ) {
00338            PAPIERROR( "close of fd = %d returned error: %s",
00339                ctl->events[i].event_fd, strerror( errno ) );
00340            return PAPI_ESYS;
00341         } else {
00342            num_closed++;
00343         }
00344         ctl->events[i].event_opened=0;
00345      }
00346       }
00347    }
00348 
00349 
00350    if (ctl->num_events!=num_closed) {
00351       if (ctl->num_events!=(num_closed+events_not_opened)) {
00352          PAPIERROR("Didn't close all events: "
00353            "Closed %d Not Opened: %d Expected %d\n",
00354            num_closed,events_not_opened,ctl->num_events);
00355          return PAPI_EBUG;
00356       }
00357    }
00358 
00359    ctl->num_events=0;
00360 
00361    ctx->state &= ~PERF_EVENTS_OPENED;
00362 
00363    return PAPI_OK;
00364 }
00365 
00366 
00367 
00368 
00369 /********************************************************************/
00370 /* Component Interface                                              */
00371 /********************************************************************/
00372 
00373 
00374 
00375 /* Initialize a thread */
00376 int
00377 _peu_init_thread( hwd_context_t *hwd_ctx )
00378 {
00379 
00380   pe_context_t *pe_ctx = ( pe_context_t *) hwd_ctx;
00381 
00382   /* clear the context structure and mark as initialized */
00383   memset( pe_ctx, 0, sizeof ( pe_context_t ) );
00384   pe_ctx->initialized=1;
00385 
00386   pe_ctx->event_table=&uncore_native_event_table;
00387   pe_ctx->cidx=our_cidx;
00388 
00389   return PAPI_OK;
00390 }
00391 
00392 /* Initialize a new control state */
00393 int
00394 _peu_init_control_state( hwd_control_state_t *ctl )
00395 {
00396   pe_control_t *pe_ctl = ( pe_control_t *) ctl;
00397 
00398   /* clear the contents */
00399   memset( pe_ctl, 0, sizeof ( pe_control_t ) );
00400 
00401   /* Set the default domain */
00402   _pe_set_domain( ctl, _perf_event_uncore_vector.cmp_info.default_domain );
00403 
00404   /* Set the default granularity */
00405   pe_ctl->granularity=_perf_event_uncore_vector.cmp_info.default_granularity;
00406 
00407   pe_ctl->cidx=our_cidx;
00408 
00409   /* Set cpu number in the control block to show events */
00410   /* are not tied to specific cpu                       */
00411   pe_ctl->cpu = -1;
00412   return PAPI_OK;
00413 }
00414 
00415 
00416 
00417 /* Initialize the perf_event uncore component */
00418 int
00419 _peu_init_component( int cidx )
00420 {
00421 
00422    int retval;
00423    int paranoid_level;
00424 
00425    FILE *fff;
00426 
00427    our_cidx=cidx;
00428 
00429    /* The is the official way to detect if perf_event support exists */
00430    /* The file is called perf_counter_paranoid on 2.6.31             */
00431    /* currently we are lazy and do not support 2.6.31 kernels        */
00432 
00433    fff=fopen("/proc/sys/kernel/perf_event_paranoid","r");
00434    if (fff==NULL) {
00435      strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
00436         "perf_event support not detected",PAPI_MAX_STR_LEN);
00437      return PAPI_ENOCMP;
00438    }
00439    retval=fscanf(fff,"%d",&paranoid_level);
00440    if (retval!=1) fprintf(stderr,"Error reading paranoid level\n");
00441    fclose(fff);
00442 
00443 
00444    /* Run the libpfm4-specific setup */
00445 
00446    retval = _papi_libpfm4_init(_papi_hwd[cidx]);
00447    if (retval) {
00448      strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
00449          "Error initializing libpfm4",PAPI_MAX_STR_LEN);
00450      return PAPI_ENOCMP;
00451    }
00452 
00453 
00454    /* Run the uncore specific libpfm4 setup */
00455 
00456    retval = _peu_libpfm4_init(_papi_hwd[cidx], 
00457                    &uncore_native_event_table,
00458                                PMU_TYPE_UNCORE);
00459    if (retval) {
00460      strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
00461          "Error setting up libpfm4",PAPI_MAX_STR_LEN);
00462      return PAPI_ENOCMP;
00463    }
00464 
00465    /* Check if no uncore events found */
00466 
00467    if (_papi_hwd[cidx]->cmp_info.num_native_events==0) {
00468      strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
00469          "No uncore PMUs or events found",PAPI_MAX_STR_LEN);
00470      return PAPI_ENOCMP;
00471    }
00472 
00473    /* Check if we have enough permissions for uncore */
00474 
00475    /* 2 means no kernel measurements allowed   */
00476    /* 1 means normal counter access            */
00477    /* 0 means you can access CPU-specific data */
00478    /* -1 means no restrictions                 */
00479 
00480    if ((paranoid_level>0) && (getuid()!=0)) {
00481       strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
00482         "Insufficient permissions for uncore access.  Set /proc/sys/kernel/perf_event_paranoid to 0 or run as root.",
00483         PAPI_MAX_STR_LEN);
00484      return PAPI_ENOCMP;
00485    }
00486 
00487    return PAPI_OK;
00488 
00489 }
00490 
00491 /* Shutdown the perf_event component */
00492 int _peu_shutdown_component( void ) {
00493 
00494   /* deallocate our event table */
00495   _peu_libpfm4_shutdown(&uncore_native_event_table);
00496 
00497   /* Shutdown libpfm4 */
00498   _papi_libpfm4_shutdown();
00499 
00500   return PAPI_OK;
00501 }
00502 
00503 /* This function clears the current contents of the control structure and
00504    updates it with whatever resources are allocated for all the native events
00505    in the native info structure array. */
00506 
00507 int
00508 _peu_update_control_state( hwd_control_state_t *ctl,
00509                    NativeInfo_t *native,
00510                    int count, hwd_context_t *ctx )
00511 {
00512    int i = 0, ret;
00513    pe_context_t *pe_ctx = ( pe_context_t *) ctx;
00514    pe_control_t *pe_ctl = ( pe_control_t *) ctl;
00515 
00516    /* close all of the existing fds and start over again */
00517    /* In theory we could have finer-grained control and know if             */
00518    /* things were changed, but it's easier to tear things down and rebuild. */
00519    close_pe_events( pe_ctx, pe_ctl );
00520 
00521    /* Calling with count==0 should be OK, it's how things are deallocated */
00522    /* when an eventset is destroyed.                                      */
00523    if ( count == 0 ) {
00524       SUBDBG( "Called with count == 0\n" );
00525       return PAPI_OK;
00526    }
00527 
00528    /* set up all the events */
00529    for( i = 0; i < count; i++ ) {
00530       if ( native ) {
00531      /* Have libpfm4 set the config values for the event */
00532      ret=_peu_libpfm4_setup_counters(&pe_ctl->events[i].attr,
00533                     native[i].ni_event,
00534                     pe_ctx->event_table);
00535      SUBDBG( "pe_ctl->eventss[%d].config=%"PRIx64"\n",i,
00536          pe_ctl->events[i].attr.config);
00537      if (ret!=PAPI_OK) return ret;
00538 
00539       } else {
00540       /* I'm not sure how we'd end up in this case */
00541           /* should it be an error?                    */
00542       }
00543 
00544       /* Copy the inherit flag into the attribute block that will be   */
00545       /* passed to the kernel */
00546       pe_ctl->events[i].attr.inherit = pe_ctl->inherit;
00547 
00548       /* Set the position in the native structure */
00549       /* We just set up events linearly           */
00550       if ( native ) {
00551      native[i].ni_position = i;
00552       }
00553    }
00554 
00555    pe_ctl->num_events = count;
00556    _pe_set_domain( ctl, pe_ctl->domain );
00557 
00558    /* actuall open the events */
00559    /* (why is this a separate function?) */
00560    ret = open_pe_events( pe_ctx, pe_ctl );
00561    if ( ret != PAPI_OK ) {
00562       SUBDBG("open_pe_events failed\n");
00563       /* Restore values ? */
00564       return ret;
00565    }
00566 
00567    return PAPI_OK;
00568 }
00569 
00570 /********************************************************************/
00571 /********************************************************************/
00572 /* Start with functions that are exported via the module interface  */
00573 /********************************************************************/
00574 /********************************************************************/
00575 
00576 
00577 /* set the domain. FIXME: perf_events allows per-event control of this. */
00578 /* we do not handle that yet.                                           */
00579 int
00580 _peu_set_domain( hwd_control_state_t *ctl, int domain)
00581 {
00582 
00583    int i;
00584    pe_control_t *pe_ctl = ( pe_control_t *) ctl;
00585 
00586    SUBDBG("old control domain %d, new domain %d\n",
00587       pe_ctl->domain,domain);
00588 
00589    pe_ctl->domain = domain;
00590 
00591    /* Force the domain on all events */
00592    for( i = 0; i < pe_ctl->num_events; i++ ) {
00593       pe_ctl->events[i].attr.exclude_user =
00594                     !( pe_ctl->domain & PAPI_DOM_USER );
00595       pe_ctl->events[i].attr.exclude_kernel =
00596             !( pe_ctl->domain & PAPI_DOM_KERNEL );
00597       pe_ctl->events[i].attr.exclude_hv =
00598             !( pe_ctl->domain & PAPI_DOM_SUPERVISOR );
00599    }
00600    return PAPI_OK;
00601 }
00602 
00603 /* Shutdown a thread */
00604 int
00605 _peu_shutdown_thread( hwd_context_t *ctx )
00606 {
00607     pe_context_t *pe_ctx = ( pe_context_t *) ctx;
00608 
00609     pe_ctx->initialized=0;
00610 
00611     return PAPI_OK;
00612 }
00613 
00614 
00615 /* reset the hardware counters */
00616 /* Note: PAPI_reset() does not necessarily call this */
00617 /* unless the events are actually running.           */
00618 int
00619 _peu_reset( hwd_context_t *ctx, hwd_control_state_t *ctl )
00620 {
00621    int i, ret;
00622    pe_control_t *pe_ctl = ( pe_control_t *) ctl;
00623 
00624    ( void ) ctx;             /*unused */
00625 
00626    /* We need to reset all of the events, not just the group leaders */
00627    for( i = 0; i < pe_ctl->num_events; i++ ) {
00628       ret = ioctl( pe_ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL );
00629       if ( ret == -1 ) {
00630      PAPIERROR("ioctl(%d, PERF_EVENT_IOC_RESET, NULL) "
00631            "returned error, Linux says: %s",
00632            pe_ctl->events[i].event_fd, strerror( errno ) );
00633      return PAPI_ESYS;
00634       }
00635    }
00636 
00637    return PAPI_OK;
00638 }
00639 
00640 
00641 /* write (set) the hardware counters */
00642 /* Current we do not support this.   */
00643 int
00644 _peu_write( hwd_context_t *ctx, hwd_control_state_t *ctl,
00645         long long *from )
00646 {
00647    ( void ) ctx;             /*unused */
00648    ( void ) ctl;             /*unused */
00649    ( void ) from;            /*unused */
00650    /*
00651     * Counters cannot be written.  Do we need to virtualize the
00652     * counters so that they can be written, or perhaps modify code so that
00653     * they can be written? FIXME ?
00654     */
00655 
00656     return PAPI_ENOSUPP;
00657 }
00658 
00659 /*
00660  * perf_event provides a complicated read interface.
00661  *  the info returned by read() varies depending on whether
00662  *  you have PERF_FORMAT_GROUP, PERF_FORMAT_TOTAL_TIME_ENABLED,
00663  *  PERF_FORMAT_TOTAL_TIME_RUNNING, or PERF_FORMAT_ID set
00664  *
00665  * To simplify things we just always ask for everything.  This might
00666  * lead to overhead when reading more than we need, but it makes the
00667  * read code a lot simpler than the original implementation we had here.
00668  *
00669  * For more info on the layout see include/linux/perf_event.h
00670  *
00671  */
00672 
00673 int
00674 _peu_read( hwd_context_t *ctx, hwd_control_state_t *ctl,
00675            long long **events, int flags )
00676 {
00677    ( void ) flags;           /*unused */
00678    int i, ret = -1;
00679    /* pe_context_t *pe_ctx = ( pe_context_t *) ctx; */ 
00680    (void) ctx; /*unused*/
00681    pe_control_t *pe_ctl = ( pe_control_t *) ctl;
00682    long long papi_pe_buffer[READ_BUFFER_SIZE];
00683    long long tot_time_running, tot_time_enabled, scale;
00684 
00685    /* Handle case where we are multiplexing */
00686    if (pe_ctl->multiplexed) {
00687 
00688       /* currently we handle multiplexing by having individual events */
00689       /* so we read from each in turn.                                */
00690 
00691       for ( i = 0; i < pe_ctl->num_events; i++ ) {
00692 
00693          ret = read( pe_ctl->events[i].event_fd, papi_pe_buffer,
00694             sizeof ( papi_pe_buffer ) );
00695          if ( ret == -1 ) {
00696         PAPIERROR("read returned an error: ", strerror( errno ));
00697         return PAPI_ESYS;
00698      }
00699 
00700      /* We should read 3 64-bit values from the counter */
00701      if (ret<(signed)(3*sizeof(long long))) {
00702         PAPIERROR("Error!  short read!\n");
00703         return PAPI_ESYS;
00704      }
00705 
00706          SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
00707             pe_ctl->events[i].event_fd,
00708         (long)pe_ctl->tid, pe_ctl->cpu, ret);
00709          SUBDBG("read: %lld %lld %lld\n",papi_pe_buffer[0],
00710             papi_pe_buffer[1],papi_pe_buffer[2]);
00711 
00712          tot_time_enabled = papi_pe_buffer[1];
00713          tot_time_running = papi_pe_buffer[2];
00714 
00715          SUBDBG("count[%d] = (papi_pe_buffer[%d] %lld * "
00716         "tot_time_enabled %lld) / tot_time_running %lld\n",
00717         i, 0,papi_pe_buffer[0],
00718         tot_time_enabled,tot_time_running);
00719 
00720          if (tot_time_running == tot_time_enabled) {
00721         /* No scaling needed */
00722         pe_ctl->counts[i] = papi_pe_buffer[0];
00723          } else if (tot_time_running && tot_time_enabled) {
00724         /* Scale factor of 100 to avoid overflows when computing */
00725         /*enabled/running */
00726 
00727         scale = (tot_time_enabled * 100LL) / tot_time_running;
00728         scale = scale * papi_pe_buffer[0];
00729         scale = scale / 100LL;
00730         pe_ctl->counts[i] = scale;
00731      } else {
00732        /* This should not happen, but Phil reports it sometime does. */
00733         SUBDBG("perf_event kernel bug(?) count, enabled, "
00734            "running: %lld, %lld, %lld\n",
00735            papi_pe_buffer[0],tot_time_enabled,
00736            tot_time_running);
00737 
00738         pe_ctl->counts[i] = papi_pe_buffer[0];
00739      }
00740       }
00741    }
00742 
00743    /* Handle cases where we cannot use FORMAT GROUP */
00744    else if (pe_ctl->inherit) {
00745 
00746       /* we must read each counter individually */
00747       for ( i = 0; i < pe_ctl->num_events; i++ ) {
00748 
00749          ret = read( pe_ctl->events[i].event_fd, papi_pe_buffer, 
00750             sizeof ( papi_pe_buffer ) );
00751          if ( ret == -1 ) {
00752         PAPIERROR("read returned an error: ", strerror( errno ));
00753         return PAPI_ESYS;
00754      }
00755 
00756      /* we should read one 64-bit value from each counter */
00757      if (ret!=sizeof(long long)) {
00758         PAPIERROR("Error!  short read!\n");
00759         PAPIERROR("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
00760            pe_ctl->events[i].event_fd,
00761            (long)pe_ctl->tid, pe_ctl->cpu, ret);
00762         return PAPI_ESYS;
00763      }
00764 
00765          SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
00766             pe_ctl->events[i].event_fd, (long)pe_ctl->tid,
00767         pe_ctl->cpu, ret);
00768          SUBDBG("read: %lld\n",papi_pe_buffer[0]);
00769 
00770      pe_ctl->counts[i] = papi_pe_buffer[0];
00771       }
00772    }
00773 
00774 
00775    /* Handle cases where we are using FORMAT_GROUP   */
00776    /* We assume only one group leader, in position 0 */
00777 
00778    else {
00779       if (pe_ctl->events[0].group_leader_fd!=-1) {
00780      PAPIERROR("Was expecting group leader!\n");
00781       }
00782 
00783       ret = read( pe_ctl->events[0].event_fd, papi_pe_buffer,
00784           sizeof ( papi_pe_buffer ) );
00785 
00786       if ( ret == -1 ) {
00787      PAPIERROR("read returned an error: ", strerror( errno ));
00788      return PAPI_ESYS;
00789       }
00790 
00791       /* we read 1 64-bit value (number of events) then     */
00792       /* num_events more 64-bit values that hold the counts */
00793       if (ret<(signed)((1+pe_ctl->num_events)*sizeof(long long))) {
00794      PAPIERROR("Error! short read!\n");
00795      return PAPI_ESYS;
00796       }
00797 
00798       SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
00799          pe_ctl->events[0].event_fd,
00800          (long)pe_ctl->tid, pe_ctl->cpu, ret);
00801       {
00802      int j;
00803      for(j=0;j<ret/8;j++) {
00804             SUBDBG("read %d: %lld\n",j,papi_pe_buffer[j]);
00805      }
00806       }
00807 
00808       /* Make sure the kernel agrees with how many events we have */
00809       if (papi_pe_buffer[0]!=pe_ctl->num_events) {
00810      PAPIERROR("Error!  Wrong number of events!\n");
00811      return PAPI_ESYS;
00812       }
00813 
00814       /* put the count values in their proper location */
00815       for(i=0;i<papi_pe_buffer[0];i++) {
00816          pe_ctl->counts[i] = papi_pe_buffer[1+i];
00817       }
00818    }
00819 
00820    /* point PAPI to the values we read */
00821    *events = pe_ctl->counts;
00822 
00823    return PAPI_OK;
00824 }
00825 
00826 /* Start counting events */
00827 int
00828 _peu_start( hwd_context_t *ctx, hwd_control_state_t *ctl )
00829 {
00830    int ret;
00831    int i;
00832    int did_something = 0;
00833    pe_context_t *pe_ctx = ( pe_context_t *) ctx;
00834    pe_control_t *pe_ctl = ( pe_control_t *) ctl;
00835 
00836    /* Reset the counters first.  Is this necessary? */
00837    ret = _pe_reset( pe_ctx, pe_ctl );
00838    if ( ret ) {
00839       return ret;
00840    }
00841 
00842    /* Enable all of the group leaders                */
00843    /* All group leaders have a group_leader_fd of -1 */
00844    for( i = 0; i < pe_ctl->num_events; i++ ) {
00845       if (pe_ctl->events[i].group_leader_fd == -1) {
00846      SUBDBG("ioctl(enable): fd: %d\n", pe_ctl->events[i].event_fd);
00847      ret=ioctl( pe_ctl->events[i].event_fd, PERF_EVENT_IOC_ENABLE, NULL) ; 
00848 
00849      /* ioctls always return -1 on failure */
00850          if (ret == -1) {
00851             PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed.\n");
00852             return PAPI_ESYS;
00853      }
00854 
00855      did_something++;
00856       } 
00857    }
00858 
00859    if (!did_something) {
00860       PAPIERROR("Did not enable any counters.\n");
00861       return PAPI_EBUG;
00862    }
00863 
00864    pe_ctx->state |= PERF_EVENTS_RUNNING;
00865 
00866    return PAPI_OK;
00867 
00868 }
00869 
00870 /* Stop all of the counters */
00871 int
00872 _peu_stop( hwd_context_t *ctx, hwd_control_state_t *ctl )
00873 {
00874 
00875    int ret;
00876    int i;
00877    pe_context_t *pe_ctx = ( pe_context_t *) ctx;
00878    pe_control_t *pe_ctl = ( pe_control_t *) ctl;
00879 
00880    /* Just disable the group leaders */
00881    for ( i = 0; i < pe_ctl->num_events; i++ ) {
00882       if ( pe_ctl->events[i].group_leader_fd == -1 ) {
00883      ret=ioctl( pe_ctl->events[i].event_fd, PERF_EVENT_IOC_DISABLE, NULL);
00884      if ( ret == -1 ) {
00885         PAPIERROR( "ioctl(%d, PERF_EVENT_IOC_DISABLE, NULL) "
00886                "returned error, Linux says: %s",
00887                pe_ctl->events[i].event_fd, strerror( errno ) );
00888         return PAPI_EBUG;
00889      }
00890       }
00891    }
00892 
00893    pe_ctx->state &= ~PERF_EVENTS_RUNNING;
00894 
00895    return PAPI_OK;
00896 }
00897 
00898 /* Set various options on a control state */
int
_peu_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option )
{
   /* Dispatch on the option code; each case pulls the control state  */
   /* out of the matching member of the option union.                 */
   int ret;
   pe_context_t *pe_ctx = ( pe_context_t *) ctx;
   pe_control_t *pe_ctl = NULL;

   switch ( code ) {
      case PAPI_MULTIPLEX:
	   pe_ctl = ( pe_control_t * ) ( option->multiplex.ESI->ctl_state );

	   /* Turn on multiplexing, then push the change to the kernel.  */
	   /* Roll the flag back if the kernel update fails.             */
	   pe_ctl->multiplexed = 1;
	   ret = _peu_update_control_state( pe_ctl, NULL,
					    pe_ctl->num_events, pe_ctx );
	   if (ret != PAPI_OK) {
	      pe_ctl->multiplexed = 0;
	   }
	   return ret;

      case PAPI_ATTACH:
	   pe_ctl = ( pe_control_t * ) ( option->attach.ESI->ctl_state );

	   pe_ctl->tid = option->attach.tid;

	   /* If events have been already been added, something may */
	   /* have been done to the kernel, so update */
	   ret =_peu_update_control_state( pe_ctl, NULL,
					    pe_ctl->num_events, pe_ctx);

	   return ret;

      case PAPI_DETACH:
	   /* Note: DETACH reuses the attach member of the option union. */
	   pe_ctl = ( pe_control_t *) ( option->attach.ESI->ctl_state );

	   pe_ctl->tid = 0;
	   return PAPI_OK;

      case PAPI_CPU_ATTACH:
	   pe_ctl = ( pe_control_t *) ( option->cpu.ESI->ctl_state );

	   /* this tells the kernel not to count for a thread   */
	   /* should we warn if we try to set both?  perf_event */
	   /* will reject it.                                   */
	   pe_ctl->tid = -1;

	   pe_ctl->cpu = option->cpu.cpu_num;

	   return PAPI_OK;

      case PAPI_DOMAIN:
	   pe_ctl = ( pe_control_t *) ( option->domain.ESI->ctl_state );

	   /* looks like we are allowed, so set counting domain */
	   return _pe_set_domain( pe_ctl, option->domain.domain );

      case PAPI_GRANUL:
	   pe_ctl = (pe_control_t *) ( option->granularity.ESI->ctl_state );

	   /* FIXME: we really don't support this yet */

           switch ( option->granularity.granularity  ) {
              case PAPI_GRN_PROCG:
              case PAPI_GRN_SYS_CPU:
              case PAPI_GRN_PROC:
		   return PAPI_ECMP;

	      /* Currently we only support thread and CPU granularity */
              case PAPI_GRN_SYS:
		   pe_ctl->granularity=PAPI_GRN_SYS;
		   break;

              case PAPI_GRN_THR:
		   pe_ctl->granularity=PAPI_GRN_THR;
		   break;


              default:
		   return PAPI_EINVAL;
	   }
           return PAPI_OK;

      case PAPI_INHERIT:
	   pe_ctl = (pe_control_t *) ( option->inherit.ESI->ctl_state );

	   if (option->inherit.inherit) {
	      /* children will inherit counters */
	      pe_ctl->inherit = 1;
	   } else {
	      /* children won't inherit counters */
	      pe_ctl->inherit = 0;
	   }
	   return PAPI_OK;

      /* The remaining options are not supported by this component. */
      case PAPI_DATA_ADDRESS:
	   return PAPI_ENOSUPP;

      case PAPI_INSTR_ADDRESS:
	   return PAPI_ENOSUPP;

      case PAPI_DEF_ITIMER:
	   return PAPI_ENOSUPP;

      case PAPI_DEF_MPX_NS:
	   return PAPI_ENOSUPP;

      case PAPI_DEF_ITIMER_NS:
	   return PAPI_ENOSUPP;

      default:
	   return PAPI_ENOSUPP;
   }
}
01011 
01012 
01013 int
01014 _peu_ntv_enum_events( unsigned int *PapiEventCode, int modifier )
01015 {
01016 
01017   if (_perf_event_uncore_vector.cmp_info.disabled) return PAPI_ENOEVNT;
01018 
01019 
01020   return _peu_libpfm4_ntv_enum_events(PapiEventCode, modifier,
01021                                        &uncore_native_event_table);
01022 }
01023 
01024 int
01025 _peu_ntv_name_to_code( char *name, unsigned int *event_code) {
01026 
01027   if (_perf_event_uncore_vector.cmp_info.disabled) return PAPI_ENOEVNT;
01028 
01029   return _peu_libpfm4_ntv_name_to_code(name,event_code,
01030                                         &uncore_native_event_table);
01031 }
01032 
01033 int
01034 _peu_ntv_code_to_name(unsigned int EventCode,
01035                           char *ntv_name, int len) {
01036 
01037    if (_perf_event_uncore_vector.cmp_info.disabled) return PAPI_ENOEVNT;
01038 
01039    return _peu_libpfm4_ntv_code_to_name(EventCode,
01040                                          ntv_name, len, 
01041                      &uncore_native_event_table);
01042 }
01043 
01044 int
01045 _peu_ntv_code_to_descr( unsigned int EventCode,
01046                             char *ntv_descr, int len) {
01047 
01048    if (_perf_event_uncore_vector.cmp_info.disabled) return PAPI_ENOEVNT;
01049 
01050    return _peu_libpfm4_ntv_code_to_descr(EventCode,ntv_descr,len,
01051                                           &uncore_native_event_table);
01052 }
01053 
01054 int
01055 _peu_ntv_code_to_info(unsigned int EventCode,
01056                           PAPI_event_info_t *info) {
01057 
01058   if (_perf_event_uncore_vector.cmp_info.disabled) return PAPI_ENOEVNT;
01059 
01060   return _peu_libpfm4_ntv_code_to_info(EventCode, info,
01061                                         &uncore_native_event_table);
01062 }
01063 
01064 /* Our component vector */
01065 
papi_vector_t _perf_event_uncore_vector = {
   .cmp_info = {
       /* component information (unspecified values initialized to 0) */
      .name = "perf_event_uncore",
      .short_name = "peu",
      .version = "5.0",
      .description = "Linux perf_event CPU uncore and northbridge",

      /* Uncore counters are socket-wide, so system granularity and    */
      /* the ALL domain are the defaults.                              */
      .default_domain = PAPI_DOM_ALL,
      .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR,
      .default_granularity = PAPI_GRN_SYS,
      .available_granularities = PAPI_GRN_SYS,

      .num_mpx_cntrs = PERF_EVENT_MAX_MPX_COUNTERS,

      /* component specific cmp_info initializations */
      .fast_virtual_timer = 0,
      .attach = 1,
      .attach_must_ptrace = 1,
      .cpu = 1,
      .inherit = 1,
      .cntr_umasks = 1,

  },

  /* sizes of framework-opaque component-private structures */
  .size = {
      .context = sizeof ( pe_context_t ),
      .control_state = sizeof ( pe_control_t ),
      .reg_value = sizeof ( int ),
      .reg_alloc = sizeof ( int ),
  },

  /* function pointers in this component */
  .init_component =        _peu_init_component,
  .shutdown_component =    _peu_shutdown_component,
  .init_thread =           _peu_init_thread,
  .init_control_state =    _peu_init_control_state,
  .start =                 _peu_start,
  .stop =                  _peu_stop,
  .read =                  _peu_read,
  .shutdown_thread =       _peu_shutdown_thread,
  .ctl =                   _peu_ctl,
  .update_control_state =  _peu_update_control_state,
  .set_domain =            _peu_set_domain,
  .reset =                 _peu_reset,
  .write =                 _peu_write,

  /* from counter name mapper (libpfm4-backed event name/code/info) */
  .ntv_enum_events =   _peu_ntv_enum_events,
  .ntv_name_to_code =  _peu_ntv_name_to_code,
  .ntv_code_to_name =  _peu_ntv_code_to_name,
  .ntv_code_to_descr = _peu_ntv_code_to_descr,
  .ntv_code_to_info =  _peu_ntv_code_to_info,
};
01121 
01122 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines