PAPI  5.0.1.0
perf_events.c File Reference
Include dependency graph for perf_events.c:

Go to the source code of this file.

Data Structures

struct  pe_event_info_t
struct  pe_control_t
struct  pe_context_t
struct  ip_event
struct  lost_event
union  perf_sample_event_t

Defines

#define PERF_EVENT_MAX_MPX_COUNTERS   64
#define WAKEUP_COUNTER_OVERFLOW   0
#define WAKEUP_PROFILING   -1
#define WAKEUP_MODE_COUNTER_OVERFLOW   0
#define WAKEUP_MODE_PROFILING   1
#define PERF_EVENTS_OPENED   0x01
#define PERF_EVENTS_RUNNING   0x02
#define READ_BUFFER_SIZE   (3 + (2 * PERF_EVENT_MAX_MPX_COUNTERS))

Functions

static int bug_check_scheduability (void)
static int bug_format_group (void)
static int bug_sync_read (void)
static int fcntl_setown_fd (int fd)
static int processor_supported (int vendor, int family)
static unsigned int get_read_format (unsigned int multiplex, unsigned int inherit, int format_group)
static int check_permissions (unsigned long tid, unsigned int cpu_num, unsigned int domain, unsigned int multiplex, unsigned int inherit)
static int check_scheduability (pe_context_t *ctx, pe_control_t *ctl, int idx)
static int tune_up_fd (pe_control_t *ctl, int evt_idx)
static int open_pe_events (pe_context_t *ctx, pe_control_t *ctl)
static int close_pe_events (pe_context_t *ctx, pe_control_t *ctl)
static int pe_vendor_fixups (void)
static int detect_rdpmc (void)
static int find_profile_index (EventSetInfo_t *ESI, int evt_idx, int *flags, unsigned int *native_index, int *profile_index)
static uint64_t mmap_read_head (pe_event_info_t *pe)
static void mmap_write_tail (pe_event_info_t *pe, uint64_t tail)
static void mmap_read (ThreadInfo_t **thr, pe_event_info_t *pe, int profile_index)
static int process_smpl_buf (int evt_idx, ThreadInfo_t **thr)
int _papi_pe_set_domain (void *ctl, int domain)
static int _papi_pe_init_component (int cidx)
static int _papi_pe_shutdown_component (void)
static int _papi_pe_init_thread (void *hwd_ctx)
static int _papi_pe_shutdown_thread (void *ctx)
static int _papi_pe_reset (void *ctx, void *ctl)
static int _papi_pe_write (void *ctx, void *ctl, long long *from)
static int _papi_pe_read (void *ctx, void *ctl, long long **events, int flags)
static int _papi_pe_start (void *ctx, void *ctl)
static int _papi_pe_stop (void *ctx, void *ctl)
static int _papi_pe_init_control_state (void *ctl)
static int _papi_pe_update_control_state (void *ctl, NativeInfo_t *native, int count, void *ctx)
static int _papi_pe_ctl (void *ctx, int code, _papi_int_option_t *option)
static void _papi_pe_dispatch_timer (int n, void *info, void *uc)
static int _papi_pe_stop_profiling (ThreadInfo_t *thread, EventSetInfo_t *ESI)
static int _papi_pe_set_overflow (EventSetInfo_t *ESI, int EventIndex, int threshold)
static int _papi_pe_set_profile (EventSetInfo_t *ESI, int EventIndex, int threshold)

Variables

static int nmi_watchdog_active
papi_vector_t _papi_pe_vector

Define Documentation

#define PERF_EVENT_MAX_MPX_COUNTERS   64

Definition at line 50 of file perf_events.c.

#define PERF_EVENTS_OPENED   0x01

Definition at line 96 of file perf_events.c.

#define PERF_EVENTS_RUNNING   0x02

Definition at line 97 of file perf_events.c.

#define READ_BUFFER_SIZE   (3 + (2 * PERF_EVENT_MAX_MPX_COUNTERS))

Definition at line 318 of file perf_events.c.

#define WAKEUP_COUNTER_OVERFLOW   0

Definition at line 89 of file perf_events.c.

#define WAKEUP_MODE_COUNTER_OVERFLOW   0

Definition at line 92 of file perf_events.c.

#define WAKEUP_MODE_PROFILING   1

Definition at line 93 of file perf_events.c.

#define WAKEUP_PROFILING   -1

Definition at line 90 of file perf_events.c.


Function Documentation

static int _papi_pe_ctl ( void *  ctx,
int  code,
_papi_int_option_t option 
) [static]

< Turn on/off or multiplexing for an eventset

< No error

< Permission level does not permit operation

< No error

< Attach to another tid/pid instead of ourselves

< No error

< Permission level does not permit operation

< Detach

< No error

< Specify a cpu number the event set should be tied to

< No error

< Permission level does not permit operation

< No error

< Domain for an eventset

< No error

< Permission level does not permit operation

< Granularity for an eventset

< PAPI counters for each individual process group

< PAPI counters for the current CPU, are you bound?

< PAPI counters for all CPUs individually

< PAPI counters for each individual process

< Not supported by component

< PAPI counters for each individual thread

< Invalid argument

< No error

< Option to set counter inheritance flag

< No error

< Permission level does not permit operation

< No error

< Option to set data address range restriction

< Not supported

< Option to set instruction address range restriction

< Not supported

< Option to set the type of itimer used in both software multiplexing, overflowing and profiling

< No error

< Multiplexing/overflowing interval in ns, same as PAPI_DEF_ITIMER_NS

< Not supported

< Multiplexing/overflowing interval in ns, same as PAPI_DEF_MPX_NS

< No error

< Not supported

Definition at line 1510 of file perf_events.c.

{
   /* Set per-event-set control options for the perf_events component.  */
   /* 'code' selects which PAPI_xxx option is being applied and         */
   /* 'option' carries the matching union member.  Returns a PAPI       */
   /* error code (PAPI_OK, PAPI_EPERM, PAPI_ENOSUPP, ...).              */
   int ret;
   pe_context_t *pe_ctx = ( pe_context_t *) ctx;
   pe_control_t *pe_ctl = NULL;

   switch ( code ) {
      case PAPI_MULTIPLEX:
       pe_ctl = ( pe_control_t * ) ( option->multiplex.ESI->ctl_state );
       if (check_permissions( pe_ctl->tid, pe_ctl->cpu, pe_ctl->domain, 
                  1, pe_ctl->inherit ) != PAPI_OK) {
          return PAPI_EPERM;
       }

       /* looks like we are allowed, so set multiplexed attribute */
       pe_ctl->multiplexed = 1;
       ret = _papi_pe_update_control_state( pe_ctl, NULL, 
                        pe_ctl->num_events, pe_ctx );
       if (ret != PAPI_OK) {
          /* update failed: roll the multiplex flag back so the */
          /* control state stays consistent with the kernel      */
          pe_ctl->multiplexed = 0;
       }
       return ret;

      case PAPI_ATTACH:
       /* Monitor another tid/pid instead of ourselves.  Note the    */
       /* permission check is made against the *target* tid.         */
       pe_ctl = ( pe_control_t * ) ( option->attach.ESI->ctl_state );
       if (check_permissions( option->attach.tid, pe_ctl->cpu, 
                  pe_ctl->domain, pe_ctl->multiplexed, 
                  pe_ctl->inherit ) != PAPI_OK) {
          return PAPI_EPERM;
       }

       pe_ctl->tid = option->attach.tid;

       /* If events have been already been added, something may */
       /* have been done to the kernel, so update */
       ret = _papi_pe_update_control_state( pe_ctl, NULL, 
                        pe_ctl->num_events, pe_ctx);

       return ret;

      case PAPI_DETACH:
       pe_ctl = ( pe_control_t *) ( option->attach.ESI->ctl_state );

       /* NOTE(review): tid 0 presumably means "count the calling   */
       /* thread again" -- confirm against how open_pe_events()     */
       /* consumes ctl->tid.                                        */
       pe_ctl->tid = 0;
       return PAPI_OK;

      case PAPI_CPU_ATTACH:
       pe_ctl = ( pe_control_t *) ( option->cpu.ESI->ctl_state );
       if (check_permissions( pe_ctl->tid, option->cpu.cpu_num, 
                  pe_ctl->domain, pe_ctl->multiplexed, 
                  pe_ctl->inherit ) != PAPI_OK) {
           return PAPI_EPERM;
       }
       /* looks like we are allowed so set cpu number */

       /* this tells the kernel not to count for a thread   */
       /* should we warn if we try to set both?  perf_event */
       /* will reject it.                                   */
       pe_ctl->tid = -1;

       pe_ctl->cpu = option->cpu.cpu_num;

       return PAPI_OK;

      case PAPI_DOMAIN:
       pe_ctl = ( pe_control_t *) ( option->domain.ESI->ctl_state );
       if (check_permissions( pe_ctl->tid, pe_ctl->cpu, 
                  option->domain.domain, pe_ctl->multiplexed,
                  pe_ctl->inherit ) != PAPI_OK) {
          return PAPI_EPERM;
       }
       /* looks like we are allowed, so set counting domain */
       return _papi_pe_set_domain( pe_ctl, option->domain.domain );

      case PAPI_GRANUL:
       pe_ctl = (pe_control_t *) ( option->granularity.ESI->ctl_state );

       /* FIXME: we really don't support this yet */

           switch ( option->granularity.granularity  ) {
              case PAPI_GRN_PROCG:
              case PAPI_GRN_SYS:
              case PAPI_GRN_SYS_CPU:
              case PAPI_GRN_PROC:
           return PAPI_ECMP;

          /* Currently we only support thread granularity */
              case PAPI_GRN_THR:
           break;

              default:
           return PAPI_EINVAL;
       }
           return PAPI_OK;

      case PAPI_INHERIT:
       pe_ctl = (pe_control_t *) ( option->inherit.ESI->ctl_state );
       if (check_permissions( pe_ctl->tid, pe_ctl->cpu, pe_ctl->domain, 
                  pe_ctl->multiplexed, 
                  option->inherit.inherit ) != PAPI_OK) {
          return PAPI_EPERM;
       }
       /* looks like we are allowed, so set the requested inheritance */
       if (option->inherit.inherit) {
          /* children will inherit counters */
          pe_ctl->inherit = 1;
       } else {
          /* children won't inherit counters */
          pe_ctl->inherit = 0;
       }
       return PAPI_OK;

      case PAPI_DATA_ADDRESS:
       /* Address-range restriction is not implemented for this      */
       /* component; the #if 0 block below is kept from an older     */
       /* (perfmon-style) implementation for reference.              */
       return PAPI_ENOSUPP;
#if 0
       pe_ctl = (pe_control_t *) (option->address_range.ESI->ctl_state);
       ret = set_default_domain( pe_ctl, option->address_range.domain );
       if ( ret != PAPI_OK ) {
          return ret;
       }
       set_drange( pe_ctx, pe_ctl, option );
       return PAPI_OK;
#endif
      case PAPI_INSTR_ADDRESS:
       return PAPI_ENOSUPP;
#if 0
       pe_ctl = (pe_control_t *) (option->address_range.ESI->ctl_state);
       ret = set_default_domain( pe_ctl, option->address_range.domain );
       if ( ret != PAPI_OK ) {
          return ret;
       }
       set_irange( pe_ctx, pe_ctl, option );
       return PAPI_OK;
#endif

      case PAPI_DEF_ITIMER:
       /* What should we be checking for here?                   */
       /* This seems like it should be OS-specific not component */
       /* specific.                                              */

       return PAPI_OK;

      case PAPI_DEF_MPX_NS:
       /* Defining a given ns per set is not currently supported */
       return PAPI_ENOSUPP;

      case PAPI_DEF_ITIMER_NS:
       /* We don't support this... */
       return PAPI_OK;

      default:
       return PAPI_ENOSUPP;
   }
}

Here is the call graph for this function:

static void _papi_pe_dispatch_timer ( int  n,
void *  info,
void *  uc 
) [static]

< Force using Software

< Using Hardware

< EventSet has profiling enabled

< Force Software overflow in profiling

Definition at line 1672 of file perf_events.c.

{
   /* Overflow-signal dispatcher: runs in signal context when a        */
   /* perf_event fd delivers SIGIO/RT overflow notification.  Finds    */
   /* the event that overflowed, forwards either a profiling sample    */
   /* or an overflow notification to the PAPI framework, and restarts  */
   /* the counter.                                                     */
   ( void ) n;               /*unused */
   _papi_hwi_context_t hw_context;
   /* NOTE(review): 'info' arrives here as void*; accessing si_fd      */
   /* directly suggests a (siginfo_t *) cast was lost in this listing  */
   /* -- confirm against the original source.                          */
   int found_evt_idx = -1, fd = info->si_fd;
   caddr_t address;
   ThreadInfo_t *thread = _papi_hwi_lookup_thread( 0 );
   int cidx = _papi_pe_vector.cmp_info.CmpIdx;
   int i;
   pe_control_t *ctl;

   /* Sanity checks: we need a thread, a running event set, and        */
   /* overflow flags set, or the signal makes no sense.                */
   if ( thread == NULL ) {
      PAPIERROR( "thread == NULL in _papi_pe_dispatch_timer for fd %d!", fd );
      return;
   }

   if ( thread->running_eventset[cidx] == NULL ) {
      PAPIERROR( "thread->running_eventset == NULL in "
         "_papi_pe_dispatch_timer for fd %d!",fd );
      return;
   }

   if ( thread->running_eventset[cidx]->overflow.flags == 0 ) {
      PAPIERROR( "thread->running_eventset->overflow.flags == 0 in "
         "_papi_pe_dispatch_timer for fd %d!", fd );
      return;
   }

   hw_context.si = info;
   hw_context.ucontext = ( hwd_ucontext_t * ) uc;

   /* Software-forced overflow: dispatch immediately, no fd lookup. */
   if ( thread->running_eventset[cidx]->overflow.flags & 
    PAPI_OVERFLOW_FORCE_SW ) {
      address = GET_OVERFLOW_ADDRESS( hw_context );
      _papi_hwi_dispatch_overflow_signal( ( void * ) &hw_context, 
                      address, NULL, 0,
                      0, &thread, cidx );
      return;
   }

   /* Unexpected flag combination: log it but keep going. */
   if ( thread->running_eventset[cidx]->overflow.flags !=
         PAPI_OVERFLOW_HARDWARE ) {
      PAPIERROR( "thread->running_eventset->overflow.flags is set to "
         "something other than PAPI_OVERFLOW_HARDWARE or "
         "PAPI_OVERFLOW_FORCE_SW for fd %d (%x)",
         fd , thread->running_eventset[cidx]->overflow.flags);
   }

   /* convoluted way to get ctl */
   ctl= thread->running_eventset[cidx]->ctl_state;

   /* See if the fd is one that's part of this thread's context */
   for( i=0; i < ctl->num_events; i++ ) {
      if ( fd == ctl->events[i].event_fd ) {
     found_evt_idx = i;
     break;
      }
   }

   if ( found_evt_idx == -1 ) {
      PAPIERROR( "Unable to find fd %d among the open event fds "
         "_papi_hwi_dispatch_timer!", fd );
      return;
   }

   /* Stop the event while we service the sample(s). */
   ioctl( fd, PERF_EVENT_IOC_DISABLE, NULL );

   if ( ( thread->running_eventset[cidx]->state & PAPI_PROFILING ) && 
    !( thread->running_eventset[cidx]->profile.flags & 
       PAPI_PROFIL_FORCE_SW ) ) {
      /* Hardware profiling: drain the whole mmap sample buffer. */
      process_smpl_buf( found_evt_idx, &thread );
   }
   else {
      uint64_t ip;
      unsigned int head;
      pe_event_info_t *pe = &(ctl->events[found_evt_idx]);
      /* Samples start one page past the mmap base (page 0 is the  */
      /* perf_event_mmap_page control/user page).                  */
      unsigned char *data = ((unsigned char*)pe->mmap_buf) + getpagesize(  );

      /*
       * Read up the most recent IP from the sample in the mmap buffer.  To
       * do this, we make the assumption that all of the records in the
       * mmap buffer are the same size, and that they all contain the IP as
       * their only record element.  This means that we can use the
       * data_head element from the user page and move backward one record
       * from that point and read the data.  Since we don't actually need
       * to access the header of the record, we can just subtract 8 (size
       * of the IP) from data_head and read up that word from the mmap
       * buffer.  After we subtract 8, we account for mmap buffer wrapping
       * by AND'ing this offset with the buffer mask.
       */
      head = mmap_read_head( pe );

      if ( head == 0 ) {
     PAPIERROR( "Attempting to access memory which may be inaccessable" );
     return;
      }

      ip = *( uint64_t * ) ( data + ( ( head - 8 ) & pe->mask ) );
      /*
       * Update the tail to the current head pointer. 
       *
       * Note: that if we were to read the record at the tail pointer,
       * rather than the one at the head (as you might otherwise think
       * would be natural), we could run into problems.  Signals don't
       * stack well on Linux, particularly if not using RT signals, and if
       * they come in rapidly enough, we can lose some.  Overtime, the head
       * could catch up to the tail and monitoring would be stopped, and
       * since no more signals are coming in, this problem will never be
       * resolved, resulting in a complete loss of overflow notification
       * from that point on.  So the solution we use here will result in
       * only the most recent IP value being read every time there are two
       * or more samples in the buffer (for that one overflow signal).  But
       * the handler will always bring up the tail, so the head should
       * never run into the tail.
       */
      mmap_write_tail( pe, head );

      /*
       * The fourth parameter is supposed to be a vector of bits indicating
       * the overflowed hardware counters, but it's not really clear that
       * it's useful, because the actual hardware counters used are not
       * exposed to the PAPI user.  For now, I'm just going to set the bit
       * that indicates which event register in the array overflowed.  The
       * result is that the overflow vector will not be identical to the
       * perfmon implementation, and part of that is due to the fact that
       * which hardware register is actually being used is opaque at the
       * user level (the kernel event dispatcher hides that info).
       */

      _papi_hwi_dispatch_overflow_signal( ( void * ) &hw_context,
                      ( caddr_t ) ( unsigned long ) ip,
                      NULL, ( 1 << found_evt_idx ), 0,
                      &thread, cidx );

   }

   /* Restart the counters */
   if (ioctl( fd, PERF_EVENT_IOC_REFRESH, 1 ) == -1) {
      PAPIERROR( "overflow refresh failed", 0 );
   }
}

Here is the call graph for this function:

static int _papi_pe_init_component ( int  cidx) [static]

< Component Index isn't set

< Component Index isn't set

< No error

< No error

Definition at line 963 of file perf_events.c.

{

   /* One-time component initialization: verify the kernel supports    */
   /* perf_events and that the paranoid setting permits counting, then */
   /* configure component capabilities (multiplexing, signals, rdpmc)  */
   /* and initialize libpfm4.  Returns PAPI_OK or PAPI_ENOCMP /        */
   /* another PAPI error, with disabled_reason filled in on failure.   */
   int retval;
   int paranoid_level;

   FILE *fff;

   ( void ) cidx;          /*unused */

   /* This is the official way to detect if perf_event support exists */
   /* The file is called perf_counter_paranoid on 2.6.31             */
   /* currently we are lazy and do not support 2.6.31 kernels        */
   fff=fopen("/proc/sys/kernel/perf_event_paranoid","r");
   if (fff==NULL) {
      strncpy(_papi_pe_vector.cmp_info.disabled_reason,
          "perf_event support not detected",PAPI_MAX_STR_LEN);
      return PAPI_ENOCMP;
   }

   /* 2 means no measurements allowed          */
   /* 1 means normal counter access            */
   /* 0 means you can access CPU-specific data */
   /* -1 means no restrictions                 */
   retval=fscanf(fff,"%d",&paranoid_level);
   if (retval!=1) fprintf(stderr,"Error reading paranoid level\n");
   fclose(fff);

   if (paranoid_level==2) {
      strncpy(_papi_pe_vector.cmp_info.disabled_reason,
          "/proc/sys/kernel/perf_event_paranoid prohibits using counters",
          PAPI_MAX_STR_LEN);
      return PAPI_ENOCMP;
   }

   /* Detect NMI watchdog which can steal counters */
   nmi_watchdog_active=_linux_detect_nmi_watchdog();
   if (nmi_watchdog_active) {
      SUBDBG("The Linux nmi_watchdog is using one of the performance "
             "counters, reducing the total number available.\n");
   }

   /* Kernel multiplexing is broken prior to kernel 2.6.34 */
   /* The fix was probably git commit:                     */
   /*     45e16a6834b6af098702e5ea6c9a40de42ff77d8         */
   if (_papi_os_info.os_version < LINUX_VERSION(2,6,34)) {
      _papi_pe_vector.cmp_info.kernel_multiplex = 0;
   }
   else {
      _papi_pe_vector.cmp_info.kernel_multiplex = 1;
   }

   /* We use the RealTime signal for some reason */
   _papi_pe_vector.cmp_info.hardware_intr_sig = SIGRTMIN + 2;

   /* Check that processor is supported */
   if (processor_supported(_papi_hwi_system_info.hw_info.vendor,
               _papi_hwi_system_info.hw_info.cpuid_family)!=
      PAPI_OK) {
      fprintf(stderr,"warning, your processor is unsupported\n");
      /* should not return error, as software events should still work */
   }

   /* Setup mmtimers, if appropriate */
   retval=mmtimer_setup();
   if (retval) {
      strncpy(_papi_pe_vector.cmp_info.disabled_reason,
          "Error initializing mmtimer",PAPI_MAX_STR_LEN);
      return retval;
   }

   /* Detect if we can use rdpmc (or equivalent) */
   /* We currently do not use rdpmc as it is slower in tests */
   /* than regular read (as of Linux 3.5)                    */
   retval=detect_rdpmc();
   if (retval < 0 ) {
      strncpy(_papi_pe_vector.cmp_info.disabled_reason,
         "Error detecting rdpmc",PAPI_MAX_STR_LEN);
      return retval;
   }
   /* detect_rdpmc() returns 0/1 for availability on success */
   _papi_pe_vector.cmp_info.fast_counter_read = retval;

   /* Run Vendor-specific fixups */
   pe_vendor_fixups();

   /* Run the libpfm4-specific setup */
   retval = _papi_libpfm4_init(&_papi_pe_vector, cidx);
   if (retval) {
      strncpy(_papi_pe_vector.cmp_info.disabled_reason,
          "Error initializing libpfm4",PAPI_MAX_STR_LEN);
      return retval;
   }

   return PAPI_OK;

}

Here is the call graph for this function:

static int _papi_pe_init_control_state ( void *  ctl) [static]

< No error

Definition at line 1427 of file perf_events.c.

{
   pe_control_t *control = ( pe_control_t *) ctl;

   /* Start from an all-zero control state. */
   memset( control, 0, sizeof ( *control ) );

   /* A cpu value of -1 marks the event set as not bound */
   /* to any particular cpu.                             */
   control->cpu = -1;

   /* Apply the component's default counting domain. */
   _papi_pe_set_domain( ctl, _papi_pe_vector.cmp_info.default_domain );

   return PAPI_OK;
}

Here is the call graph for this function:

static int _papi_pe_init_thread ( void *  hwd_ctx) [static]

< No error

Definition at line 1073 of file perf_events.c.

{
  pe_context_t *context = ( pe_context_t *) hwd_ctx;

  /* Zero the per-thread context, then flag it as ready for use. */
  memset( context, 0, sizeof ( *context ) );
  context->initialized = 1;

  return PAPI_OK;
}
static int _papi_pe_read ( void *  ctx,
void *  ctl,
long long **  events,
int  flags 
) [static]

< A System/C library call failed

< A System/C library call failed

< A System/C library call failed

< A System/C library call failed

< A System/C library call failed

< A System/C library call failed

< A System/C library call failed

< A System/C library call failed

< A System/C library call failed

< No error

Definition at line 1156 of file perf_events.c.

{
   /* Read the current counter values for the event set into           */
   /* pe_ctl->counts and point *events at them.  Three strategies:     */
   /*   1. multiplexed: read each event individually and scale by      */
   /*      time_enabled/time_running,                                  */
   /*   2. no FORMAT_GROUP (kernel bug or inherit set): read each      */
   /*      event's single value,                                       */
   /*   3. otherwise: one FORMAT_GROUP read from the group leader.     */
   ( void ) flags;           /*unused */
   int i, ret = -1;
   pe_context_t *pe_ctx = ( pe_context_t *) ctx;
   pe_control_t *pe_ctl = ( pe_control_t *) ctl;
   long long papi_pe_buffer[READ_BUFFER_SIZE];
   long long tot_time_running, tot_time_enabled, scale;

   /* On kernels before 2.6.33 the TOTAL_TIME_ENABLED and TOTAL_TIME_RUNNING */
   /* fields are always 0 unless the counter is disabled.  So if we are on   */
   /* one of these kernels, then we must disable events before reading.      */

   /* Elsewhere though we disable multiplexing on kernels before 2.6.34 */
   /* so maybe this isn't even necessary.                               */

   if (bug_sync_read()) {
      if ( pe_ctx->state & PERF_EVENTS_RUNNING ) {
         for ( i = 0; i < pe_ctl->num_events; i++ ) {
        /* disable only the group leaders */
        if ( pe_ctl->events[i].group_leader_fd == -1 ) {
           ret = ioctl( pe_ctl->events[i].event_fd, 
               PERF_EVENT_IOC_DISABLE, NULL );
           if ( ret == -1 ) {
              PAPIERROR("ioctl(PERF_EVENT_IOC_DISABLE) "
               "returned an error: ", strerror( errno ));
              return PAPI_ESYS;
           }
        }
     }
      }
   }


   /* Handle case where we are multiplexing */
   if (pe_ctl->multiplexed) {

      /* currently we handle multiplexing by having individual events */
      /* so we read from each in turn.                                */

      for ( i = 0; i < pe_ctl->num_events; i++ ) {

         ret = read( pe_ctl->events[i].event_fd, papi_pe_buffer, 
            sizeof ( papi_pe_buffer ) );
         if ( ret == -1 ) {
        PAPIERROR("read returned an error: ", strerror( errno ));
        return PAPI_ESYS;
     }

     /* We should read 3 64-bit values from the counter: */
     /* value, time_enabled, time_running.               */
     if (ret<(signed)(3*sizeof(long long))) {
        PAPIERROR("Error!  short read!\n");  
        return PAPI_ESYS;
     }

         SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n", 
            pe_ctl->events[i].event_fd, 
        (long)pe_ctl->tid, pe_ctl->cpu, ret);
         SUBDBG("read: %lld %lld %lld\n",papi_pe_buffer[0],
            papi_pe_buffer[1],papi_pe_buffer[2]);

         tot_time_enabled = papi_pe_buffer[1];
         tot_time_running = papi_pe_buffer[2];

         SUBDBG("count[%d] = (papi_pe_buffer[%d] %lld * "
        "tot_time_enabled %lld) / tot_time_running %lld\n",
        i, 0,papi_pe_buffer[0],
        tot_time_enabled,tot_time_running);

         if (tot_time_running == tot_time_enabled) {
        /* No scaling needed */
        pe_ctl->counts[i] = papi_pe_buffer[0];
         } else if (tot_time_running && tot_time_enabled) {
        /* Scale factor of 100 to avoid overflows when computing */
        /*enabled/running */

        scale = (tot_time_enabled * 100LL) / tot_time_running;
        scale = scale * papi_pe_buffer[0];
        scale = scale / 100LL;
        pe_ctl->counts[i] = scale;
     } else {
       /* This should not happen, but Phil reports it sometimes does. */
        SUBDBG("perf_event kernel bug(?) count, enabled, "
           "running: %lld, %lld, %lld\n",
           papi_pe_buffer[0],tot_time_enabled,
           tot_time_running);

        /* fall back to the unscaled raw count */
        pe_ctl->counts[i] = papi_pe_buffer[0];
     }
      }
   }

   /* Handle cases where we cannot use FORMAT GROUP */
   else if (bug_format_group() || pe_ctl->inherit) {

      /* we must read each counter individually */
      for ( i = 0; i < pe_ctl->num_events; i++ ) {

         ret = read( pe_ctl->events[i].event_fd, papi_pe_buffer, 
            sizeof ( papi_pe_buffer ) );
         if ( ret == -1 ) {
        PAPIERROR("read returned an error: ", strerror( errno ));
        return PAPI_ESYS;
     }

     /* we should read one 64-bit value from each counter */
     if (ret!=sizeof(long long)) {
        PAPIERROR("Error!  short read!\n");
        PAPIERROR("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
           pe_ctl->events[i].event_fd,
           (long)pe_ctl->tid, pe_ctl->cpu, ret);
        return PAPI_ESYS;
     }

         SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n", 
            pe_ctl->events[i].event_fd, (long)pe_ctl->tid, 
        pe_ctl->cpu, ret);
         SUBDBG("read: %lld\n",papi_pe_buffer[0]);

     pe_ctl->counts[i] = papi_pe_buffer[0];
      }
   }


   /* Handle cases where we are using FORMAT_GROUP   */
   /* We assume only one group leader, in position 0 */

   else {
      if (pe_ctl->events[0].group_leader_fd!=-1) {
     PAPIERROR("Was expecting group leader!\n");
      }

      ret = read( pe_ctl->events[0].event_fd, papi_pe_buffer, 
          sizeof ( papi_pe_buffer ) );

      if ( ret == -1 ) {
     PAPIERROR("read returned an error: ", strerror( errno ));
     return PAPI_ESYS;
      }

      /* we read 1 64-bit value (number of events) then     */
      /* num_events more 64-bit values that hold the counts */
      if (ret<(signed)((1+pe_ctl->num_events)*sizeof(long long))) {
     PAPIERROR("Error! short read!\n");
     return PAPI_ESYS;
      }

      SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n", 
         pe_ctl->events[0].event_fd, 
         (long)pe_ctl->tid, pe_ctl->cpu, ret);
      { 
     int j;
     for(j=0;j<ret/8;j++) {
            SUBDBG("read %d: %lld\n",j,papi_pe_buffer[j]);
     }
      }

      /* Make sure the kernel agrees with how many events we have */
      if (papi_pe_buffer[0]!=pe_ctl->num_events) {
     PAPIERROR("Error!  Wrong number of events!\n");
     return PAPI_ESYS;
      }

      /* put the count values in their proper location */
      for(i=0;i<papi_pe_buffer[0];i++) {
         pe_ctl->counts[i] = papi_pe_buffer[1+i];
      }
   }


   /* If we disabled the counters due to the sync_read_bug(), */
   /* then we need to re-enable them now.                     */
   if (bug_sync_read()) {
      if ( pe_ctx->state & PERF_EVENTS_RUNNING ) {
         for ( i = 0; i < pe_ctl->num_events; i++ ) {
        if ( pe_ctl->events[i].group_leader_fd == -1 ) {
           /* this should refresh any overflow counters too */
           ret = ioctl( pe_ctl->events[i].event_fd, 
                PERF_EVENT_IOC_ENABLE, NULL );
           if ( ret == -1 ) {
              /* Should never happen */
              PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) returned an error: ",
                strerror( errno ));
              return PAPI_ESYS;
           }
        }
     }
      }
   }

   /* point PAPI to the values we read */
   *events = pe_ctl->counts;

   return PAPI_OK;
}

Here is the call graph for this function:

static int _papi_pe_reset ( void *  ctx,
void *  ctl 
) [static]

< A System/C library call failed

< No error

Definition at line 1101 of file perf_events.c.

{
   int idx;
   pe_control_t *control = ( pe_control_t *) ctl;

   ( void ) ctx;             /*unused */

   /* Every event gets a reset ioctl, not only the group leaders. */
   for ( idx = 0; idx < control->num_events; idx++ ) {
      int fd = control->events[idx].event_fd;

      if ( ioctl( fd, PERF_EVENT_IOC_RESET, NULL ) == -1 ) {
         PAPIERROR("ioctl(%d, PERF_EVENT_IOC_RESET, NULL) "
           "returned error, Linux says: %s",
           fd, strerror( errno ) );
         return PAPI_ESYS;
      }
   }

   return PAPI_OK;
}

Here is the call graph for this function:

Here is the caller graph for this function:

int _papi_pe_set_domain ( void *  ctl,
int  domain 
)

< User context counted

< Kernel/OS context counted

< Supervisor/hypervisor context counted

< No error

Definition at line 937 of file perf_events.c.

{
   int idx;
   pe_control_t *control = ( pe_control_t *) ctl;

   SUBDBG("old control domain %d, new domain %d, default domain %d\n",
      control->domain,domain,_papi_pe_vector.cmp_info.default_domain);

   control->domain = domain;

   /* Push the domain down to every event's attr: a privilege level */
   /* absent from the domain becomes an exclude_* flag.             */
   for ( idx = 0; idx < control->num_events; idx++ ) {
      struct perf_event_attr *attr = &control->events[idx].attr;

      attr->exclude_user   = ( control->domain & PAPI_DOM_USER )       ? 0 : 1;
      attr->exclude_kernel = ( control->domain & PAPI_DOM_KERNEL )     ? 0 : 1;
      attr->exclude_hv     = ( control->domain & PAPI_DOM_SUPERVISOR ) ? 0 : 1;
   }
   return PAPI_OK;
}

Here is the caller graph for this function:

static int _papi_pe_set_overflow ( EventSetInfo_t ESI,
int  EventIndex,
int  threshold 
) [static]

< No error

< Invalid argument

< Invalid argument

< Internal error, please send mail to the developers

< No error

Definition at line 1843 of file perf_events.c.

{
   /* Arm (threshold > 0) or disarm (threshold == 0) hardware overflow  */
   /* signalling for the event at EventIndex.  The threshold becomes    */
   /* the kernel sample_period; the PAPI signal handler is installed    */
   /* while any event in the set still has a non-zero sample_period.    */
   /* Returns PAPI_OK, PAPI_EINVAL, PAPI_EBUG, or a signal/update error.*/
   int cidx = _papi_pe_vector.cmp_info.CmpIdx;
   pe_context_t *ctx = ( pe_context_t *) ( ESI->master->context[cidx] );
   pe_control_t *ctl = (pe_control_t *) ( ESI->ctl_state );
   int i, evt_idx, found_non_zero_sample_period = 0, retval = PAPI_OK;

   evt_idx = ESI->EventInfoArray[EventIndex].pos[0];

   SUBDBG("Attempting to set overflow for index %d (%d) of EventSet %d\n",
      evt_idx,EventIndex,ESI->EventSetIndex);

   if (evt_idx<0) {
      return PAPI_EINVAL;
   }

   if ( threshold == 0 ) {
      /* If this counter isn't set to overflow, it's an error */
      if ( ctl->events[evt_idx].attr.sample_period == 0 ) return PAPI_EINVAL;
   }

   ctl->events[evt_idx].attr.sample_period = threshold;

   /*
    * Note that the wakeup_mode field initially will be set to zero
    * (WAKEUP_MODE_COUNTER_OVERFLOW) as a result of a call to memset 0 to
    * all of the events in the ctl struct.
    *
    * Is it even set to any other value elsewhere?
    */
   switch ( ctl->events[evt_idx].wakeup_mode ) {
    case WAKEUP_MODE_PROFILING:
         /* Setting wakeup_events to special value zero means issue a */
         /* wakeup (signal) on every mmap page overflow.              */
         ctl->events[evt_idx].attr.wakeup_events = 0;
         break;

    case WAKEUP_MODE_COUNTER_OVERFLOW:
         /* Can this code ever be called? */

         /* Setting wakeup_events to one means issue a wakeup on every */
         /* counter overflow (not mmap page overflow).                 */
         ctl->events[evt_idx].attr.wakeup_events = 1;
         /* We need the IP to pass to the overflow handler */
         ctl->events[evt_idx].attr.sample_type = PERF_SAMPLE_IP;
         /* one for the user page, and two to take IP samples */
         ctl->events[evt_idx].nr_mmap_pages = 1 + 2;
         break;
    default:
         PAPIERROR( "ctl->wakeup_mode[%d] set to an unknown value - %u",
             evt_idx, ctl->events[evt_idx].wakeup_mode);
         return PAPI_EBUG;
   }

   /* Check all events for a non-zero sample period.                 */
   /* BUGFIX: scan events[i], not events[evt_idx] -- the old code    */
   /* re-tested the same event every iteration, so clearing overflow */
   /* on one event tore down the signal handler even when other      */
   /* events in the set still had a non-zero sample_period armed.    */
   for ( i = 0; i < ctl->num_events; i++ ) {
      if ( ctl->events[i].attr.sample_period ) {
     found_non_zero_sample_period = 1;
     break;
      }
   }

   if ( found_non_zero_sample_period ) {
      /* turn on internal overflow flag for this event set */
      ctl->overflow = 1;

      /* Enable the signal handler */
      retval = _papi_hwi_start_signal( 
                  _papi_pe_vector.cmp_info.hardware_intr_sig, 
                  1, _papi_pe_vector.cmp_info.CmpIdx );
      /* check the result, mirroring the stop path below */
      if ( retval != PAPI_OK ) return retval;
   } else {
      /* turn off internal overflow flag for this event set */
      ctl->overflow = 0;

      /* Remove the signal handler, if there are no remaining non-zero */
      /* sample_periods set                                            */
      retval = _papi_hwi_stop_signal( 
                 _papi_pe_vector.cmp_info.hardware_intr_sig );
      if ( retval != PAPI_OK ) return retval;
   }

   /* Let update_control_state push the changed attributes to the kernel */
   retval = _papi_pe_update_control_state( ctl, NULL,
                ( (pe_control_t *) (ESI->ctl_state) )->num_events,
                       ctx );

   return retval;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int _papi_pe_set_profile ( EventSetInfo_t ESI,
int  EventIndex,
int  threshold 
) [static]

< EventSet has overflowing enabled

< Using Hardware

< Use data address register profiling

< Use instruction address register profiling

< Not supported

< Drop a random 25% of the samples.

< Not supported

< No error

< No error

Definition at line 1933 of file perf_events.c.

{
   /* Enable (threshold > 0) or disable (threshold == 0) hardware      */
   /* profiling for the native event at EventIndex.  Enabling reserves */
   /* mmap pages for IP samples; disabling unmaps them and clears the  */
   /* overflow state on the eventset.                                  */
   int ret;
   int evt_idx;
   pe_control_t *ctl = ( pe_control_t *) ( ESI->ctl_state );

   /* Since you can't profile on a derived event, the event is always the */
   /* first and only event in the native event list.                      */
   evt_idx = ESI->EventInfoArray[EventIndex].pos[0];

   if ( threshold == 0 ) {
      SUBDBG( "MUNMAP(%p,%"PRIu64")\n", ctl->events[evt_idx].mmap_buf,
	  ( uint64_t ) ctl->events[evt_idx].nr_mmap_pages *
	  getpagesize(  ) );

      /* Release the sample buffer, if one was attached */
      if ( ctl->events[evt_idx].mmap_buf ) {
	 munmap( ctl->events[evt_idx].mmap_buf,
		 ctl->events[evt_idx].nr_mmap_pages * getpagesize() );
      }

      ctl->events[evt_idx].mmap_buf = NULL;
      ctl->events[evt_idx].nr_mmap_pages = 0;
      /* stop requesting IP samples from the kernel */
      ctl->events[evt_idx].attr.sample_type &= ~PERF_SAMPLE_IP;
      /* threshold of 0 also disables overflow for this event */
      ret = _papi_pe_set_overflow( ESI, EventIndex, threshold );
      /* ??? #warning "This should be handled somewhere else" */
      ESI->state &= ~( PAPI_OVERFLOWING );
      ESI->overflow.flags &= ~( PAPI_OVERFLOW_HARDWARE );

      return ret;
   }

   /* Look up the native event code */
   if ( ESI->profile.flags & (PAPI_PROFIL_DATA_EAR | PAPI_PROFIL_INST_EAR)) {
      /* Not supported yet... */

      return PAPI_ENOSUPP;
   }

   if ( ESI->profile.flags & PAPI_PROFIL_RANDOM ) {
      /* This requires an ability to randomly alter the sample_period within */
      /* a given range.  Kernel does not have this ability. FIXME            */
      return PAPI_ENOSUPP;
   }

   /* Just a guess at how many pages would make this relatively efficient.  */
   /* Note that it's "1 +" because of the need for a control page, and the  */
   /* number following the "+" must be a power of 2 (1, 4, 8, 16, etc) or   */
   /* zero.  This is required to optimize dealing with circular buffer      */
   /* wrapping of the mapped pages.                                         */

   ctl->events[evt_idx].nr_mmap_pages = (1+8);
   ctl->events[evt_idx].attr.sample_type |= PERF_SAMPLE_IP;

   /* the actual sample period is configured by set_overflow */
   ret = _papi_pe_set_overflow( ESI, EventIndex, threshold );
   if ( ret != PAPI_OK ) return ret;

   return PAPI_OK;
}

Here is the call graph for this function:

static int _papi_pe_shutdown_component ( void  ) [static]

< No error

Definition at line 1062 of file perf_events.c.

                                    {

  /* Component-wide teardown: the only global resource this component */
  /* holds is the libpfm4 library, so shut that down.                 */

  /* Shutdown libpfm4 */
  _papi_libpfm4_shutdown();

  return PAPI_OK;
}

Here is the call graph for this function:

static int _papi_pe_shutdown_thread ( void *  ctx) [static]

< No error

Definition at line 1087 of file perf_events.c.

{
    /* Mark the per-thread perf_events context as uninitialized so */
    /* it can be set up again if this thread is re-registered.     */
    ( ( pe_context_t * ) ctx )->initialized = 0;

    return PAPI_OK;
}
static int _papi_pe_start ( void *  ctx,
void *  ctl 
) [static]

< A System/C library call failed

< Internal error, please send mail to the developers

< No error

Definition at line 1355 of file perf_events.c.

{
   pe_context_t *perf_ctx = ( pe_context_t * ) ctx;
   pe_control_t *perf_ctl = ( pe_control_t * ) ctl;
   int i, ret;
   int num_enabled = 0;

   /* Reset the counters first.  Is this necessary? */
   ret = _papi_pe_reset( perf_ctx, perf_ctl );
   if ( ret ) {
      return ret;
   }

   /* Enable all of the group leaders                */
   /* All group leaders have a group_leader_fd of -1 */
   for ( i = 0; i < perf_ctl->num_events; i++ ) {
      if ( perf_ctl->events[i].group_leader_fd != -1 ) {
         continue;                 /* child events follow their leader */
      }

      SUBDBG("ioctl(enable): fd: %d\n", perf_ctl->events[i].event_fd);
      ret = ioctl( perf_ctl->events[i].event_fd,
                   PERF_EVENT_IOC_ENABLE, NULL );

      /* ioctls always return -1 on failure */
      if ( ret == -1 ) {
         PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed.\n");
         return PAPI_ESYS;
      }

      num_enabled++;
   }

   /* An eventset with no group leaders is an internal inconsistency */
   if ( num_enabled == 0 ) {
      PAPIERROR("Did not enable any counters.\n");
      return PAPI_EBUG;
   }

   perf_ctx->state |= PERF_EVENTS_RUNNING;

   return PAPI_OK;

}

Here is the call graph for this function:

static int _papi_pe_stop ( void *  ctx,
void *  ctl 
) [static]

< Internal error, please send mail to the developers

< No error

Definition at line 1399 of file perf_events.c.

{
   pe_context_t *perf_ctx = ( pe_context_t * ) ctx;
   pe_control_t *perf_ctl = ( pe_control_t * ) ctl;
   int i, ret;

   /* Just disable the group leaders; children stop with them */
   for ( i = 0; i < perf_ctl->num_events; i++ ) {
      if ( perf_ctl->events[i].group_leader_fd != -1 ) {
         continue;
      }

      ret = ioctl( perf_ctl->events[i].event_fd,
                   PERF_EVENT_IOC_DISABLE, NULL );
      if ( ret == -1 ) {
         PAPIERROR( "ioctl(%d, PERF_EVENT_IOC_DISABLE, NULL) "
                    "returned error, Linux says: %s",
                    perf_ctl->events[i].event_fd, strerror( errno ) );
         return PAPI_EBUG;
      }
   }

   perf_ctx->state &= ~PERF_EVENTS_RUNNING;

   return PAPI_OK;
}

Here is the call graph for this function:

static int _papi_pe_stop_profiling ( ThreadInfo_t thread,
EventSetInfo_t ESI 
) [static]

< No error

Definition at line 1816 of file perf_events.c.

{
   int i;
   int ret = PAPI_OK;
   pe_control_t *ctl = ESI->ctl_state;

   /* Drain any samples still sitting in the mmap buffers of events */
   /* that were being profiled.                                     */
   for ( i = 0; i < ctl->num_events; i++ ) {
      /* A non-NULL mmap_buf marks an fd that was used for profiling */
      if ( !ctl->events[i].mmap_buf ) {
         continue;
      }

      ret = process_smpl_buf( i, &thread );
      if ( ret ) {
         PAPIERROR( "process_smpl_buf returned error %d", ret );
         return ret;
      }
   }
   return ret;
}

Here is the call graph for this function:

static int _papi_pe_update_control_state ( void *  ctl,
NativeInfo_t native,
int  count,
void *  ctx 
) [static]

< No error

< No error

< No error

< No error

Definition at line 1447 of file perf_events.c.

{
   /* Rebuild the kernel-side state of an eventset: close any previously */
   /* opened perf_event fds, have libpfm4 fill a perf_event_attr for     */
   /* each native event, then re-open everything via open_pe_events().   */
   /* count == 0 is the deallocation path used when an eventset dies.    */
   int i = 0, ret;
   pe_context_t *pe_ctx = ( pe_context_t *) ctx;
   pe_control_t *pe_ctl = ( pe_control_t *) ctl;

   /* close all of the existing fds and start over again */
   /* In theory we could have finer-grained control and know if             */
   /* things were changed, but it's easier to tear things down and rebuild. */
   close_pe_events( pe_ctx, pe_ctl );

   /* Calling with count==0 should be OK, it's how things are deallocated */
   /* when an eventset is destroyed.                                      */
   if ( count == 0 ) {
      SUBDBG( "Called with count == 0\n" );
      return PAPI_OK;
   }

   /* set up all the events */
   for( i = 0; i < count; i++ ) {
      if ( native ) {
	 /* Have libpfm4 set the config values for the event */
	 ret=_papi_libpfm4_setup_counters(&pe_ctl->events[i].attr,
					 native[i].ni_event);
	 SUBDBG( "pe_ctl->events[%d].config=%"PRIx64"\n",i,
		 pe_ctl->events[i].attr.config);
	 if (ret!=PAPI_OK) return ret;

      } else {
	  /* I'm not sure how we'd end up in this case */
          /* should it be an error?                    */
      }

      /* Copy the inherit flag into the attribute block that will be   */
      /* passed to the kernel */
      pe_ctl->events[i].attr.inherit = pe_ctl->inherit;

      /* Set the position in the native structure */
      /* We just set up events linearly           */
      if ( native ) {
	 native[i].ni_position = i;
      }
   }

   pe_ctl->num_events = count;
   /* propagate the counting domain into every attr we just built */
   _papi_pe_set_domain( ctl, pe_ctl->domain );

   /* actually open the events */
   /* (why is this a separate function?) */
   ret = open_pe_events( pe_ctx, pe_ctl );
   if ( ret != PAPI_OK ) {
      SUBDBG("open_pe_events failed\n");
      /* Restore values ? */
      return ret;
   }

   return PAPI_OK;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int _papi_pe_write ( void *  ctx,
void *  ctl,
long long *  from 
) [static]

< Not supported

Definition at line 1126 of file perf_events.c.

{
   /* all parameters intentionally unused */
   ( void ) ctx;
   ( void ) ctl;
   ( void ) from;

   /*
    * Counters cannot be written.  Do we need to virtualize the
    * counters so that they can be written, or perhaps modify code so that
    * they can be written? FIXME ?
    */

   return PAPI_ENOSUPP;
}
static int bug_check_scheduability ( void  ) [static]

Definition at line 120 of file perf_events.c.

                              {

  /* Return nonzero when the running kernel cannot be trusted to report */
  /* schedulability at perf_event_open() time, in which case the caller */
  /* must probe with an enable/disable/read cycle instead.              */

#if defined(__powerpc__)
  /* PowerPC not affected by this bug */
#elif defined(__mips__)
  /* MIPS as of kernel 3.1 does not properly detect schedulability */
  return 1;
#else
  /* kernels before 2.6.33 have the bug on other architectures */
  if (_papi_os_info.os_version < LINUX_VERSION(2,6,33)) return 1;
#endif

  /* an active NMI watchdog also defeats the open-time check */
  if (nmi_watchdog_active) return 1;

  return 0;
}

Here is the caller graph for this function:

static int bug_format_group ( void  ) [static]

Definition at line 142 of file perf_events.c.

                       {

  /* Kernels older than 2.6.34 lack working PERF_FORMAT_GROUP support */
  if (_papi_os_info.os_version < LINUX_VERSION(2,6,34)) {
     return 1;
  }

#if defined(__mips__)
  /* MIPS, as of version 3.1, does not support this properly */
  return 1;
#else
  return 0;
#endif

}

Here is the caller graph for this function:

static int bug_sync_read ( void  ) [static]

Definition at line 162 of file perf_events.c.

                    {

  /* Return nonzero on kernels (before 2.6.33) where counter reads */
  /* are not properly synchronized.                                */
  if (_papi_os_info.os_version < LINUX_VERSION(2,6,33)) return 1;

  return 0;

}

Here is the caller graph for this function:

static int check_permissions ( unsigned long  tid,
unsigned int  cpu_num,
unsigned int  domain,
unsigned int  multiplex,
unsigned int  inherit 
) [static]

Check if the current set of options is supported by the perf_events interface on the running kernel.

< Supervisor/hypervisor context counted

< User context counted

< Kernel/OS context counted

< Permission level does not permit operation

< No error

Definition at line 268 of file perf_events.c.

{
   struct perf_event_attr attr;
   int fd;

   /* Probe whether the requested combination of tid/cpu/domain/        */
   /* multiplex/inherit is permitted by opening (and then closing) a    */
   /* throw-away instructions event with those options.                 */

   /* clearing this will set a type of hardware and to count all domains */
   memset( &attr, 0, sizeof(attr) );
   attr.read_format = get_read_format( multiplex, inherit, 1 );

   /* set the event id (config field) to instructions */
   /* (an event that should always exist)             */
   /* This was cycles but that is missing on Niagara  */
   attr.config = PERF_COUNT_HW_INSTRUCTIONS;

   /* exclude every domain that was not requested */
   attr.exclude_user   = !( domain & PAPI_DOM_USER );
   attr.exclude_kernel = !( domain & PAPI_DOM_KERNEL );
   attr.exclude_hv     = !( domain & PAPI_DOM_SUPERVISOR );

   SUBDBG("Calling sys_perf_event_open() from check_permissions\n");

   fd = sys_perf_event_open( &attr, tid, cpu_num, -1, 0 );
   if ( fd == -1 ) {
      SUBDBG("sys_perf_event_open returned error.  Linux says, %s",
             strerror( errno ) );
      return PAPI_EPERM;
   }

   /* now close it, this was just to make sure we have permissions */
   /* to set these options                                         */
   close( fd );
   return PAPI_OK;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int check_scheduability ( pe_context_t ctx,
pe_control_t ctl,
int  idx 
) [static]

< A System/C library call failed

< A System/C library call failed

< A System/C library call failed

< Event exists, but cannot be counted due to counter resource limitations

< A System/C library call failed

< No error

Definition at line 328 of file perf_events.c.

{
   /* Verify that the just-opened event ctl->events[idx] can actually be */
   /* scheduled onto a counter.  On kernels where the open-time check    */
   /* lies (see bug_check_scheduability), briefly enable/disable the     */
   /* group and confirm a read() returns data.  Returns PAPI_OK,         */
   /* PAPI_ECNFLCT if the event cannot be scheduled, or PAPI_ESYS.       */
   int retval = 0, cnt = -1;
   ( void ) ctx;             /*unused */
   long long papi_pe_buffer[READ_BUFFER_SIZE];
   int i,group_leader_fd;

   if (bug_check_scheduability()) {

      /* If the kernel isn't tracking scheduability right       */
      /* Then we need to start/stop/read to force the event     */
      /* to be scheduled and see if an error condition happens. */

      /* get the proper fd to start */
      group_leader_fd=ctl->events[idx].group_leader_fd;
      if (group_leader_fd==-1) group_leader_fd=ctl->events[idx].event_fd;

      /* start the event */
      retval = ioctl( group_leader_fd, PERF_EVENT_IOC_ENABLE, NULL );
      if (retval == -1) {
	 PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed.\n");
	 return PAPI_ESYS;
      }

      /* stop the event */
      retval = ioctl(group_leader_fd, PERF_EVENT_IOC_DISABLE, NULL );
      if (retval == -1) {
	 PAPIERROR( "ioctl(PERF_EVENT_IOC_DISABLE) failed.\n" );
	 return PAPI_ESYS;
      }

      /* See if a read returns any results */
      cnt = read( group_leader_fd, papi_pe_buffer, sizeof(papi_pe_buffer));
      if ( cnt == -1 ) {
	 SUBDBG( "read returned an error!  Should never happen.\n" );
	 return PAPI_ESYS;
      }

      if ( cnt == 0 ) {
         /* We read 0 bytes if we could not schedule the event */
         /* The kernel should have detected this at open       */
         /* but various bugs (including NMI watchdog)          */
         /* result in this behavior                            */

	 return PAPI_ECNFLCT;

     } else {

	/* Reset all of the counters (opened so far) back to zero      */
	/* from the above brief enable/disable call pair.              */

	/* We have to reset all events because reset of group leader      */
        /* does not reset all.                                            */
	/* we assume that the events are being added one by one and that  */
        /* we do not need to reset higher events (doing so may reset ones */
        /* that have not been initialized yet.                            */

	/* Note... PERF_EVENT_IOC_RESET does not reset time running       */
	/* info if multiplexing, so we should avoid coming here if        */
	/* we are multiplexing the event.                                 */
        for( i = 0; i < idx; i++) {
	   retval=ioctl( ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL );
	   if (retval == -1) {
	      PAPIERROR( "ioctl(PERF_EVENT_IOC_RESET) #%d/%d %d "
			 "(fd %d)failed.\n",
			 i,ctl->num_events,idx,ctl->events[i].event_fd);
	      return PAPI_ESYS;
	   }
	}
      }
   }
   return PAPI_OK;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int close_pe_events ( pe_context_t ctx,
pe_control_t ctl 
) [static]

< A System/C library call failed

< A System/C library call failed

< A System/C library call failed

< A System/C library call failed

< Internal error, please send mail to the developers

< No error

Definition at line 589 of file perf_events.c.

{
   /* Close every opened perf_event fd in this control state, unmapping */
   /* any attached sample buffer first.  Child events are closed before */
   /* group leaders.  Returns PAPI_ESYS on a munmap/close failure, or   */
   /* PAPI_EBUG if the number closed does not match num_events.         */
   int i;
   int num_closed=0;

   /* should this be a more serious error? */
   if ( ctx->state & PERF_EVENTS_RUNNING ) {
      SUBDBG("Closing without stopping first\n");
   }

   /* Close child events first */
   for( i=0; i<ctl->num_events; i++ ) {

      if (ctl->events[i].event_opened) {

         if (ctl->events[i].group_leader_fd!=-1) {
            if ( ctl->events[i].mmap_buf ) {
	       if ( munmap ( ctl->events[i].mmap_buf,
		             ctl->events[i].nr_mmap_pages * getpagesize() ) ) {
	          PAPIERROR( "munmap of fd = %d returned error: %s",
			     ctl->events[i].event_fd, strerror( errno ) );
	          return PAPI_ESYS;
	       }
	    }

            if ( close( ctl->events[i].event_fd ) ) {
	       PAPIERROR( "close of fd = %d returned error: %s",
		       ctl->events[i].event_fd, strerror( errno ) );
	       return PAPI_ESYS;
	    } else {
	       num_closed++;
	    }
	    ctl->events[i].event_opened=0;
	 }
      }
   }

   /* Close the group leaders last */
   for( i=0; i<ctl->num_events; i++ ) {

      if (ctl->events[i].event_opened) {

         if (ctl->events[i].group_leader_fd==-1) {
            if ( ctl->events[i].mmap_buf ) {
	       if ( munmap ( ctl->events[i].mmap_buf,
		             ctl->events[i].nr_mmap_pages * getpagesize() ) ) {
	          PAPIERROR( "munmap of fd = %d returned error: %s",
			     ctl->events[i].event_fd, strerror( errno ) );
	          return PAPI_ESYS;
	       }
	    }


            if ( close( ctl->events[i].event_fd ) ) {
	       PAPIERROR( "close of fd = %d returned error: %s",
		       ctl->events[i].event_fd, strerror( errno ) );
	       return PAPI_ESYS;
	    } else {
	       num_closed++;
	    }
	    ctl->events[i].event_opened=0;
	 }
      }
   }


   /* sanity check: every open event should have been closed above */
   if (ctl->num_events!=num_closed) {
      PAPIERROR("Didn't close all events\n");
      return PAPI_EBUG;
   }

   ctl->num_events=0;

   ctx->state &= ~PERF_EVENTS_OPENED;

   return PAPI_OK;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int detect_rdpmc ( void  ) [static]

< A System/C library call failed

< A System/C library call failed

Definition at line 710 of file perf_events.c.

                              {

   /* Detect whether the kernel allows userspace rdpmc reads of the     */
   /* counters, by opening a throw-away instructions event and checking */
   /* cap_usr_rdpmc in its mmap'd control page.                         */
   /* Returns 1 if rdpmc is available, 0 if not, PAPI_ESYS on failure.  */

   struct perf_event_attr pe;
   int fd,rdpmc_exists=1;
   void *addr;
   struct perf_event_mmap_page *our_mmap;
   /* use the real page size; the old hard-coded 4096 was wrong on */
   /* kernels configured with larger pages (e.g. 64kB on POWER)    */
   long page_size = getpagesize();

   /* Create a fake instructions event so we can read a mmap page */
   memset(&pe,0,sizeof(struct perf_event_attr));

   pe.type=PERF_TYPE_HARDWARE;
   pe.size=sizeof(struct perf_event_attr);
   pe.config=PERF_COUNT_HW_INSTRUCTIONS;

   fd=sys_perf_event_open(&pe,0,-1,-1,0);
   if (fd<0) {
      return PAPI_ESYS;
   }

   /* map the control page (one page holds the header) */
   addr=mmap(NULL, page_size, PROT_READ, MAP_SHARED,fd,0);
   if (addr == MAP_FAILED) {
      close(fd);
      return PAPI_ESYS;
   }

   /* get the rdpmc info */
   our_mmap=(struct perf_event_mmap_page *)addr;
   if (our_mmap->cap_usr_rdpmc==0) {
      rdpmc_exists=0;
   }

   /* close the fake event */
   munmap(addr,page_size);
   close(fd);

   return rdpmc_exists;

} 

Here is the call graph for this function:

Here is the caller graph for this function:

static int fcntl_setown_fd ( int  fd) [static]

< A System/C library call failed

< A System/C library call failed

< No error

Definition at line 176 of file perf_events.c.

                        {

   struct f_owner_ex fown_ex;
   int ret;

   /* Direct overflow signals for this fd at the calling thread.  */
   /* F_SETOWN_EX (thread-targeted) is not available until 2.6.32 */
   if ( _papi_os_info.os_version >= LINUX_VERSION(2,6,32) ) {

      /* set ownership of the descriptor to this specific thread */
      fown_ex.type = F_OWNER_TID;
      fown_ex.pid  = mygettid();
      ret = fcntl( fd, F_SETOWN_EX, (unsigned long) &fown_ex );
      if ( ret == -1 ) {
         PAPIERROR( "cannot fcntl(F_SETOWN_EX) on %d: %s", 
		fd, strerror( errno ) );
         return PAPI_ESYS;
      }
      return PAPI_OK;
   }

   /* older kernel: fall back to plain F_SETOWN */
   ret = fcntl( fd, F_SETOWN, mygettid() );
   if ( ret == -1 ) {
      PAPIERROR( "cannot fcntl(F_SETOWN) on %d: %s", fd, strerror(errno) );
      return PAPI_ESYS;
   }
   return PAPI_OK;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int find_profile_index ( EventSetInfo_t ESI,
int  evt_idx,
int *  flags,
unsigned int *  native_index,
int *  profile_index 
) [static]

< No error

< Internal error, please send mail to the developers

Definition at line 752 of file perf_events.c.

{
   int idx, pos, count;

   /* Scan the profiled-event list for the entry whose first counter */
   /* position matches evt_idx, and report its index, native code,   */
   /* and profiling flags through the out-parameters.                */
   for ( count = 0; count < ESI->profile.event_counter; count++ ) {
       idx = ESI->profile.EventIndex[count];
       pos = ESI->EventInfoArray[idx].pos[0];

       if ( pos != evt_idx ) {
          continue;
       }

       *profile_index = count;
       *native_index = ESI->NativeInfoArray[pos].ni_event &
                       PAPI_NATIVE_AND_MASK;
       *flags = ESI->profile.flags;
       SUBDBG( "Native event %d is at profile index %d, flags %d\n",
	   *native_index, *profile_index, *flags );
       return PAPI_OK;
   }

   /* no profiled event maps to evt_idx -- internal inconsistency */
   PAPIERROR( "wrong count: %d vs. ESI->profile.event_counter %d", count,
	  ESI->profile.event_counter );
   return PAPI_EBUG;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static unsigned int get_read_format ( unsigned int  multiplex,
unsigned int  inherit,
int  format_group 
) [static]

Definition at line 229 of file perf_events.c.

{
   unsigned int fmt = 0;

   /* multiplexed counts need enabled/running times for scaling */
   if ( multiplex ) {
      fmt |= PERF_FORMAT_TOTAL_TIME_ENABLED |
             PERF_FORMAT_TOTAL_TIME_RUNNING;
   }

   /* group reads: only for group leaders, only when the kernel */
   /* supports it, and never together with inherit              */
   if ( format_group && !inherit && !bug_format_group() ) {
      fmt |= PERF_FORMAT_GROUP;
   }

   SUBDBG("multiplex: %d, inherit: %d, group_leader: %d, format: 0x%x\n",
	  multiplex, inherit, format_group, fmt);

   return fmt;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void mmap_read ( ThreadInfo_t **  thr,
pe_event_info_t pe,
int  profile_index 
) [static]

Definition at line 828 of file perf_events.c.

{
   /* Drain all sample records between our consumer position (pe->tail) */
   /* and the kernel's producer position (data_head), dispatching each  */
   /* PERF_RECORD_SAMPLE's IP into the profile buckets, then publish    */
   /* the new tail back to the kernel.                                  */
   int cidx = _papi_pe_vector.cmp_info.CmpIdx;
   uint64_t head = mmap_read_head( pe );
   uint64_t old = pe->tail;
   unsigned char *data = ((unsigned char*)pe->mmap_buf) + getpagesize(  );
   int64_t diff;

   /* head and tail are free-running 64-bit byte offsets; compute the  */
   /* distance in a signed 64-bit type -- the previous 'int' could     */
   /* truncate the difference once the offsets diverged past 2GB.      */
   diff = head - old;
   if ( diff < 0 ) {
      SUBDBG( "WARNING: failed to keep up with mmap data. head = %" PRIu64
	      ",  tail = %" PRIu64 ". Discarding samples.\n", head, old );
      /* head points to a known good entry, start there. */
      old = head;
   }

   for( ; old != head; ) {

      perf_sample_event_t *event = ( perf_sample_event_t * ) 
	                           & data[old & pe->mask];
      perf_sample_event_t event_copy;
      size_t size = event->header.size;

      /* Event straddles the mmap boundary -- header should always */
      /* be inside due to u64 alignment of output.                 */
      if ( ( old & pe->mask ) + size != ( ( old + size ) & pe->mask ) ) {
	 uint64_t offset = old;
	 uint64_t len = min( sizeof ( *event ), size ), cpy;
	 void *dst = &event_copy;

	 /* reassemble the wrapped record into a contiguous copy */
	 do {
	    cpy = min( pe->mask + 1 - ( offset & pe->mask ), len );
	    memcpy( dst, &data[offset & pe->mask], cpy );
	    offset += cpy;
	    dst = ((unsigned char*)dst) + cpy;
	    len -= cpy;
	 } while ( len );

	 event = &event_copy;
      }

      old += size;

      SUBDBG( "event->type = %08x\n", event->header.type );
      SUBDBG( "event->size = %d\n", event->header.size );

      switch ( event->header.type ) {
         case PERF_RECORD_SAMPLE:
	      _papi_hwi_dispatch_profile( ( *thr )->running_eventset[cidx],
			    ( caddr_t ) ( unsigned long ) event->ip.ip, 
	                          0, profile_index );
	      break;

	 case PERF_RECORD_LOST:
	      SUBDBG( "Warning: because of a mmap buffer overrun, %" PRId64
		      " events were lost.\n"
		      "Loss was recorded when counter id 0x%"PRIx64 
		      " overflowed.\n", event->lost.lost, event->lost.id );
	      break;

	 default:
	      SUBDBG( "Error: unexpected header type - %d\n",
                        event->header.type );
	      break;
      }
   }

   pe->tail = old;
   mmap_write_tail( pe, old );
}

Here is the call graph for this function:

Here is the caller graph for this function:

static uint64_t mmap_read_head ( pe_event_info_t pe) [static]

Definition at line 782 of file perf_events.c.

{
   /* Read the kernel's producer offset (data_head) from the mmap'd   */
   /* control page, with a read barrier so subsequent reads of the    */
   /* ring data are not reordered ahead of the head load.             */
   struct perf_event_mmap_page *pc = pe->mmap_buf;
   uint64_t head;

   if ( pc == NULL ) {
      PAPIERROR( "perf_event_mmap_page is NULL" );
      return 0;
   }

   /* data_head is a 64-bit free-running offset; the old 'int' local */
   /* truncated it once more than 2GB of data had been produced.     */
   head = pc->data_head;
   rmb(  );

   return head;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static void mmap_write_tail ( pe_event_info_t pe,
uint64_t  tail 
) [static]

Definition at line 799 of file perf_events.c.

{
   /* Publish our consumer position back to the kernel so it may reuse */
   /* the ring-buffer space up to 'tail'.                              */
   /* NOTE(review): unlike mmap_read_head() there is no NULL check on  */
   /* mmap_buf here -- callers are assumed to have a valid mapping.    */
   struct perf_event_mmap_page *pc = pe->mmap_buf;

   /* ensure all reads are done before we write the tail out. */
   mb(  );
   pc->data_tail = tail;
}

Here is the caller graph for this function:

static int open_pe_events ( pe_context_t ctx,
pe_control_t ctl 
) [static]

< No error

< Event exists, but cannot be counted due to counter resource limitations

< No error

< No error

< No error

Definition at line 473 of file perf_events.c.

{

   /* Open a perf_event fd for every event in the control state.       */
   /* Event 0 (or, under multiplexing, every event) is a group leader; */
   /* others attach to event 0's fd.  After opening, verify each event */
   /* is actually schedulable, then attach sample buffers / signal     */
   /* handlers for any events with a sample_period.  On any failure    */
   /* all fds opened so far are closed before returning.               */

   int i, ret = PAPI_OK;

   for( i = 0; i < ctl->num_events; i++ ) {

      ctl->events[i].event_opened=0;

      /* set up the attr structure.  We don't set up all fields here */
      /* as some have already been set up previously.                */

      /* group leader (event 0) is special                */
      /* If we're multiplexed, everyone is a group leader */
      if (( i == 0 ) || (ctl->multiplexed)) {
         ctl->events[i].attr.pinned = !ctl->multiplexed;
	 ctl->events[i].attr.disabled = 1;
	 ctl->events[i].group_leader_fd=-1;
         ctl->events[i].attr.read_format = get_read_format(ctl->multiplexed, 
							   ctl->inherit, 
							   !ctl->multiplexed );
      } else {
	 ctl->events[i].attr.pinned=0;
	 ctl->events[i].attr.disabled = 0;
	 ctl->events[i].group_leader_fd=ctl->events[0].event_fd;
         ctl->events[i].attr.read_format = get_read_format(ctl->multiplexed, 
							   ctl->inherit, 
							   0 );
      }


      /* try to open */
      ctl->events[i].event_fd = sys_perf_event_open( &ctl->events[i].attr, 
						     ctl->tid,
						     ctl->cpu,
			           ctl->events[i].group_leader_fd,
						     0 /* flags */
						     );
      
      if ( ctl->events[i].event_fd == -1 ) {
	 SUBDBG("sys_perf_event_open returned error on event #%d."
		"  Error: %s\n",
		i, strerror( errno ) );
	 ret = PAPI_ECNFLCT;
	 goto open_pe_cleanup;
      }

      /* (format fix: read_format is printed in hex, so use PRIx64) */
      SUBDBG ("sys_perf_event_open: tid: %ld, cpu_num: %d,"
              " group_leader/fd: %d, event_fd: %d,"
              " read_format: 0x%"PRIx64"\n",
	      (long)ctl->tid, ctl->cpu, ctl->events[i].group_leader_fd, 
	      ctl->events[i].event_fd, ctl->events[i].attr.read_format);


      /* in many situations the kernel will indicate we opened fine */
      /* yet things will fail later.  So we need to double check    */
      /* we actually can use the events we've set up.               */

      /* This is not necessary if we are multiplexing, and in fact */
      /* we cannot do this properly if multiplexed because         */
      /* PERF_EVENT_IOC_RESET does not reset the time running info */
      if (!ctl->multiplexed) {
	 ret = check_scheduability( ctx, ctl, i );

         if ( ret != PAPI_OK ) {
	    /* the last event did open, so we need to bump the counter */
	    /* before doing the cleanup                                */
	    i++;
                                  
            goto open_pe_cleanup;
	 }
      }
      ctl->events[i].event_opened=1;
   }

   /* Now that we've successfully opened all of the events, do whatever  */
   /* "tune-up" is needed to attach the mmap'd buffers, signal handlers, */
   /* and so on.                                                         */
   for ( i = 0; i < ctl->num_events; i++ ) {

      /* If sampling is enabled, hook up signal handler */
      if ( ctl->events[i].attr.sample_period ) {
	 ret = tune_up_fd( ctl, i );
	 if ( ret != PAPI_OK ) {
	    /* All of the fds are open, so we need to clean up all of them */
	    i = ctl->num_events;
	    goto open_pe_cleanup;
	 }
      } else {
	 /* Make sure this is NULL so close_pe_events works right */
	 ctl->events[i].mmap_buf = NULL;
      }
   }

   /* Set num_evts only if completely successful */
   ctx->state |= PERF_EVENTS_OPENED;
	
   return PAPI_OK;

open_pe_cleanup:
   /* We encountered an error, close up the fds we successfully opened.  */
   /* We go backward in an attempt to close group leaders last, although */
   /* That's probably not strictly necessary.                            */
   while ( i > 0 ) {
      i--;
      if (ctl->events[i].event_fd>=0) {
	 close( ctl->events[i].event_fd );
	 ctl->events[i].event_opened=0;
      }
   }

   return ret;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int pe_vendor_fixups ( void  ) [static]

< Kernel/OS context counted

< Supervisor/hypervisor context counted

< User context counted

< Kernel/OS context counted

< Supervisor/hypervisor context counted

< Kernel/OS context counted

< User context counted

< Kernel/OS context counted

< Supervisor/hypervisor context counted

< User context counted

< Kernel/OS context counted

< Supervisor/hypervisor context counted

< Exception/transient mode (like user TLB misses )

< No error

Definition at line 669 of file perf_events.c.

Here is the caller graph for this function:

static int process_smpl_buf ( int  evt_idx,
ThreadInfo_t **  thr 
) [static]

< No error

< No error

Definition at line 902 of file perf_events.c.

{
   int cidx = _papi_pe_vector.cmp_info.CmpIdx;
   int flags, profile_index, ret;
   unsigned native_index;
   pe_control_t *ctl;

   /* map this event-set position back to its profiling slot */
   ret = find_profile_index( ( *thr )->running_eventset[cidx], evt_idx, 
			 &flags, &native_index, &profile_index );
   if ( ret != PAPI_OK ) {
      return ret;
   }

   ctl = ( *thr )->running_eventset[cidx]->ctl_state;

   /* drain this event's mmap sample buffer into the profile */
   mmap_read( thr, &(ctl->events[evt_idx]), profile_index );

   return PAPI_OK;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int processor_supported ( int  vendor,
int  family 
) [static]

< Not supported

< No error

Definition at line 212 of file perf_events.c.

                                            {

   /* Pentium 4 (Intel family 15) support only landed in kernel 2.6.35 */
   if ( ( vendor == PAPI_VENDOR_INTEL ) && ( family == 15 ) &&
        ( _papi_os_info.os_version < LINUX_VERSION(2,6,35) ) ) {
      PAPIERROR("Pentium 4 not supported on kernels before 2.6.35");
      return PAPI_ENOSUPP;
   }

   return PAPI_OK;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int tune_up_fd ( pe_control_t ctl,
int  evt_idx 
) [static]

< A System/C library call failed

< No error

< A System/C library call failed

< A System/C library call failed

< A System/C library call failed

< No error

Definition at line 406 of file perf_events.c.

{
   /* Prepare an already-open event fd for sampling: request async     */
   /* SIGIO-style notification routed to this thread, set close-on-    */
   /* exec, and mmap() a ring buffer for the kernel to write samples   */
   /* into.  Returns PAPI_OK or PAPI_ESYS.                             */
   int ret;
   void *buf_addr;
   int fd = ctl->events[evt_idx].event_fd;

   /* Register that we would like a SIGIO notification when a mmap'd page */
   /* becomes full.                                                       */
   ret = fcntl( fd, F_SETFL, O_ASYNC | O_NONBLOCK );
   if ( ret ) {
      PAPIERROR ( "fcntl(%d, F_SETFL, O_ASYNC | O_NONBLOCK) "
		  "returned error: %s", fd, strerror( errno ) );
      return PAPI_ESYS;
   }

   /* Set the F_SETOWN_EX flag on the fd.                          */
   /* This affects which thread an overflow signal gets sent to.   */
   ret=fcntl_setown_fd(fd);
   if (ret!=PAPI_OK) return ret;
       
   /* Set FD_CLOEXEC.  Otherwise if we do an exec with an overflow */
   /* running, the overflow handler will continue into the exec()'d*/
   /* process and kill it because no signal handler is set up.     */
   ret=fcntl(fd, F_SETFD, FD_CLOEXEC);
   if (ret) {
      return PAPI_ESYS;
   }

   /* When you explicitly declare that you want a particular signal,  */
   /* even when you use the default signal, the kernel will send more */
   /* information concerning the event to the signal handler.         */
   /*                                                                 */
   /* In particular, it will send the file descriptor from which the  */
   /* event is originating which can be quite useful when monitoring  */
   /* multiple tasks from a single thread.                            */
   ret = fcntl( fd, F_SETSIG, _papi_pe_vector.cmp_info.hardware_intr_sig );
   if ( ret == -1 ) {
      PAPIERROR( "cannot fcntl(F_SETSIG,%d) on %d: %s",
		 _papi_pe_vector.cmp_info.hardware_intr_sig, fd,
		 strerror( errno ) );
      return PAPI_ESYS;
   }

   /* mmap() the sample buffer */
   buf_addr = mmap( NULL, ctl->events[evt_idx].nr_mmap_pages * getpagesize(),
		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 );
   if ( buf_addr == MAP_FAILED ) {
      PAPIERROR( "mmap(NULL,%d,%d,%d,%d,0): %s",
		 ctl->events[evt_idx].nr_mmap_pages * getpagesize(  ), 
		 PROT_READ, MAP_SHARED, fd, strerror( errno ) );
      return ( PAPI_ESYS );
   }

   SUBDBG( "Sample buffer for fd %d is located at %p\n", fd, buf_addr );

   /* Set up the mmap buffer and its associated helpers */
   /* mask covers the data area only (nr_mmap_pages includes one     */
   /* control page); relies on the data size being a power of two.   */
   ctl->events[evt_idx].mmap_buf = (struct perf_counter_mmap_page *) buf_addr;
   ctl->events[evt_idx].tail = 0;
   ctl->events[evt_idx].mask = ( ctl->events[evt_idx].nr_mmap_pages - 1 ) * 
                               getpagesize() - 1;

   return PAPI_OK;
}

Here is the call graph for this function:

Here is the caller graph for this function:


Variable Documentation

Definition at line 103 of file perf_events.c.

int nmi_watchdog_active [static]

Definition at line 100 of file perf_events.c.

 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines