PAPI  5.7.0.0
perf_event.c
Go to the documentation of this file.
1 /*
2 * File: perf_event.c
3 *
4 * Author: Corey Ashford
5 * cjashfor@us.ibm.com
6 * - based upon perfmon.c written by -
7 * Philip Mucci
8 * mucci@cs.utk.edu
9 * Mods: Gary Mohr
10 * gary.mohr@bull.com
11 * Mods: Vince Weaver
12 * vweaver1@eecs.utk.edu
13 * Mods: Philip Mucci
14 * mucci@eecs.utk.edu
15 * Mods: Gary Mohr
16 * gary.mohr@bull.com
17 * Modified the perf_event component to use PFM_OS_PERF_EVENT_EXT mode in libpfm4.
18 * This adds several new event masks, including cpu=, u=, and k= which give the user
19 * the ability to set cpu number to use or control the domain (user, kernel, or both)
20 * in which the counter should be incremented. These are event masks so it is now
21 * possible to have multiple events in the same event set that count activity from
22 * different CPUs or count activity in different domains.
23 */
24 
25 
26 #include <fcntl.h>
27 #include <string.h>
28 #include <errno.h>
29 #include <signal.h>
30 #include <syscall.h>
31 #include <sys/utsname.h>
32 #include <sys/mman.h>
33 #include <sys/ioctl.h>
34 
35 /* PAPI-specific includes */
36 #include "papi.h"
37 #include "papi_memory.h"
38 #include "papi_internal.h"
39 #include "papi_vector.h"
40 #include "extras.h"
41 
42 /* libpfm4 includes */
43 #include "papi_libpfm4_events.h"
44 #include "pe_libpfm4_events.h"
45 #include "perfmon/pfmlib.h"
46 #include PEINCLUDE
47 
48 /* Linux-specific includes */
49 #include "mb.h"
50 #include "linux-memory.h"
51 #include "linux-timer.h"
52 #include "linux-common.h"
53 #include "linux-context.h"
54 
55 #include "perf_event_lib.h"
56 #include "perf_helpers.h"
57 
58 /* Set to enable pre-Linux 2.6.34 perf_event workarounds */
59 /* If disabling them gets no complaints then we can remove */
60 /* These in a future version of PAPI. */
61 #define OBSOLETE_WORKAROUNDS 0
62 
63 /* Defines for ctx->state */
64 #define PERF_EVENTS_OPENED 0x01
65 #define PERF_EVENTS_RUNNING 0x02
66 
67 /* Forward declaration */
69 
70 /* Globals */
72 static int our_cidx;
74 
75 /* The kernel developers say to never use a refresh value of 0 */
76 /* See https://lkml.org/lkml/2011/5/24/172 */
77 /* However, on some platforms (like Power) a value of 1 does not work */
78 /* We're still tracking down why this happens. */
79 
80 #if defined(__powerpc__)
81 #define PAPI_REFRESH_VALUE 0
82 #else
83 #define PAPI_REFRESH_VALUE 1
84 #endif
85 
86 static int _pe_set_domain( hwd_control_state_t *ctl, int domain);
87 
88 #if (OBSOLETE_WORKAROUNDS==1)
89 
90 /* Check for processor support */
91 /* Can be used for generic checking, though in general we only */
92 /* check for pentium4 here because support was broken for multiple */
93 /* kernel releases and the usual standard detections did not */
94 /* handle this. So we check for pentium 4 explicitly. */
95 static int
96 processor_supported(int vendor, int family) {
97 
98  /* Error out if kernel too early to support p4 */
99  if (( vendor == PAPI_VENDOR_INTEL ) && (family == 15)) {
100  if (_papi_os_info.os_version < LINUX_VERSION(2,6,35)) {
101  PAPIERROR("Pentium 4 not supported on kernels before 2.6.35");
102  return PAPI_ENOSUPP;
103  }
104  }
105  return PAPI_OK;
106 }
107 
108 #endif
109 
110 /* Fix up the config based on what CPU/Vendor we are running on */
111 static int
113 {
114  /* powerpc */
115  /* On IBM and Power6 Machines default domain should include supervisor */
117  vector->cmp_info.available_domains |=
119  if (strcmp(_papi_hwi_system_info.hw_info.model_string, "POWER6" ) == 0 ) {
120  vector->cmp_info.default_domain =
122  }
123  }
124 
127  }
128 
131  vector->cmp_info.fast_real_timer = 1;
132  }
133 
134  /* ARM */
136 
137  /* Some ARMv7 and earlier could not measure */
138  /* KERNEL and USER separately. */
139 
140  /* Whitelist CortexA7 and CortexA15 */
141  /* There might be more */
142 
146 
147  vector->cmp_info.available_domains |=
149  vector->cmp_info.default_domain =
151  }
152  }
153 
154  /* CRAY */
157  }
158 
159  return PAPI_OK;
160 }
161 
162 
163 
164 /******************************************************************/
165 /******** Kernel Version Dependent Routines **********************/
166 /******************************************************************/
167 
168 
169 /* PERF_FORMAT_GROUP allows reading an entire group's counts at once */
170 /* before 2.6.34 PERF_FORMAT_GROUP did not work when reading results */
171 /* from attached processes. We are lazy and disable it for all cases */
172 /* commit was: 050735b08ca8a016bbace4445fa025b88fee770b */
173 
174 static int
176 
177 
178 #if (OBSOLETE_WORKAROUNDS==1)
179  if (_papi_os_info.os_version < LINUX_VERSION(2,6,34)) return 1;
180 #endif
181 
182  /* MIPS, as of version 3.1, does not support this properly */
183  /* FIXME: is this still true? */
184 
185 #if defined(__mips__)
186  return 1;
187 #endif
188 
189  return 0;
190 
191 }
192 
193 #if (OBSOLETE_WORKAROUNDS==1)
194 
195 
196 /* There's a bug prior to Linux 2.6.33 where if you are using */
197 /* PERF_FORMAT_GROUP, the TOTAL_TIME_ENABLED and */
198 /* TOTAL_TIME_RUNNING fields will be zero unless you disable */
199 /* the counters first */
200 static int
201 bug_sync_read(void) {
202 
203  if (_papi_os_info.os_version < LINUX_VERSION(2,6,33)) return 1;
204 
205  return 0;
206 
207 }
208 
209 #endif
210 
211 /* Set the F_SETOWN_EX flag on the fd. */
212 /* This affects which thread an overflow signal gets sent to */
213 /* Handled in a subroutine to handle the fact that the behavior */
214 /* is dependent on kernel version. */
215 static int
217 
218  int ret;
219  struct f_owner_ex fown_ex;
220 
221  /* F_SETOWN_EX is not available until 2.6.32 */
222  /* but PAPI perf_event support didn't work on 2.6.31 anyway */
223 
224  /* set ownership of the descriptor */
225  fown_ex.type = F_OWNER_TID;
226  fown_ex.pid = mygettid();
227  ret = fcntl(fd, F_SETOWN_EX, (unsigned long)&fown_ex );
228 
229  if ( ret == -1 ) {
230  PAPIERROR( "cannot fcntl(F_SETOWN_EX) on %d: %s",
231  fd, strerror( errno ) );
232  return PAPI_ESYS;
233  }
234  return PAPI_OK;
235 }
236 
237 /* The read format on perf_event varies based on various flags that */
238 /* are passed into it. This helper avoids copying this logic */
239 /* multiple places. */
240 static unsigned int
242  unsigned int inherit,
243  int format_group )
244 {
245  unsigned int format = 0;
246 
247  /* if we need read format options for multiplexing, add them now */
248  if (multiplex) {
249  format |= PERF_FORMAT_TOTAL_TIME_ENABLED;
250  format |= PERF_FORMAT_TOTAL_TIME_RUNNING;
251  }
252 
253  /* if our kernel supports it and we are not using inherit, */
254  /* add the group read options */
255  if ( (!bug_format_group()) && !inherit) {
256  if (format_group) {
257  format |= PERF_FORMAT_GROUP;
258  }
259  }
260 
261  SUBDBG("multiplex: %d, inherit: %d, group_leader: %d, format: %#x\n",
262  multiplex, inherit, format_group, format);
263 
264  return format;
265 }
266 
267 
268 /* attr.exclude_guest is enabled by default in recent libpfm4 */
269 /* however older kernels will reject events with it set */
270 /* because the reserved field is not all zeros */
271 static int
273 {
274  int ev_fd;
275  struct perf_event_attr attr;
276 
278 
279  /* First check that we can open a plain instructions event */
280  memset(&attr, 0 , sizeof(attr));
281  attr.config = PERF_COUNT_HW_INSTRUCTIONS;
282 
283  ev_fd = sys_perf_event_open( &attr, 0, -1, -1, 0 );
284  if ( ev_fd == -1 ) {
285  PAPIERROR("Couldn't open hw_instructions in exclude_guest=0 test");
286  return -1;
287  }
288  close(ev_fd);
289 
290  /* Now try again with excude_guest */
291  memset(&attr, 0 , sizeof(attr));
292  attr.config = PERF_COUNT_HW_INSTRUCTIONS;
293  attr.exclude_guest=1;
294 
295  ev_fd = sys_perf_event_open( &attr, 0, -1, -1, 0 );
296  if ( ev_fd == -1 ) {
297  if (errno==EINVAL) {
299  }
300  else {
301  PAPIERROR("Couldn't open hw_instructions in exclude_guest=1 test");
302  }
303  } else {
305  close(ev_fd);
306  }
307 
308  return PAPI_OK;
309 }
310 
311 /*****************************************************************/
312 /********* End Kernel-version Dependent Routines ****************/
313 /*****************************************************************/
314 
315 /*****************************************************************/
316 /********* Begin perf_event low-level code ***********************/
317 /*****************************************************************/
318 
319 static void perf_event_dump_attr( struct perf_event_attr *hw_event,
320  pid_t pid, int cpu, int group_fd, unsigned long int flags) {
321 
322  /* Mark parameters as not used */
323  /* In the common case (no SUBDBG) the function */
324  /* compiles into an empty function and complains */
325  /* about unused variables. */
326  (void)hw_event;
327  (void)pid;
328  (void)cpu;
329  (void)group_fd;
330  (void)flags;
331 
332  SUBDBG("sys_perf_event_open(hw_event: %p, pid: %d, cpu: %d, "
333  "group_fd: %d, flags: %lx\n",
334  hw_event, pid, cpu, group_fd, flags);
335  SUBDBG(" type: %d\n",hw_event->type);
336  SUBDBG(" size: %d\n",hw_event->size);
337  SUBDBG(" config: %"PRIx64" (%"PRIu64")\n",
338  hw_event->config, hw_event->config);
339  SUBDBG(" sample_period: %"PRIu64"\n",hw_event->sample_period);
340  SUBDBG(" sample_type: %"PRIu64"\n",hw_event->sample_type);
341  SUBDBG(" read_format: %"PRIu64"\n",hw_event->read_format);
342  SUBDBG(" disabled: %d\n",hw_event->disabled);
343  SUBDBG(" inherit: %d\n",hw_event->inherit);
344  SUBDBG(" pinned: %d\n",hw_event->pinned);
345  SUBDBG(" exclusive: %d\n",hw_event->exclusive);
346  SUBDBG(" exclude_user: %d\n",hw_event->exclude_user);
347  SUBDBG(" exclude_kernel: %d\n",hw_event->exclude_kernel);
348  SUBDBG(" exclude_hv: %d\n",hw_event->exclude_hv);
349  SUBDBG(" exclude_idle: %d\n",hw_event->exclude_idle);
350  SUBDBG(" mmap: %d\n",hw_event->mmap);
351  SUBDBG(" comm: %d\n",hw_event->comm);
352  SUBDBG(" freq: %d\n",hw_event->freq);
353  SUBDBG(" inherit_stat: %d\n",hw_event->inherit_stat);
354  SUBDBG(" enable_on_exec: %d\n",hw_event->enable_on_exec);
355  SUBDBG(" task: %d\n",hw_event->task);
356  SUBDBG(" watermark: %d\n",hw_event->watermark);
357  SUBDBG(" precise_ip: %d\n",hw_event->precise_ip);
358  SUBDBG(" mmap_data: %d\n",hw_event->mmap_data);
359  SUBDBG(" sample_id_all: %d\n",hw_event->sample_id_all);
360  SUBDBG(" exclude_host: %d\n",hw_event->exclude_host);
361  SUBDBG(" exclude_guest: %d\n",hw_event->exclude_guest);
362  SUBDBG(" exclude_callchain_kernel: %d\n",
363  hw_event->exclude_callchain_kernel);
364  SUBDBG(" exclude_callchain_user: %d\n",
365  hw_event->exclude_callchain_user);
366  SUBDBG(" wakeup_events: %"PRIx32" (%"PRIu32")\n",
367  hw_event->wakeup_events, hw_event->wakeup_events);
368  SUBDBG(" bp_type: %"PRIx32" (%"PRIu32")\n",
369  hw_event->bp_type, hw_event->bp_type);
370  SUBDBG(" config1: %"PRIx64" (%"PRIu64")\n",
371  hw_event->config1, hw_event->config1);
372  SUBDBG(" config2: %"PRIx64" (%"PRIu64")\n",
373  hw_event->config2, hw_event->config2);
374  SUBDBG(" branch_sample_type: %"PRIx64" (%"PRIu64")\n",
375  hw_event->branch_sample_type, hw_event->branch_sample_type);
376  SUBDBG(" sample_regs_user: %"PRIx64" (%"PRIu64")\n",
377  hw_event->sample_regs_user, hw_event->sample_regs_user);
378  SUBDBG(" sample_stack_user: %"PRIx32" (%"PRIu32")\n",
379  hw_event->sample_stack_user, hw_event->sample_stack_user);
380 }
381 
382 
383 static int map_perf_event_errors_to_papi(int perf_event_error) {
384 
385  int ret;
386 
387  /* These mappings are approximate.
388  EINVAL in particular can mean lots of different things */
389  switch(perf_event_error) {
390  case EPERM:
391  case EACCES:
392  ret = PAPI_EPERM;
393  break;
394  case ENODEV:
395  case EOPNOTSUPP:
396  ret = PAPI_ENOSUPP;
397  break;
398  case ENOENT:
399  ret = PAPI_ENOEVNT;
400  break;
401  case ESRCH: /* If cannnot find process to attach to */
402  case ENOSYS:
403  case EAGAIN:
404  case EBUSY:
405  case E2BIG: /* Only happens if attr is the wrong size somehow */
406  case EBADF: /* We are attempting to group with an invalid file descriptor */
407  ret = PAPI_ESYS;
408  break;
409  case ENOMEM:
410  ret = PAPI_ENOMEM;
411  break;
412  case EMFILE: /* Out of file descriptors. Typically max out at 1024 */
413  ret = PAPI_ECOUNT;
414  break;
415  case EINVAL:
416  default:
417  ret = PAPI_EINVAL;
418  break;
419  }
420  return ret;
421 }
422 
423 
425 /* perf_events. */
426 /* We do this by temporarily opening an event with the */
427 /* desired options then closing it again. We use the */
428 /* PERF_COUNT_HW_INSTRUCTION event as a dummy event */
429 /* on the assumption it is available on all */
430 /* platforms. */
431 
432 static int
433 check_permissions( unsigned long tid,
434  unsigned int cpu_num,
435  unsigned int domain,
436  unsigned int granularity,
437  unsigned int multiplex,
438  unsigned int inherit )
439 {
440  int ev_fd;
441  struct perf_event_attr attr;
442 
443  long pid;
444 
445  /* clearing this will set a type of hardware and to count all domains */
446  memset(&attr, '\0', sizeof(attr));
447  attr.read_format = get_read_format(multiplex, inherit, 1);
448 
449  /* set the event id (config field) to instructios */
450  /* (an event that should always exist) */
451  /* This was cycles but that is missing on Niagara */
452  attr.config = PERF_COUNT_HW_INSTRUCTIONS;
453 
454  /* now set up domains this event set will be counting */
455  if (!(domain & PAPI_DOM_SUPERVISOR)) {
456  attr.exclude_hv = 1;
457  }
458  if (!(domain & PAPI_DOM_USER)) {
459  attr.exclude_user = 1;
460  }
461  if (!(domain & PAPI_DOM_KERNEL)) {
462  attr.exclude_kernel = 1;
463  }
464 
465  if (granularity==PAPI_GRN_SYS) {
466  pid = -1;
467  } else {
468  pid = tid;
469  }
470 
471  SUBDBG("Calling sys_perf_event_open() from check_permissions\n");
472 
473  perf_event_dump_attr( &attr, pid, cpu_num, -1, 0 );
474 
475  ev_fd = sys_perf_event_open( &attr, pid, cpu_num, -1, 0 );
476  if ( ev_fd == -1 ) {
477  SUBDBG("sys_perf_event_open returned error. Linux says, %s",
478  strerror( errno ) );
480  }
481 
482  /* now close it, this was just to make sure we have permissions */
483  /* to set these options */
484  close(ev_fd);
485  return PAPI_OK;
486 }
487 
488 /* Maximum size we ever expect to read from a perf_event fd */
489 /* (this is the number of 64-bit values) */
490 /* We use this to size the read buffers */
491 /* The three is for event count, time_enabled, time_running */
492 /* and the counter term is count value and count id for each */
493 /* possible counter value. */
494 #define READ_BUFFER_SIZE (3 + (2 * PERF_EVENT_MAX_MPX_COUNTERS))
495 
496 
497 
498 /* KERNEL_CHECKS_SCHEDUABILITY_UPON_OPEN is a work-around for kernel arch */
499 /* implementations (e.g. x86 before 2.6.33) which don't do a static event */
500 /* scheduability check in sys_perf_event_open. It is also needed if the */
501 /* kernel is stealing an event, such as when NMI watchdog is enabled. */
502 
503 static int
505 {
506  int retval = 0, cnt = -1;
507  ( void ) ctx; /*unused */
508  long long papi_pe_buffer[READ_BUFFER_SIZE];
509  int i,group_leader_fd;
510 
511  /* If the kernel isn't tracking scheduability right */
512  /* Then we need to start/stop/read to force the event */
513  /* to be scheduled and see if an error condition happens. */
514 
515  /* get the proper fd to start */
516  group_leader_fd=ctl->events[idx].group_leader_fd;
517  if (group_leader_fd==-1) group_leader_fd=ctl->events[idx].event_fd;
518 
519  /* start the event */
520  retval = ioctl( group_leader_fd, PERF_EVENT_IOC_ENABLE, NULL );
521  if (retval == -1) {
522  PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed");
523  return PAPI_ESYS;
524  }
525 
526  /* stop the event */
527  retval = ioctl(group_leader_fd, PERF_EVENT_IOC_DISABLE, NULL );
528  if (retval == -1) {
529  PAPIERROR( "ioctl(PERF_EVENT_IOC_DISABLE) failed" );
530  return PAPI_ESYS;
531  }
532 
533  /* See if a read returns any results */
534  cnt = read( group_leader_fd, papi_pe_buffer, sizeof(papi_pe_buffer));
535  if ( cnt == -1 ) {
536  SUBDBG( "read returned an error! Should never happen.\n" );
537  return PAPI_ESYS;
538  }
539 
540  if ( cnt == 0 ) {
541  /* We read 0 bytes if we could not schedule the event */
542  /* The kernel should have detected this at open */
543  /* but various bugs (including NMI watchdog) */
544  /* result in this behavior */
545 
546  return PAPI_ECNFLCT;
547 
548  } else {
549 
550  /* Reset all of the counters (opened so far) back to zero */
551  /* from the above brief enable/disable call pair. */
552 
553  /* We have to reset all events because reset of group leader */
554  /* does not reset all. */
555  /* we assume that the events are being added one by one and that */
556  /* we do not need to reset higher events (doing so may reset ones */
557  /* that have not been initialized yet. */
558 
559  /* Note... PERF_EVENT_IOC_RESET does not reset time running */
560  /* info if multiplexing, so we should avoid coming here if */
561  /* we are multiplexing the event. */
562  for( i = 0; i < idx; i++) {
563  retval=ioctl( ctl->events[i].event_fd, PERF_EVENT_IOC_RESET, NULL );
564  if (retval == -1) {
565  PAPIERROR( "ioctl(PERF_EVENT_IOC_RESET) #%d/%d %d "
566  "(fd %d)failed",
567  i,ctl->num_events,idx,ctl->events[i].event_fd);
568  return PAPI_ESYS;
569  }
570  }
571  }
572  return PAPI_OK;
573 }
574 
575 
576 /* Do some extra work on a perf_event fd if we're doing sampling */
577 /* This mostly means setting up the mmap buffer. */
578 static int
580 {
581  int ret;
582  int fd = ctl->events[evt_idx].event_fd;
583 
584  /* Register that we would like a SIGIO notification when a mmap'd page */
585  /* becomes full. */
586  ret = fcntl( fd, F_SETFL, O_ASYNC | O_NONBLOCK );
587  if ( ret ) {
588  PAPIERROR ( "fcntl(%d, F_SETFL, O_ASYNC | O_NONBLOCK) "
589  "returned error: %s", fd, strerror( errno ) );
590  return PAPI_ESYS;
591  }
592 
593  /* Set the F_SETOWN_EX flag on the fd. */
594  /* This affects which thread an overflow signal gets sent to. */
596  if (ret!=PAPI_OK) return ret;
597 
598  /* Set FD_CLOEXEC. Otherwise if we do an exec with an overflow */
599  /* running, the overflow handler will continue into the exec()'d*/
600  /* process and kill it because no signal handler is set up. */
601  ret=fcntl(fd, F_SETFD, FD_CLOEXEC);
602  if (ret) {
603  return PAPI_ESYS;
604  }
605 
606  /* when you explicitly declare that you want a particular signal, */
607  /* even when you use the default signal, the kernel will send more */
608  /* information concerning the event to the signal handler. */
609  /* */
610  /* In particular, it will send the file descriptor from which the */
611  /* event is originating which can be quite useful when monitoring */
612  /* multiple tasks from a single thread. */
613  ret = fcntl( fd, F_SETSIG, ctl->overflow_signal );
614  if ( ret == -1 ) {
615  PAPIERROR( "cannot fcntl(F_SETSIG,%d) on %d: %s",
616  ctl->overflow_signal, fd,
617  strerror( errno ) );
618  return PAPI_ESYS;
619  }
620 
621  return PAPI_OK;
622 }
623 
624 static int
625 set_up_mmap( pe_control_t *ctl, int evt_idx)
626 {
627 
628  void *buf_addr;
629  int fd = ctl->events[evt_idx].event_fd;
630 
631  /* mmap() the sample buffer */
632  buf_addr = mmap( NULL,
633  ctl->events[evt_idx].nr_mmap_pages * getpagesize(),
634  PROT_READ | PROT_WRITE,
635  MAP_SHARED,
636  fd, 0 );
637 
638  /* This may happen if we go over the limit in */
639  /* /proc/sys/kernel/perf_event_mlock_kb */
640  /* which defaults to 516k */
641  /* with regular rdpmc events on 4k page archs */
642  /* this is roughly 128 events */
643 
644  /* We shouldn't fail, just fall back to non-rdpmc */
645  /* Although not sure what happens if it's a sample */
646  /* event that fails to mmap. */
647 
648  if ( buf_addr == MAP_FAILED ) {
649  SUBDBG( "mmap(NULL,%d,%d,%d,%d,0): %s",
650  ctl->events[evt_idx].nr_mmap_pages * getpagesize(),
651  PROT_READ | PROT_WRITE,
652  MAP_SHARED,
653  fd, strerror( errno ) );
654 
655  ctl->events[evt_idx].mmap_buf = NULL;
656 
657  /* Easier to just globally disable this, as it should */
658  /* be a fairly uncommon case hopefully. */
660  PAPIERROR("Can't mmap, disabling fast_counter_read\n");
662  }
663  return PAPI_ESYS;
664  }
665 
666  SUBDBG( "Sample buffer for fd %d is located at %p\n", fd, buf_addr );
667 
668  /* Set up the mmap buffer and its associated helpers */
669  ctl->events[evt_idx].mmap_buf = (struct perf_counter_mmap_page *) buf_addr;
670  ctl->events[evt_idx].tail = 0;
671  ctl->events[evt_idx].mask =
672  ( ctl->events[evt_idx].nr_mmap_pages - 1 ) * getpagesize() - 1;
673 
674  return PAPI_OK;
675 }
676 
677 
678 
679 /* Open all events in the control state */
680 static int
682 {
683 
684  int i, ret = PAPI_OK;
685  long pid;
686 
687 
688  /* Set the pid setting */
689  /* If attached, this is the pid of process we are attached to. */
690  /* If GRN_THRD then it is 0 meaning current process only */
691  /* If GRN_SYS then it is -1 meaning all procs on this CPU */
692  /* Note if GRN_SYS then CPU must be specified, not -1 */
693 
694  if (ctl->attached) {
695  pid = ctl->tid;
696  }
697  else {
698  if (ctl->granularity==PAPI_GRN_SYS) {
699  pid = -1;
700  }
701  else {
702  pid = 0;
703  }
704  }
705 
706  for( i = 0; i < ctl->num_events; i++ ) {
707 
708  ctl->events[i].event_opened=0;
709 
710  /* set up the attr structure. */
711  /* We don't set up all fields here */
712  /* as some have already been set up previously. */
713 
714  /* Handle the broken exclude_guest problem */
715  /* libpfm4 sets this by default (PEBS events depend on it) */
716  /* but on older kernels that dont know about exclude_guest */
717  /* perf_event_open() will error out as a "reserved" */
718  /* unknown bit is set to 1. */
719  /* Do we need to also watch for exclude_host, exclude_idle */
720  /* exclude_callchain*? */
721  if ((ctl->events[i].attr.exclude_guest) &&
723  SUBDBG("Disabling exclude_guest in event %d\n",i);
724  ctl->events[i].attr.exclude_guest=0;
725  }
726 
727  /* group leader (event 0) is special */
728  /* If we're multiplexed, everyone is a group leader */
729  if (( i == 0 ) || (ctl->multiplexed)) {
730  ctl->events[i].attr.pinned = !ctl->multiplexed;
731  ctl->events[i].attr.disabled = 1;
732  ctl->events[i].group_leader_fd=-1;
733  ctl->events[i].attr.read_format = get_read_format(
734  ctl->multiplexed,
735  ctl->inherit,
736  !ctl->multiplexed );
737  } else {
738  ctl->events[i].attr.pinned=0;
739  ctl->events[i].attr.disabled = 0;
740  ctl->events[i].group_leader_fd=ctl->events[0].event_fd;
741  ctl->events[i].attr.read_format = get_read_format(
742  ctl->multiplexed,
743  ctl->inherit,
744  0 );
745  }
746 
747  /* try to open */
749  &ctl->events[i].attr,
750  pid,
751  ctl->events[i].cpu,
752  ctl->events[i].group_leader_fd,
753  0 /* flags */ );
754 
756  &ctl->events[i].attr,
757  pid,
758  ctl->events[i].cpu,
759  ctl->events[i].group_leader_fd,
760  0 /* flags */ );
761 
762  /* Try to match Linux errors to PAPI errors */
763  if ( ctl->events[i].event_fd == -1 ) {
764  SUBDBG("sys_perf_event_open returned error "
765  "on event #%d. Error: %s\n",
766  i, strerror( errno ) );
768 
769  goto open_pe_cleanup;
770  }
771 
772  SUBDBG ("sys_perf_event_open: tid: %ld, cpu_num: %d,"
773  " group_leader/fd: %d, event_fd: %d,"
774  " read_format: %"PRIu64"\n",
775  pid, ctl->events[i].cpu,
776  ctl->events[i].group_leader_fd,
777  ctl->events[i].event_fd,
778  ctl->events[i].attr.read_format);
779 
780 
781  /* in many situations the kernel will indicate we opened fine */
782  /* yet things will fail later. So we need to double check */
783  /* we actually can use the events we've set up. */
784 
785  /* This is not necessary if we are multiplexing, and in fact */
786  /* we cannot do this properly if multiplexed because */
787  /* PERF_EVENT_IOC_RESET does not reset the time running info */
788  if (!ctl->multiplexed) {
789  ret = check_scheduability( ctx, ctl, i );
790 
791  if ( ret != PAPI_OK ) {
792  /* the last event did open, so we need to */
793  /* bump the counter before doing the cleanup */
794  i++;
795  goto open_pe_cleanup;
796  }
797  }
798  ctl->events[i].event_opened=1;
799  }
800 
801  /* Now that we've successfully opened all of the events, do whatever */
802  /* "tune-up" is needed to attach the mmap'd buffers, signal handlers, */
803  /* and so on. */
804 
805 
806  /* Make things easier and give each event a mmap() buffer */
807  /* Keeping separate tracking for rdpmc vs regular events */
808  /* Would be a pain. Also perf always gives every event a */
809  /* mmap buffer. */
810 
811  for ( i = 0; i < ctl->num_events; i++ ) {
812 
813  /* Can't mmap() inherited events :( */
814  if (ctl->inherit) {
815  ctl->events[i].nr_mmap_pages = 0;
816  ctl->events[i].mmap_buf = NULL;
817  }
818  else {
819  /* Just a guess at how many pages would make this */
820  /* relatively efficient. */
821  /* Note that it's "1 +" because of the need for a */
822  /* control page, and the number following the "+" */
823  /* must be a power of 2 (1, 4, 8, 16, etc) or zero. */
824  /* This is required to optimize dealing with */
825  /* circular buffer wrapping of the mapped pages. */
826  if (ctl->events[i].sampling) {
827  ctl->events[i].nr_mmap_pages = 1 + 2;
828  }
830  ctl->events[i].nr_mmap_pages = 1;
831  }
832  else {
833  ctl->events[i].nr_mmap_pages = 0;
834  }
835 
836  /* Set up the MMAP sample pages */
837  if (ctl->events[i].nr_mmap_pages) {
838  set_up_mmap(ctl,i);
839  } else {
840  ctl->events[i].mmap_buf = NULL;
841  }
842  }
843  }
844 
845  for ( i = 0; i < ctl->num_events; i++ ) {
846 
847  /* If sampling is enabled, hook up signal handler */
848  if (ctl->events[i].attr.sample_period) {
849 
850  ret = configure_fd_for_sampling( ctl, i );
851  if ( ret != PAPI_OK ) {
852  /* We failed, and all of the fds are open */
853  /* so we need to clean up all of them */
854  i = ctl->num_events;
855  goto open_pe_cleanup;
856  }
857  }
858  }
859 
860  /* Set num_evts only if completely successful */
861  ctx->state |= PERF_EVENTS_OPENED;
862 
863  return PAPI_OK;
864 
865 open_pe_cleanup:
866  /* We encountered an error, close up the fds we successfully opened. */
867  /* We go backward in an attempt to close group leaders last, although */
868  /* That's probably not strictly necessary. */
869  while ( i > 0 ) {
870  i--;
871  if (ctl->events[i].event_fd>=0) {
872  close( ctl->events[i].event_fd );
873  ctl->events[i].event_opened=0;
874  }
875  }
876 
877  return ret;
878 }
879 
880 /* TODO: make code clearer -- vmw */
881 static int
883 {
884  int munmap_error=0,close_error=0;
885 
886  if ( event->mmap_buf ) {
887  if (event->nr_mmap_pages==0) {
888  PAPIERROR("munmap and num pages is zero");
889  }
890  if ( munmap ( event->mmap_buf,
891  event->nr_mmap_pages * getpagesize() ) ) {
892  PAPIERROR( "munmap of fd = %d returned error: %s",
893  event->event_fd,
894  strerror( errno ) );
895  event->mmap_buf=NULL;
896  munmap_error=1;
897  }
898  }
899  if ( close( event->event_fd ) ) {
900  PAPIERROR( "close of fd = %d returned error: %s",
901  event->event_fd, strerror( errno ) );
902  close_error=1;
903  }
904 
905  event->event_opened=0;
906 
907  if ((close_error || munmap_error)) {
908  return PAPI_ESYS;
909  }
910 
911  return 0;
912 }
913 
914 /* Close all of the opened events */
915 static int
917 {
918  int i,result;
919  int num_closed=0;
920  int events_not_opened=0;
921 
922  /* should this be a more serious error? */
923  if ( ctx->state & PERF_EVENTS_RUNNING ) {
924  SUBDBG("Closing without stopping first\n");
925  }
926 
927  /* Close child events first */
928  /* Is that necessary? -- vmw */
929  for( i=0; i<ctl->num_events; i++ ) {
930  if (ctl->events[i].event_opened) {
931  if (ctl->events[i].group_leader_fd!=-1) {
932  result=close_event(&ctl->events[i]);
933  if (result!=0) return result;
934  else num_closed++;
935  }
936  }
937  else {
938  events_not_opened++;
939  }
940  }
941 
942  /* Close the group leaders last */
943  for( i=0; i<ctl->num_events; i++ ) {
944  if (ctl->events[i].event_opened) {
945  if (ctl->events[i].group_leader_fd==-1) {
946  result=close_event(&ctl->events[i]);
947  if (result!=0) return result;
948  else num_closed++;
949  }
950  }
951  }
952 
953  if (ctl->num_events!=num_closed) {
954  if (ctl->num_events!=(num_closed+events_not_opened)) {
955  PAPIERROR("Didn't close all events: "
956  "Closed %d Not Opened: %d Expected %d",
957  num_closed,events_not_opened,ctl->num_events);
958  return PAPI_EBUG;
959  }
960  }
961 
962  ctl->num_events=0;
963 
964  ctx->state &= ~PERF_EVENTS_OPENED;
965 
966  return PAPI_OK;
967 }
968 
969 
970 /********************************************************************/
971 /********************************************************************/
972 /* Functions that are exported via the component interface */
973 /********************************************************************/
974 /********************************************************************/
975 
976 /********************* DOMAIN RELATED *******************************/
977 
978 
979 /* set the domain. */
980 /* perf_events allows per-event control of this, */
981 /* papi allows it to be set at the event level or at the event set level. */
982 /* this will set the event set level domain values */
983 /* but they only get used if no event level domain mask (u= or k=) */
984 /* was specified. */
985 static int
987 {
988  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
989 
990  SUBDBG("old control domain %d, new domain %d\n", pe_ctl->domain,domain);
991  pe_ctl->domain = domain;
992  return PAPI_OK;
993 }
994 
995 
996 /********************* THREAD RELATED *******************************/
997 
998 
999 /* Shutdown a thread */
1000 static int
1002 {
1003  pe_context_t *pe_ctx = ( pe_context_t *) ctx;
1004 
1005  pe_ctx->initialized=0;
1006 
1007  return PAPI_OK;
1008 }
1009 
1010 /* Initialize a thread */
1011 static int
1013 {
1014 
1015  pe_context_t *pe_ctx = ( pe_context_t *) hwd_ctx;
1016 
1017  /* clear the context structure and mark as initialized */
1018  memset( pe_ctx, 0, sizeof ( pe_context_t ) );
1019  pe_ctx->initialized=1;
1021  pe_ctx->cidx=our_cidx;
1022 
1023  return PAPI_OK;
1024 }
1025 
1026 
1027 
1028 /**************************** COUNTER RELATED *******************/
1029 
1030 
1031 /* reset the hardware counters */
1032 /* Note: PAPI_reset() does not necessarily call this */
1033 /* unless the events are actually running. */
1034 static int
1036 {
1037  int i, ret;
1038  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1039 
1040  ( void ) ctx; /*unused */
1041 
1042  /* We need to reset all of the events, not just the group leaders */
1043  for( i = 0; i < pe_ctl->num_events; i++ ) {
1044  ret = ioctl( pe_ctl->events[i].event_fd,
1045  PERF_EVENT_IOC_RESET, NULL );
1046  if ( ret == -1 ) {
1047  PAPIERROR("ioctl(%d, PERF_EVENT_IOC_RESET, NULL) "
1048  "returned error, Linux says: %s",
1049  pe_ctl->events[i].event_fd,
1050  strerror( errno ) );
1051  return PAPI_ESYS;
1052  }
1053  }
1054 
1055  return PAPI_OK;
1056 }
1057 
1058 
1059 /* write (set) the hardware counters */
1060 /* Currently we do not support this. */
1061 static int
1063  long long *from )
1064 {
1065  ( void ) ctx; /*unused */
1066  ( void ) ctl; /*unused */
1067  ( void ) from; /*unused */
1068  /*
1069  * Counters cannot be written. Do we need to virtualize the
1070  * counters so that they can be written, or perhaps modify code so that
1071  * they can be written? FIXME ?
1072  */
1073 
1074  return PAPI_ENOSUPP;
1075 }
1076 
1077 /*
1078  * perf_event provides a complicated read interface.
1079  * the info returned by read() varies depending on whether
1080  * you have PERF_FORMAT_GROUP, PERF_FORMAT_TOTAL_TIME_ENABLED,
1081  * PERF_FORMAT_TOTAL_TIME_RUNNING, or PERF_FORMAT_ID set
1082  *
1083  * To simplify things we just always ask for everything. This might
1084  * lead to overhead when reading more than we need, but it makes the
1085  * read code a lot simpler than the original implementation we had here.
1086  *
1087  * For more info on the layout see include/uapi/linux/perf_event.h
1088  *
1089  */
1090 
1091 
1092 /* When we read with rdpmc, we must read each counter individually */
1093 /* Because of this we don't need separate multiplexing support */
1094 /* This is all handled by mmap_read_self() */
1095 static int
1097  long long **events, int flags )
1098 {
1099  long long papi_pe_buffer[READ_BUFFER_SIZE];
1100 
1101  SUBDBG("ENTER: ctx: %p, ctl: %p, events: %p, flags: %#x\n",
1102  ctx, ctl, events, flags);
1103 
1104  ( void ) flags; /*unused */
1105  ( void ) ctx; /*unused */
1106  ( void ) papi_pe_buffer; /*unused */
1107  int i;
1108  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1109  unsigned long long count, enabled = 0, running = 0, adjusted;
1110  int errors=0;
1111 
1112  /* we must read each counter individually */
1113  for ( i = 0; i < pe_ctl->num_events; i++ ) {
1114 
1115  count = mmap_read_self(pe_ctl->events[i].mmap_buf,
1116  &enabled,&running);
1117 
1118  if (count==0xffffffffffffffffULL) {
1119  errors++;
1120  }
1121 
1122  /* Handle multiplexing case */
1123  if (enabled == running) {
1124  /* no adjustment needed */
1125  }
1126  else if (enabled && running) {
1127  adjusted = (enabled * 128LL) / running;
1128  adjusted = adjusted * count;
1129  adjusted = adjusted / 128LL;
1130  count = adjusted;
1131  } else {
1132  /* This should not happen, but we have had it reported */
1133  SUBDBG("perf_event kernel bug(?) count, enabled, "
1134  "running: %lld, %lld, %lld\n",
1135  papi_pe_buffer[0],enabled,running);
1136 
1137  }
1138 
1139  pe_ctl->counts[i] = count;
1140  }
1141  /* point PAPI to the values we read */
1142  *events = pe_ctl->counts;
1143 
1144  SUBDBG("EXIT: *events: %p\n", *events);
1145 
1146  if (errors) return PAPI_ESYS;
1147 
1148  return PAPI_OK;
1149 }
1150 
1151 
1152 static int
1154 {
1155  int i,ret=-1;
1156  long long papi_pe_buffer[READ_BUFFER_SIZE];
1157  long long tot_time_running, tot_time_enabled, scale;
1158 
1159  /* perf_event does not support FORMAT_GROUP on multiplex */
1160  /* so we have to handle separate events when multiplexing */
1161 
1162  for ( i = 0; i < pe_ctl->num_events; i++ ) {
1163 
1164  ret = read( pe_ctl->events[i].event_fd,
1165  papi_pe_buffer,
1166  sizeof ( papi_pe_buffer ) );
1167  if ( ret == -1 ) {
1168  PAPIERROR("read returned an error: ",
1169  strerror( errno ));
1170  return PAPI_ESYS;
1171  }
1172 
1173  /* We should read 3 64-bit values from the counter */
1174  if (ret<(signed)(3*sizeof(long long))) {
1175  PAPIERROR("Error! short read");
1176  return PAPI_ESYS;
1177  }
1178 
1179  SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
1180  pe_ctl->events[i].event_fd,
1181  (long)pe_ctl->tid, pe_ctl->events[i].cpu, ret);
1182  SUBDBG("read: %lld %lld %lld\n",
1183  papi_pe_buffer[0],
1184  papi_pe_buffer[1],
1185  papi_pe_buffer[2]);
1186 
1187  tot_time_enabled = papi_pe_buffer[1];
1188  tot_time_running = papi_pe_buffer[2];
1189 
1190  SUBDBG("count[%d] = (papi_pe_buffer[%d] %lld * "
1191  "tot_time_enabled %lld) / "
1192  "tot_time_running %lld\n",
1193  i, 0,papi_pe_buffer[0],
1194  tot_time_enabled,tot_time_running);
1195 
1196  if (tot_time_running == tot_time_enabled) {
1197  /* No scaling needed */
1198  pe_ctl->counts[i] = papi_pe_buffer[0];
1199  } else if (tot_time_running && tot_time_enabled) {
1200  /* Scale to give better results */
1201  /* avoid truncation. */
1202  /* Why use 100? Would 128 be faster? */
1203  scale = (tot_time_enabled * 100LL) / tot_time_running;
1204  scale = scale * papi_pe_buffer[0];
1205  scale = scale / 100LL;
1206  pe_ctl->counts[i] = scale;
1207  } else {
1208  /* This should not happen, but Phil reports it sometime does. */
1209  SUBDBG("perf_event kernel bug(?) count, enabled, "
1210  "running: %lld, %lld, %lld\n",
1211  papi_pe_buffer[0],tot_time_enabled,
1212  tot_time_running);
1213 
1214  pe_ctl->counts[i] = papi_pe_buffer[0];
1215  }
1216  }
1217  return PAPI_OK;
1218 }
1219 
1220 /* For cases where we can't group counters together */
1221 /* But must read them out individually */
1222 /* This includes when INHERIT is set, as well as various bugs */
1223 
1224 static int
1226 
1227  int i,ret=-1;
1228  long long papi_pe_buffer[READ_BUFFER_SIZE];
1229 
1230  /* we must read each counter individually */
1231  for ( i = 0; i < pe_ctl->num_events; i++ ) {
1232  ret = read( pe_ctl->events[i].event_fd,
1233  papi_pe_buffer,
1234  sizeof ( papi_pe_buffer ) );
1235  if ( ret == -1 ) {
1236  PAPIERROR("read returned an error: ",
1237  strerror( errno ));
1238  return PAPI_ESYS;
1239  }
1240 
1241  /* we should read one 64-bit value from each counter */
1242  if (ret!=sizeof(long long)) {
1243  PAPIERROR("Error! short read");
1244  PAPIERROR("read: fd: %2d, tid: %ld, cpu: %d, ret: %d",
1245  pe_ctl->events[i].event_fd,
1246  (long)pe_ctl->tid, pe_ctl->events[i].cpu, ret);
1247  return PAPI_ESYS;
1248  }
1249 
1250  SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
1251  pe_ctl->events[i].event_fd, (long)pe_ctl->tid,
1252  pe_ctl->events[i].cpu, ret);
1253  SUBDBG("read: %lld\n",papi_pe_buffer[0]);
1254 
1255  pe_ctl->counts[i] = papi_pe_buffer[0];
1256  }
1257 
1258  return PAPI_OK;
1259 
1260 }
1261 
1262 static int
1264  long long **events, int flags )
1265 {
1266  SUBDBG("ENTER: ctx: %p, ctl: %p, events: %p, flags: %#x\n",
1267  ctx, ctl, events, flags);
1268 
1269  ( void ) flags; /*unused */
1270  ( void ) ctx; /*unused */
1271  int i, j, ret = -1;
1272  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1273  long long papi_pe_buffer[READ_BUFFER_SIZE];
1274  int result;
1275 
1276  /* Handle fast case */
1277  /* FIXME: we fallback to slow reads if *any* event in eventset fails */
1278  /* in theory we could only fall back for the one event */
1279  /* but that makes the code more complicated. */
1280  if ((_perf_event_vector.cmp_info.fast_counter_read) && (!pe_ctl->inherit)) {
1281  result=_pe_rdpmc_read( ctx, ctl, events, flags);
1282  /* if successful we are done, otherwise fall back to read */
1283  if (result==PAPI_OK) return PAPI_OK;
1284  }
1285 
1286  /* Handle case where we are multiplexing */
1287  if (pe_ctl->multiplexed) {
1288  _pe_read_multiplexed(pe_ctl);
1289  }
1290 
1291  /* Handle cases where we cannot use FORMAT GROUP */
1292  else if (bug_format_group() || pe_ctl->inherit) {
1293  _pe_read_nogroup(pe_ctl);
1294  }
1295 
1296  /* Handle common case where we are using FORMAT_GROUP */
1297  /* We assume only one group leader, in position 0 */
1298 
1299  /* By reading the leader file descriptor, we get a series */
1300  /* of 64-bit values. The first is the total number of */
1301  /* events, followed by the counts for them. */
1302 
1303  else {
1304  if (pe_ctl->events[0].group_leader_fd!=-1) {
1305  PAPIERROR("Was expecting group leader");
1306  }
1307 
1308  ret = read( pe_ctl->events[0].event_fd,
1309  papi_pe_buffer,
1310  sizeof ( papi_pe_buffer ) );
1311 
1312  if ( ret == -1 ) {
1313  PAPIERROR("read returned an error: ",
1314  strerror( errno ));
1315  return PAPI_ESYS;
1316  }
1317 
1318  /* we read 1 64-bit value (number of events) then */
1319  /* num_events more 64-bit values that hold the counts */
1320  if (ret<(signed)((1+pe_ctl->num_events)*sizeof(long long))) {
1321  PAPIERROR("Error! short read");
1322  return PAPI_ESYS;
1323  }
1324 
1325  SUBDBG("read: fd: %2d, tid: %ld, cpu: %d, ret: %d\n",
1326  pe_ctl->events[0].event_fd,
1327  (long)pe_ctl->tid, pe_ctl->events[0].cpu, ret);
1328 
1329  for(j=0;j<ret/8;j++) {
1330  SUBDBG("read %d: %lld\n",j,papi_pe_buffer[j]);
1331  }
1332 
1333  /* Make sure the kernel agrees with how many events we have */
1334  if (papi_pe_buffer[0]!=pe_ctl->num_events) {
1335  PAPIERROR("Error! Wrong number of events");
1336  return PAPI_ESYS;
1337  }
1338 
1339  /* put the count values in their proper location */
1340  for(i=0;i<pe_ctl->num_events;i++) {
1341  pe_ctl->counts[i] = papi_pe_buffer[1+i];
1342  }
1343  }
1344 
1345  /* point PAPI to the values we read */
1346  *events = pe_ctl->counts;
1347 
1348  SUBDBG("EXIT: *events: %p\n", *events);
1349 
1350  return PAPI_OK;
1351 }
1352 
1353 #if (OBSOLETE_WORKAROUNDS==1)
1354 /* On kernels before 2.6.33 the TOTAL_TIME_ENABLED and TOTAL_TIME_RUNNING */
1355 /* fields are always 0 unless the counter is disabled. So if we are on */
1356 /* one of these kernels, then we must disable events before reading. */
1357 /* Elsewhere though we disable multiplexing on kernels before 2.6.34 */
1358 /* so maybe this isn't even necessary. */
1359 static int
1360 _pe_read_bug_sync( hwd_context_t *ctx, hwd_control_state_t *ctl,
1361  long long **events, int flags )
1362 {
1363 
1364  ( void ) flags; /*unused */
1365  int i, ret = -1;
1366  pe_context_t *pe_ctx = ( pe_context_t *) ctx;
1367  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1368  int result;
1369 
1370  if ( pe_ctx->state & PERF_EVENTS_RUNNING ) {
1371  for ( i = 0; i < pe_ctl->num_events; i++ ) {
1372  /* disable only the group leaders */
1373  if ( pe_ctl->events[i].group_leader_fd == -1 ) {
1374  ret = ioctl( pe_ctl->events[i].event_fd,
1375  PERF_EVENT_IOC_DISABLE, NULL );
1376  if ( ret == -1 ) {
1377  PAPIERROR("ioctl(PERF_EVENT_IOC_DISABLE) "
1378  "returned an error: ", strerror( errno ));
1379  return PAPI_ESYS;
1380  }
1381  }
1382  }
1383  }
1384 
1385  result=_pe_read( ctx, ctl, events, flags );
1386 
1387  /* If we disabled the counters due to the sync_read_bug(), */
1388  /* then we need to re-enable them now. */
1389 
1390  if ( pe_ctx->state & PERF_EVENTS_RUNNING ) {
1391  for ( i = 0; i < pe_ctl->num_events; i++ ) {
1392  if ( pe_ctl->events[i].group_leader_fd == -1 ) {
1393  /* this should refresh any overflow counters too */
1394  ret = ioctl( pe_ctl->events[i].event_fd,
1395  PERF_EVENT_IOC_ENABLE, NULL );
1396  if ( ret == -1 ) {
1397  /* Should never happen */
1398  PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) returned an error: ",
1399  strerror( errno ));
1400  return PAPI_ESYS;
1401  }
1402  }
1403  }
1404  }
1405 
1406  return result;
1407 }
1408 
1409 #endif
1410 
1411 /* Start counting events */
1412 static int
1414 {
1415  int ret;
1416  int i;
1417  int did_something = 0;
1418  pe_context_t *pe_ctx = ( pe_context_t *) ctx;
1419  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1420 
1421  /* Reset the counters first. Is this necessary? */
1422  ret = _pe_reset( pe_ctx, pe_ctl );
1423  if ( ret ) {
1424  return ret;
1425  }
1426 
1427  /* Enable all of the group leaders */
1428  /* All group leaders have a group_leader_fd of -1 */
1429  for( i = 0; i < pe_ctl->num_events; i++ ) {
1430  if (pe_ctl->events[i].group_leader_fd == -1) {
1431  SUBDBG("ioctl(enable): fd: %d\n",
1432  pe_ctl->events[i].event_fd);
1433  ret=ioctl( pe_ctl->events[i].event_fd,
1434  PERF_EVENT_IOC_ENABLE, NULL) ;
1435 
1436  /* ioctls always return -1 on failure */
1437  if (ret == -1) {
1438  PAPIERROR("ioctl(PERF_EVENT_IOC_ENABLE) failed");
1439  return PAPI_ESYS;
1440  }
1441 
1442  did_something++;
1443  }
1444  }
1445 
1446  if (!did_something) {
1447  PAPIERROR("Did not enable any counters");
1448  return PAPI_EBUG;
1449  }
1450 
1451  pe_ctx->state |= PERF_EVENTS_RUNNING;
1452 
1453  return PAPI_OK;
1454 
1455 }
1456 
1457 /* Stop all of the counters */
1458 static int
1460 {
1461  SUBDBG( "ENTER: ctx: %p, ctl: %p\n", ctx, ctl);
1462 
1463  int ret;
1464  int i;
1465  pe_context_t *pe_ctx = ( pe_context_t *) ctx;
1466  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1467 
1468  /* Just disable the group leaders */
1469  for ( i = 0; i < pe_ctl->num_events; i++ ) {
1470  if ( pe_ctl->events[i].group_leader_fd == -1 ) {
1471  ret=ioctl( pe_ctl->events[i].event_fd,
1472  PERF_EVENT_IOC_DISABLE, NULL);
1473  if ( ret == -1 ) {
1474  PAPIERROR( "ioctl(%d, PERF_EVENT_IOC_DISABLE, NULL) "
1475  "returned error, Linux says: %s",
1476  pe_ctl->events[i].event_fd, strerror( errno ) );
1477  return PAPI_EBUG;
1478  }
1479  }
1480  }
1481 
1482  pe_ctx->state &= ~PERF_EVENTS_RUNNING;
1483 
1484  SUBDBG( "EXIT:\n");
1485 
1486  return PAPI_OK;
1487 }
1488 
1489 
1490 
1491 
1492 
1493 /*********************** CONTROL STATE RELATED *******************/
1494 
1495 
1496 /* This function clears the current contents of the control structure and
1497  updates it with whatever resources are allocated for all the native events
1498  in the native info structure array. */
1499 
1500 static int
1503  int count, hwd_context_t *ctx )
1504 {
1505  SUBDBG( "ENTER: ctl: %p, native: %p, count: %d, ctx: %p\n",
1506  ctl, native, count, ctx);
1507  int i;
1508  int j;
1509  int ret;
1510  int skipped_events=0;
1511  struct native_event_t *ntv_evt;
1512  pe_context_t *pe_ctx = ( pe_context_t *) ctx;
1513  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1514 
1515  /* close all of the existing fds and start over again */
1516  /* In theory we could have finer-grained control and know if */
1517  /* things were changed, but it's easier to tear things down and rebuild. */
1518  close_pe_events( pe_ctx, pe_ctl );
1519 
1520  /* Calling with count==0 should be OK, it's how things are deallocated */
1521  /* when an eventset is destroyed. */
1522  if ( count == 0 ) {
1523  SUBDBG( "EXIT: Called with count == 0\n" );
1524  return PAPI_OK;
1525  }
1526 
1527  /* set up all the events */
1528  for( i = 0; i < count; i++ ) {
1529  if ( native ) {
1530  /* get the native event pointer used for this papi event */
1531  int ntv_idx = _papi_hwi_get_ntv_idx((unsigned)(native[i].ni_papi_code));
1532  if (ntv_idx < -1) {
1533  SUBDBG("papi_event_code: %#x known by papi but not by the component\n", native[i].ni_papi_code);
1534  continue;
1535  }
1536  /* if native index is -1, then we have an event without a mask and need to find the right native index to use */
1537  if (ntv_idx == -1) {
1538  /* find the native event index we want by matching for the right papi event code */
1539  for (j=0 ; j<pe_ctx->event_table->num_native_events ; j++) {
1540  if (pe_ctx->event_table->native_events[j].papi_event_code == native[i].ni_papi_code) {
1541  ntv_idx = j;
1542  }
1543  }
1544  }
1545 
1546  /* if native index is still negative, we did not find event we wanted so just return error */
1547  if (ntv_idx < 0) {
1548  SUBDBG("papi_event_code: %#x not found in native event tables\n", native[i].ni_papi_code);
1549  continue;
1550  }
1551 
1552  /* this native index is positive so there was a mask with the event, the ntv_idx identifies which native event to use */
1553  ntv_evt = (struct native_event_t *)(&(pe_ctx->event_table->native_events[ntv_idx]));
1554  SUBDBG("ntv_evt: %p\n", ntv_evt);
1555 
1556  SUBDBG("i: %d, pe_ctx->event_table->num_native_events: %d\n", i, pe_ctx->event_table->num_native_events);
1557 
1558  /* Move this events hardware config values and other attributes to the perf_events attribute structure */
1559  memcpy (&pe_ctl->events[i].attr, &ntv_evt->attr, sizeof(perf_event_attr_t));
1560 
1561  /* may need to update the attribute structure with information from event set level domain settings (values set by PAPI_set_domain) */
1562  /* only done if the event mask which controls each counting domain was not provided */
1563 
1564  /* get pointer to allocated name, will be NULL when adding preset events to event set */
1565  char *aName = ntv_evt->allocated_name;
1566  if ((aName == NULL) || (strstr(aName, ":u=") == NULL)) {
1567  SUBDBG("set exclude_user attribute from eventset level domain flags, encode: %d, eventset: %d\n", pe_ctl->events[i].attr.exclude_user, !(pe_ctl->domain & PAPI_DOM_USER));
1568  pe_ctl->events[i].attr.exclude_user = !(pe_ctl->domain & PAPI_DOM_USER);
1569  }
1570  if ((aName == NULL) || (strstr(aName, ":k=") == NULL)) {
1571  SUBDBG("set exclude_kernel attribute from eventset level domain flags, encode: %d, eventset: %d\n", pe_ctl->events[i].attr.exclude_kernel, !(pe_ctl->domain & PAPI_DOM_KERNEL));
1572  pe_ctl->events[i].attr.exclude_kernel = !(pe_ctl->domain & PAPI_DOM_KERNEL);
1573  }
1574 
1575  // libpfm4 supports mh (monitor host) and mg (monitor guest) event masks
1576  // perf_events supports exclude_hv and exclude_idle attributes
1577  // PAPI_set_domain supports PAPI_DOM_SUPERVISOR and PAPI_DOM_OTHER domain attributes
1578  // not sure how these perf_event attributes, and PAPI domain attributes relate to each other
1579  // if that can be figured out then there should probably be code here to set some perf_events attributes based on what was set in a PAPI_set_domain call
1580  // the code sample below is one possibility
1581 // if (strstr(ntv_evt->allocated_name, ":mg=") == NULL) {
1582 // SUBDBG("set exclude_hv attribute from eventset level domain flags, encode: %d, eventset: %d\n", pe_ctl->events[i].attr.exclude_hv, !(pe_ctl->domain & PAPI_DOM_SUPERVISOR));
1583 // pe_ctl->events[i].attr.exclude_hv = !(pe_ctl->domain & PAPI_DOM_SUPERVISOR);
1584 // }
1585 
1586 
1587  // set the cpu number provided with an event mask if there was one (will be -1 if mask not provided)
1588  pe_ctl->events[i].cpu = ntv_evt->cpu;
1589  // if cpu event mask not provided, then set the cpu to use to what may have been set on call to PAPI_set_opt (will still be -1 if not called)
1590  if (pe_ctl->events[i].cpu == -1) {
1591  pe_ctl->events[i].cpu = pe_ctl->cpu;
1592  }
1593  } else {
1594  /* This case happens when called from _pe_set_overflow and _pe_ctl */
1595  /* Those callers put things directly into the pe_ctl structure so it is already set for the open call */
1596  }
1597 
1598  /* Copy the inherit flag into the attribute block that will be passed to the kernel */
1599  pe_ctl->events[i].attr.inherit = pe_ctl->inherit;
1600 
1601  /* Set the position in the native structure */
1602  /* We just set up events linearly */
1603  if ( native ) {
1604  native[i].ni_position = i;
1605  SUBDBG( "&native[%d]: %p, ni_papi_code: %#x, ni_event: %#x, ni_position: %d, ni_owners: %d\n",
1606  i, &(native[i]), native[i].ni_papi_code, native[i].ni_event, native[i].ni_position, native[i].ni_owners);
1607  }
1608  }
1609 
1610  if (count <= skipped_events) {
1611  SUBDBG("EXIT: No events to count, they all contained invalid umasks\n");
1612  return PAPI_ENOEVNT;
1613  }
1614 
1615  pe_ctl->num_events = count - skipped_events;
1616 
1617  /* actually open the events */
1618  ret = open_pe_events( pe_ctx, pe_ctl );
1619  if ( ret != PAPI_OK ) {
1620  SUBDBG("EXIT: open_pe_events returned: %d\n", ret);
1621  /* Restore values ? */
1622  return ret;
1623  }
1624 
1625  SUBDBG( "EXIT: PAPI_OK\n" );
1626  return PAPI_OK;
1627 }
1628 
1629 /* Set various options on a control state */
static int
_pe_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option )
{
	int ret;
	pe_context_t *pe_ctx = ( pe_context_t *) ctx;
	pe_control_t *pe_ctl = NULL;

	/* Each case validates the requested setting with check_permissions() */
	/* before committing it to the eventset's control state. */
	switch ( code ) {
	case PAPI_MULTIPLEX:
		pe_ctl = ( pe_control_t * ) ( option->multiplex.ESI->ctl_state );
		ret = check_permissions( pe_ctl->tid, pe_ctl->cpu, pe_ctl->domain,
					pe_ctl->granularity,
					1, pe_ctl->inherit );
		if (ret != PAPI_OK) {
			return ret;
		}

		/* looks like we are allowed, so set multiplexed attribute */
		pe_ctl->multiplexed = 1;
		/* re-open the events with the new multiplex setting */
		ret = _pe_update_control_state( pe_ctl, NULL,
						pe_ctl->num_events, pe_ctx );
		if (ret != PAPI_OK) {
			/* roll back on failure */
			pe_ctl->multiplexed = 0;
		}
		return ret;

	case PAPI_ATTACH:
		pe_ctl = ( pe_control_t * ) ( option->attach.ESI->ctl_state );
		ret = check_permissions( option->attach.tid, pe_ctl->cpu,
					pe_ctl->domain, pe_ctl->granularity,
					pe_ctl->multiplexed,
					pe_ctl->inherit );
		if (ret != PAPI_OK) {
			return ret;
		}

		pe_ctl->attached = 1;
		pe_ctl->tid = option->attach.tid;

		/* If events have been already been added, something may */
		/* have been done to the kernel, so update */
		ret =_pe_update_control_state( pe_ctl, NULL,
						pe_ctl->num_events, pe_ctx);

		return ret;

	case PAPI_DETACH:
		pe_ctl = ( pe_control_t *) ( option->attach.ESI->ctl_state );

		pe_ctl->attached = 0;
		pe_ctl->tid = 0;

		return PAPI_OK;

	case PAPI_CPU_ATTACH:
		pe_ctl = ( pe_control_t *) ( option->cpu.ESI->ctl_state );
		ret = check_permissions( pe_ctl->tid, option->cpu.cpu_num,
					pe_ctl->domain, pe_ctl->granularity,
					pe_ctl->multiplexed,
					pe_ctl->inherit );
		if (ret != PAPI_OK) {
			return ret;
		}
		/* looks like we are allowed so set cpu number */

		pe_ctl->cpu = option->cpu.cpu_num;

		return PAPI_OK;

	case PAPI_DOMAIN:
		pe_ctl = ( pe_control_t *) ( option->domain.ESI->ctl_state );
		ret = check_permissions( pe_ctl->tid, pe_ctl->cpu,
					option->domain.domain,
					pe_ctl->granularity,
					pe_ctl->multiplexed,
					pe_ctl->inherit );
		if (ret != PAPI_OK) {
			return ret;
		}
		/* looks like we are allowed, so set event set level counting domains */
		pe_ctl->domain = option->domain.domain;
		return PAPI_OK;

	case PAPI_GRANUL:
		pe_ctl = (pe_control_t *) ( option->granularity.ESI->ctl_state );

		/* FIXME: we really don't support this yet */

		switch ( option->granularity.granularity ) {
		case PAPI_GRN_PROCG:
		case PAPI_GRN_SYS_CPU:
		case PAPI_GRN_PROC:
			return PAPI_ECMP;

		/* Currently we only support thread and CPU granularity */
		case PAPI_GRN_SYS:
			pe_ctl->granularity=PAPI_GRN_SYS;
			/* system-wide counting is pinned to the current cpu */
			pe_ctl->cpu=_papi_getcpu();
			break;

		case PAPI_GRN_THR:
			pe_ctl->granularity=PAPI_GRN_THR;
			break;


		default:
			return PAPI_EINVAL;
		}
		return PAPI_OK;

	case PAPI_INHERIT:
		pe_ctl = (pe_control_t *) ( option->inherit.ESI->ctl_state );
		ret = check_permissions( pe_ctl->tid, pe_ctl->cpu, pe_ctl->domain,
				pe_ctl->granularity, pe_ctl->multiplexed,
					option->inherit.inherit );
		if (ret != PAPI_OK) {
			return ret;
		}
		/* looks like we are allowed, so set the requested inheritance */
		if (option->inherit.inherit) {
			/* children will inherit counters */
			pe_ctl->inherit = 1;
		} else {
			/* children won't inherit counters */
			pe_ctl->inherit = 0;
		}
		return PAPI_OK;

	case PAPI_DATA_ADDRESS:
		/* data address range breakpoints are not supported */
		return PAPI_ENOSUPP;
#if 0
		pe_ctl = (pe_control_t *) (option->address_range.ESI->ctl_state);
		ret = set_default_domain( pe_ctl, option->address_range.domain );
		if ( ret != PAPI_OK ) {
			return ret;
		}
		set_drange( pe_ctx, pe_ctl, option );
		return PAPI_OK;
#endif
	case PAPI_INSTR_ADDRESS:
		/* instruction address range breakpoints are not supported */
		return PAPI_ENOSUPP;
#if 0
		pe_ctl = (pe_control_t *) (option->address_range.ESI->ctl_state);
		ret = set_default_domain( pe_ctl, option->address_range.domain );
		if ( ret != PAPI_OK ) {
			return ret;
		}
		set_irange( pe_ctx, pe_ctl, option );
		return PAPI_OK;
#endif

	case PAPI_DEF_ITIMER:
		/* What should we be checking for here? */
		/* This seems like it should be OS-specific not component */
		/* specific. */

		return PAPI_OK;

	case PAPI_DEF_MPX_NS:
		/* Defining a given ns per set is not current supported */
		return PAPI_ENOSUPP;

	case PAPI_DEF_ITIMER_NS:
		/* We don't support this... */
		return PAPI_OK;

	default:
		return PAPI_ENOSUPP;
	}
}
1800 
1801 
1802 /* Initialize a new control state */
1803 static int
1805 {
1806  pe_control_t *pe_ctl = ( pe_control_t *) ctl;
1807 
1808  /* clear the contents */
1809  memset( pe_ctl, 0, sizeof ( pe_control_t ) );
1810 
1811  /* Set the domain */
1813 
1814  /* default granularity */
1816 
1817  /* overflow signal */
1819 
1820  pe_ctl->cidx=our_cidx;
1821 
1822  /* Set cpu number in the control block to show events */
1823  /* are not tied to specific cpu */
1824  pe_ctl->cpu = -1;
1825 
1826  return PAPI_OK;
1827 }
1828 
1829 
1830 /****************** EVENT NAME HANDLING CODE *****************/
1831 
1832 static int
1833 _pe_ntv_enum_events( unsigned int *PapiEventCode, int modifier )
1834 {
1835  return _pe_libpfm4_ntv_enum_events(PapiEventCode, modifier, our_cidx,
1837 }
1838 
1839 static int
1840 _pe_ntv_name_to_code( const char *name, unsigned int *event_code)
1841 {
1842  return _pe_libpfm4_ntv_name_to_code(name,event_code, our_cidx,
1844 }
1845 
1846 static int
1847 _pe_ntv_code_to_name(unsigned int EventCode,
1848  char *ntv_name, int len)
1849 {
1850  return _pe_libpfm4_ntv_code_to_name(EventCode,
1851  ntv_name, len,
1853 }
1854 
1855 static int
1856 _pe_ntv_code_to_descr( unsigned int EventCode,
1857  char *ntv_descr, int len)
1858 {
1859 
1860  return _pe_libpfm4_ntv_code_to_descr(EventCode,ntv_descr,len,
1862 }
1863 
1864 static int
1865 _pe_ntv_code_to_info(unsigned int EventCode,
1866  PAPI_event_info_t *info) {
1867 
1868  return _pe_libpfm4_ntv_code_to_info(EventCode, info,
1870 }
1871 
1872 
1873 /*********************** SAMPLING / PROFILING *******************/
1874 
1875 
1876 /* Find a native event specified by a profile index */
1877 static int
1878 find_profile_index( EventSetInfo_t *ESI, int evt_idx, int *flags,
1879  unsigned int *native_index, int *profile_index )
1880 {
1881  int pos, esi_index, count;
1882 
1883  for ( count = 0; count < ESI->profile.event_counter; count++ ) {
1884  esi_index = ESI->profile.EventIndex[count];
1885  pos = ESI->EventInfoArray[esi_index].pos[0];
1886 
1887  if ( pos == evt_idx ) {
1888  *profile_index = count;
1889  *native_index = ESI->NativeInfoArray[pos].ni_event &
1891  *flags = ESI->profile.flags;
1892  SUBDBG( "Native event %d is at profile index %d, flags %d\n",
1893  *native_index, *profile_index, *flags );
1894  return PAPI_OK;
1895  }
1896  }
1897  PAPIERROR( "wrong count: %d vs. ESI->profile.event_counter %d",
1898  count, ESI->profile.event_counter );
1899  return PAPI_EBUG;
1900 }
1901 
1902 
1903 /* What exactly does this do? */
1904 static int
1905 process_smpl_buf( int evt_idx, ThreadInfo_t **thr, int cidx )
1906 {
1907  int ret, flags, profile_index;
1908  unsigned native_index;
1909  pe_control_t *ctl;
1910 
1911  ret = find_profile_index( ( *thr )->running_eventset[cidx], evt_idx,
1912  &flags, &native_index, &profile_index );
1913  if ( ret != PAPI_OK ) {
1914  return ret;
1915  }
1916 
1917  ctl= (*thr)->running_eventset[cidx]->ctl_state;
1918 
1919  mmap_read( cidx, thr, &(ctl->events[evt_idx]), profile_index );
1920 
1921  return PAPI_OK;
1922 }
1923 
1924 /*
1925  * This function is used when hardware overflows are working or when
1926  * software overflows are forced
1927  */
1928 
1929 static void
1930 _pe_dispatch_timer( int n, hwd_siginfo_t *info, void *uc)
1931 {
1932  ( void ) n; /*unused */
1933  _papi_hwi_context_t hw_context;
1934  int found_evt_idx = -1, fd = info->si_fd;
1935  caddr_t address;
1937  int i;
1938  pe_control_t *ctl;
1940 
1941  if ( thread == NULL ) {
1942  PAPIERROR( "thread == NULL in _papi_pe_dispatch_timer for fd %d!", fd );
1943  return;
1944  }
1945 
1946  if ( thread->running_eventset[cidx] == NULL ) {
1947  PAPIERROR( "thread->running_eventset == NULL in "
1948  "_papi_pe_dispatch_timer for fd %d!",fd );
1949  return;
1950  }
1951 
1952  if ( thread->running_eventset[cidx]->overflow.flags == 0 ) {
1953  PAPIERROR( "thread->running_eventset->overflow.flags == 0 in "
1954  "_papi_pe_dispatch_timer for fd %d!", fd );
1955  return;
1956  }
1957 
1958  hw_context.si = info;
1959  hw_context.ucontext = ( hwd_ucontext_t * ) uc;
1960 
1961  if ( thread->running_eventset[cidx]->overflow.flags &
1963  address = GET_OVERFLOW_ADDRESS( hw_context );
1964  _papi_hwi_dispatch_overflow_signal( ( void * ) &hw_context,
1965  address, NULL, 0,
1966  0, &thread, cidx );
1967  return;
1968  }
1969 
1970  if ( thread->running_eventset[cidx]->overflow.flags !=
1972  PAPIERROR( "thread->running_eventset->overflow.flags "
1973  "is set to something other than "
1974  "PAPI_OVERFLOW_HARDWARE or "
1975  "PAPI_OVERFLOW_FORCE_SW for fd %d (%#x)",
1976  fd,
1977  thread->running_eventset[cidx]->overflow.flags);
1978  }
1979 
1980  /* convoluted way to get ctl */
1981  ctl= thread->running_eventset[cidx]->ctl_state;
1982 
1983  /* See if the fd is one that's part of the this thread's context */
1984  for( i=0; i < ctl->num_events; i++ ) {
1985  if ( fd == ctl->events[i].event_fd ) {
1986  found_evt_idx = i;
1987  break;
1988  }
1989  }
1990 
1991  if ( found_evt_idx == -1 ) {
1992  PAPIERROR( "Unable to find fd %d among the open event fds "
1993  "_papi_hwi_dispatch_timer!", fd );
1994  return;
1995  }
1996 
1997  if (ioctl( fd, PERF_EVENT_IOC_DISABLE, NULL ) == -1 ) {
1998  PAPIERROR("ioctl(PERF_EVENT_IOC_DISABLE) failed");
1999  }
2000 
2001  if ( ( thread->running_eventset[cidx]->state & PAPI_PROFILING ) &&
2002  !( thread->running_eventset[cidx]->profile.flags &
2003  PAPI_PROFIL_FORCE_SW ) ) {
2004  process_smpl_buf( found_evt_idx, &thread, cidx );
2005  }
2006  else {
2007  uint64_t ip;
2008  unsigned int head;
2009  pe_event_info_t *pe = &(ctl->events[found_evt_idx]);
2010  unsigned char *data = ((unsigned char*)pe->mmap_buf) + getpagesize( );
2011 
2012  /*
2013  * Read up the most recent IP from the sample in the mmap buffer. To
2014  * do this, we make the assumption that all of the records in the
2015  * mmap buffer are the same size, and that they all contain the IP as
2016  * their only record element. This means that we can use the
2017  * data_head element from the user page and move backward one record
2018  * from that point and read the data. Since we don't actually need
2019  * to access the header of the record, we can just subtract 8 (size
2020  * of the IP) from data_head and read up that word from the mmap
2021  * buffer. After we subtract 8, we account for mmap buffer wrapping
2022  * by AND'ing this offset with the buffer mask.
2023  */
2024  head = mmap_read_head( pe );
2025 
2026  if ( head == 0 ) {
2027  PAPIERROR( "Attempting to access memory "
2028  "which may be inaccessable" );
2029  return;
2030  }
2031  ip = *( uint64_t * ) ( data + ( ( head - 8 ) & pe->mask ) );
2032  /*
2033  * Update the tail to the current head pointer.
2034  *
2035  * Note: that if we were to read the record at the tail pointer,
2036  * rather than the one at the head (as you might otherwise think
2037  * would be natural), we could run into problems. Signals don't
2038  * stack well on Linux, particularly if not using RT signals, and if
2039  * they come in rapidly enough, we can lose some. Overtime, the head
2040  * could catch up to the tail and monitoring would be stopped, and
2041  * since no more signals are coming in, this problem will never be
2042  * resolved, resulting in a complete loss of overflow notification
2043  * from that point on. So the solution we use here will result in
2044  * only the most recent IP value being read every time there are two
2045  * or more samples in the buffer (for that one overflow signal). But
2046  * the handler will always bring up the tail, so the head should
2047  * never run into the tail.
2048  */
2049  mmap_write_tail( pe, head );
2050 
2051  /*
2052  * The fourth parameter is supposed to be a vector of bits indicating
2053  * the overflowed hardware counters, but it's not really clear that
2054  * it's useful, because the actual hardware counters used are not
2055  * exposed to the PAPI user. For now, I'm just going to set the bit
2056  * that indicates which event register in the array overflowed. The
2057  * result is that the overflow vector will not be identical to the
2058  * perfmon implementation, and part of that is due to the fact that
2059  * which hardware register is actually being used is opaque at the
2060  * user level (the kernel event dispatcher hides that info).
2061  */
2062 
2063  _papi_hwi_dispatch_overflow_signal( ( void * ) &hw_context,
2064  ( caddr_t ) ( unsigned long ) ip,
2065  NULL, ( 1 << found_evt_idx ), 0,
2066  &thread, cidx );
2067 
2068  }
2069 
2070  /* Restart the counters */
2071  if (ioctl( fd, PERF_EVENT_IOC_REFRESH, PAPI_REFRESH_VALUE ) == -1) {
2072  PAPIERROR( "overflow refresh failed", 0 );
2073  }
2074 }
2075 
2076 /* Stop profiling */
2077 /* FIXME: does this actually stop anything? */
2078 /* It looks like it is only actually called from PAPI_stop() */
2079 /* So the event will be destroyed soon after anyway. */
2080 static int
2082 {
2083  int i, ret = PAPI_OK;
2084  pe_control_t *ctl;
2085  int cidx;
2086 
2087  ctl=ESI->ctl_state;
2088 
2089  cidx=ctl->cidx;
2090 
2091  /* Loop through all of the events and process those which have mmap */
2092  /* buffers attached. */
2093  for ( i = 0; i < ctl->num_events; i++ ) {
2094  /* Use the mmap_buf field as an indicator */
2095  /* of this fd being used for profiling. */
2096  if ( ctl->events[i].profiling ) {
2097  /* Process any remaining samples in the sample buffer */
2098  ret = process_smpl_buf( i, &thread, cidx );
2099  if ( ret ) {
2100  PAPIERROR( "process_smpl_buf returned error %d", ret );
2101  return ret;
2102  }
2103  ctl->events[i].profiling=0;
2104  }
2105  }
2106 
2107  return ret;
2108 }
2109 
2110 /* Set up an event to cause overflow */
2111 /* If threshold==0 then disable overflow for that event */
2112 static int
2113 _pe_set_overflow( EventSetInfo_t *ESI, int EventIndex, int threshold )
2114 {
2115  SUBDBG("ENTER: ESI: %p, EventIndex: %d, threshold: %d\n",
2116  ESI, EventIndex, threshold);
2117 
2118  pe_context_t *ctx;
2119  pe_control_t *ctl = (pe_control_t *) ( ESI->ctl_state );
2120  int i, evt_idx, found_non_zero_sample_period = 0, retval = PAPI_OK;
2121  int cidx;
2122 
2123  cidx = ctl->cidx;
2124  ctx = ( pe_context_t *) ( ESI->master->context[cidx] );
2125 
2126  /* pos[0] is the first native event */
2127  /* derived events might be made up of multiple native events */
2128  evt_idx = ESI->EventInfoArray[EventIndex].pos[0];
2129 
2130  SUBDBG("Attempting to set overflow for index %d (%d) of EventSet %d\n",
2131  evt_idx,EventIndex,ESI->EventSetIndex);
2132 
2133  if (evt_idx<0) {
2134  SUBDBG("EXIT: evt_idx: %d\n", evt_idx);
2135  return PAPI_EINVAL;
2136  }
2137 
2138  /* It's an error to disable overflow if it wasn't set in the */
2139  /* first place. */
2140  if (( threshold == 0 ) &&
2141  ( ctl->events[evt_idx].attr.sample_period == 0 ) ) {
2142  SUBDBG("EXIT: PAPI_EINVAL, Tried to clear "
2143  "sample threshold when it was not set\n");
2144  return PAPI_EINVAL;
2145  }
2146 
2147  /* Set the sample period to threshold */
2148  ctl->events[evt_idx].attr.sample_period = threshold;
2149 
2150  if (threshold == 0) {
2151  ctl->events[evt_idx].sampling = 0;
2152  }
2153  else {
2154  ctl->events[evt_idx].sampling = 1;
2155 
2156  /* Setting wakeup_events to one means issue a wakeup on every */
2157  /* counter overflow (not mmap page overflow). */
2158  ctl->events[evt_idx].attr.wakeup_events = 1;
2159  /* We need the IP to pass to the overflow handler */
2160  ctl->events[evt_idx].attr.sample_type = PERF_SAMPLE_IP;
2161  }
2162 
2163 
2164  /* Check to see if any events in the EventSet are setup to sample */
2165  /* Do we actually handle multiple overflow events at once? --vmw */
2166  for ( i = 0; i < ctl->num_events; i++ ) {
2167  if ( ctl->events[i].attr.sample_period ) {
2168  found_non_zero_sample_period = 1;
2169  break;
2170  }
2171  }
2172 
2173  if ( found_non_zero_sample_period ) {
2174  /* turn on internal overflow flag for this event set */
2175  ctl->overflow = 1;
2176 
2177  /* Enable the signal handler */
2179  ctl->overflow_signal,
2180  1, ctl->cidx );
2181  if (retval != PAPI_OK) {
2182  SUBDBG("Call to _papi_hwi_start_signal "
2183  "returned: %d\n", retval);
2184  }
2185  } else {
2186 
2187  /* turn off internal overflow flag for this event set */
2188  ctl->overflow = 0;
2189 
2190  /* Remove the signal handler, if there are no remaining */
2191  /* non-zero sample_periods set */
2193  if ( retval != PAPI_OK ) {
2194  SUBDBG("Call to _papi_hwi_stop_signal "
2195  "returned: %d\n", retval);
2196  return retval;
2197  }
2198  }
2199 
2200  retval = _pe_update_control_state( ctl, NULL,
2201  ((pe_control_t *)(ESI->ctl_state) )->num_events,
2202  ctx );
2203 
2204  SUBDBG("EXIT: return: %d\n", retval);
2205 
2206  return retval;
2207 }
2208 
2209 /* Enable/disable profiling */
2210 /* If threshold is zero, we disable */
2211 static int
2212 _pe_set_profile( EventSetInfo_t *ESI, int EventIndex, int threshold )
2213 {
2214  int ret;
2215  int evt_idx;
2216  pe_control_t *ctl = ( pe_control_t *) ( ESI->ctl_state );
2217 
2218  /* Since you can't profile on a derived event, */
2219  /* the event is always the first and only event */
2220  /* in the native event list. */
2221  evt_idx = ESI->EventInfoArray[EventIndex].pos[0];
2222 
2223  /* If threshold is zero we want to *disable* */
2224  /* profiling on the event */
2225  if ( threshold == 0 ) {
2226 // SUBDBG( "MUNMAP(%p,%"PRIu64")\n",
2227 // ctl->events[evt_idx].mmap_buf,
2228 // ( uint64_t ) ctl->events[evt_idx].nr_mmap_pages *
2229 // getpagesize() );
2230 
2231 // if ( ctl->events[evt_idx].mmap_buf ) {
2232 // munmap( ctl->events[evt_idx].mmap_buf,
2233 // ctl->events[evt_idx].nr_mmap_pages *
2234 // getpagesize() );
2235 // }
2236 // ctl->events[evt_idx].mmap_buf = NULL;
2237 // ctl->events[evt_idx].nr_mmap_pages = 0;
2238 
2239  /* no longer sample on IP */
2240  ctl->events[evt_idx].attr.sample_type &= ~PERF_SAMPLE_IP;
2241 
2242  /* Clear any residual overflow flags */
2243  /* ??? old warning says "This should be handled somewhere else" */
2244  ESI->state &= ~( PAPI_OVERFLOWING );
2245  ESI->overflow.flags &= ~( PAPI_OVERFLOW_HARDWARE );
2246 
2247  ctl->events[evt_idx].profiling=0;
2248 
2249  } else {
2250 
2251  /* Otherwise, we are *enabling* profiling */
2252 
2253  /* Look up the native event code */
2254 
2255  if ( ESI->profile.flags & (PAPI_PROFIL_DATA_EAR |
2257  /* Not supported yet... */
2258  return PAPI_ENOSUPP;
2259  }
2260 
2261  if ( ESI->profile.flags & PAPI_PROFIL_RANDOM ) {
2262  /* This requires an ability to randomly alter the */
2263  /* sample_period within a given range. */
2264  /* Linux currently does not have this ability. FIXME */
2265  return PAPI_ENOSUPP;
2266  }
2267  ctl->events[evt_idx].profiling=1;
2268  }
2269 
2270  ret = _pe_set_overflow( ESI, EventIndex, threshold );
2271  if ( ret != PAPI_OK ) return ret;
2272 
2273  return PAPI_OK;
2274 }
2275 
2276 
2277 /************ INITIALIZATION / SHUTDOWN CODE *********************/
2278 
2279 
2280 /* Shutdown the perf_event component */
2281 static int
2283 
2284  /* deallocate our event table */
2286 
2287  /* Shutdown libpfm4 */
2289 
2290  return PAPI_OK;
2291 }
2292 
2293 
2294 /* Check the mmap page for rdpmc support */
2295 static int _pe_detect_rdpmc(void) {
2296 
2297  struct perf_event_attr pe;
2298  int fd,rdpmc_exists=1;
2299  void *addr;
2300  struct perf_event_mmap_page *our_mmap;
2301  int page_size=getpagesize();
2302 
2303 #if defined(__i386__) || defined (__x86_64__)
2304 #else
2305  /* We only support rdpmc on x86 for now */
2306  return 0;
2307 #endif
2308 
2309  /* There were various subtle bugs in rdpmc support before */
2310  /* the Linux 4.13 release. */
2311  if (_papi_os_info.os_version < LINUX_VERSION(4,13,0)) {
2312  return 0;
2313  }
2314 
2315  /* Create a fake instructions event so we can read a mmap page */
2316  memset(&pe,0,sizeof(struct perf_event_attr));
2317 
2318  pe.type=PERF_TYPE_HARDWARE;
2319  pe.size=sizeof(struct perf_event_attr);
2320  pe.config=PERF_COUNT_HW_INSTRUCTIONS;
2321  pe.exclude_kernel=1;
2322  pe.disabled=1;
2323 
2324  perf_event_dump_attr(&pe,0,-1,-1,0);
2325  fd=sys_perf_event_open(&pe,0,-1,-1,0);
2326 
2327  /* This hopefully won't happen? */
2328  /* Though there is a chance this is the first */
2329  /* attempt to open a perf_event */
2330  if (fd<0) {
2331  SUBDBG("FAILED perf_event_open trying to detect rdpmc support");
2332  return PAPI_ESYS;
2333  }
2334 
2335  /* create the mmap page */
2336  addr=mmap(NULL, page_size, PROT_READ, MAP_SHARED,fd,0);
2337  if (addr == MAP_FAILED) {
2338  SUBDBG("FAILED mmap trying to detect rdpmc support");
2339  close(fd);
2340  return PAPI_ESYS;
2341  }
2342 
2343  /* get the rdpmc info from the mmap page */
2344  our_mmap=(struct perf_event_mmap_page *)addr;
2345 
2346  /* If cap_usr_rdpmc bit is set to 1, we have support! */
2347  if (our_mmap->cap_usr_rdpmc!=0) {
2348  rdpmc_exists=1;
2349  }
2350  else if ((!our_mmap->cap_bit0_is_deprecated) && (our_mmap->cap_bit0)) {
2351  /* 3.4 to 3.11 had somewhat broken rdpmc support */
2352  /* This convoluted test is the "official" way to detect this */
2353  /* To make things easier we don't support these kernels */
2354  rdpmc_exists=0;
2355  }
2356  else {
2357  rdpmc_exists=0;
2358  }
2359 
2360  /* close the fake event */
2361  munmap(addr,page_size);
2362  close(fd);
2363 
2364  return rdpmc_exists;
2365 
2366 }
2367 
2368 
2369 static int
2371 
2372  FILE *fff;
2373  int paranoid_level;
2374  int retval;
2375 
2376  /* The is the official way to detect if perf_event support exists */
2377  /* The file is called perf_counter_paranoid on 2.6.31 */
2378  /* currently we are lazy and do not support 2.6.31 kernels */
2379 
2380  fff=fopen("/proc/sys/kernel/perf_event_paranoid","r");
2381  if (fff==NULL) {
2382  strncpy(component->cmp_info.disabled_reason,
2383  "perf_event support not detected",PAPI_MAX_STR_LEN);
2384  return PAPI_ENOCMP;
2385  }
2386 
2387  /* 3 (vendor patch) means completely disabled */
2388  /* 2 means no kernel measurements allowed */
2389  /* 1 means normal counter access */
2390  /* 0 means you can access CPU-specific data */
2391  /* -1 means no restrictions */
2392  retval=fscanf(fff,"%d",&paranoid_level);
2393  if (retval!=1) fprintf(stderr,"Error reading paranoid level\n");
2394  fclose(fff);
2395 
2396  if (paranoid_level==3) {
2397  strncpy(component->cmp_info.disabled_reason,
2398  "perf_event support disabled by Linux with paranoid=3",PAPI_MAX_STR_LEN);
2399  return PAPI_ENOCMP;
2400  }
2401 
2402  if ((paranoid_level==2) && (getuid()!=0)) {
2403  SUBDBG("/proc/sys/kernel/perf_event_paranoid prohibits kernel counts");
2405  }
2406 
2407  return PAPI_OK;
2408 
2409 }
2410 
#if (OBSOLETE_WORKAROUNDS==1)
/* Version based workarounds */
/* perf_event has many bugs */
/* PAPI has to work around a number of them, but for the most part */
/* all of those were fixed by Linux 2.6.34 (May 2010) */
/* Unfortunately it's not easy to auto-detect for these so we were */
/* going by uname() version number */
/* To complicate things, some vendors like Redhat backport fixes */
/* So even though their kernel reports as 2.6.32 it has the fixes */
/* As of PAPI 5.6 we're going to default to disabling the workarounds */
/* I'm going to leave them here, ifdefed out, for the time being */
static int
_pe_version_workarounds(papi_vector_t *component) {

	/* Kernel multiplexing is broken prior to kernel 2.6.34 */
	/* The fix was probably git commit: */
	/* 45e16a6834b6af098702e5ea6c9a40de42ff77d8 */
	if (_papi_os_info.os_version < LINUX_VERSION(2,6,34)) {
		component->cmp_info.kernel_multiplex = 0;
		/* Fall back to software multiplexing limits */
		component->cmp_info.num_mpx_cntrs = PAPI_MAX_SW_MPX_EVENTS;
	}

	/* Check that processor is supported */
	if (processor_supported(_papi_hwi_system_info.hw_info.vendor,
			_papi_hwi_system_info.hw_info.cpuid_family) != PAPI_OK) {
		fprintf(stderr,"warning, your processor is unsupported\n");
		/* should not return error, as software events should still work */
	}

	/* Update the default function pointers */
	/* Based on features/bugs */
	if (bug_sync_read()) {
		component->read = _pe_read_bug_sync;
	}

	return PAPI_OK;

}

#endif
2451 
2452 
2453 
2454 
2455 /* Initialize the perf_event component */
2456 static int
2458 {
2459 
2460  int retval;
2461 
2462  our_cidx=cidx;
2463 
2464  /* Update component behavior based on paranoid setting */
2466  if (retval!=PAPI_OK) return retval;
2467 
2468 #if (OBSOLETE_WORKAROUNDS==1)
2469  /* Handle any kernel version related workarounds */
2470  _pe_version_workarounds(_papi_hwd[cidx]);
2471 #endif
2472 
2473  /* Setup mmtimers, if appropriate */
2475  if (retval) {
2476  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
2477  "Error initializing mmtimer",PAPI_MAX_STR_LEN);
2478  return retval;
2479  }
2480 
2481  /* Set the overflow signal */
2482  _papi_hwd[cidx]->cmp_info.hardware_intr_sig = SIGRTMIN + 2;
2483 
2484  /* Run Vendor-specific fixups */
2486 
2487  /* Detect if we can use rdpmc (or equivalent) */
2489  _papi_hwd[cidx]->cmp_info.fast_counter_read = retval;
2490  if (retval < 0 ) {
2491  /* Don't actually fail here, as could be a surivable bug? */
2492  /* If perf_event_open/mmap truly are failing we will */
2493  /* likely catch it pretty quickly elsewhere. */
2494  _papi_hwd[cidx]->cmp_info.fast_counter_read = 0;
2495  }
2496 
2497 #if (USE_PERFEVENT_RDPMC==1)
2498 
2499 #else
2500  /* Force fast_counter_read off if --enable-perfevent-rdpmc=no */
2501  _papi_hwd[cidx]->cmp_info.fast_counter_read = 0;
2502 #endif
2503 
2504  /* Run the libpfm4-specific setup */
2506  if (retval) {
2507 
2508  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
2509  "Error initializing libpfm4",PAPI_MAX_STR_LEN);
2510  return retval;
2511 
2512  }
2513 
2514  /* Now that libpfm4 is initialized */
2515  /* Try to setup the perf_event component events */
2516 
2520  if (retval) {
2521  switch(retval) {
2522  case PAPI_ENOMEM:
2523  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
2524  "Error libpfm4 memory allocation",
2526  break;
2527  case PAPI_ENOSUPP:
2528  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
2529  "Error libpfm4 no PMUs found",
2531  break;
2532  case PAPI_ENOCMP:
2533  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
2534  "Error libpfm4 no default PMU found",
2536  break;
2537  case PAPI_ECOUNT:
2538  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
2539  "Error libpfm4 too many default PMUs found",
2541  break;
2542  case PAPI_ENOEVNT:
2543  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
2544  "Error loading preset events",
2546  break;
2547  default:
2548  strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
2549  "Unknown libpfm4 related error",
2551 
2552  }
2553  return retval;
2554  }
2555 
2556  /* Detect NMI watchdog which can steal counters */
2557  /* FIXME: on Intel we should also halve the count if SMT enabled */
2559  if (_papi_hwd[cidx]->cmp_info.num_cntrs>0) {
2560  _papi_hwd[cidx]->cmp_info.num_cntrs--;
2561  }
2562  SUBDBG("The Linux nmi_watchdog is using one of the performance "
2563  "counters, reducing the total number available.\n");
2564  }
2565 
2566  /* check for exclude_guest issue */
2568 
2569  return PAPI_OK;
2570 
2571 }
2572 
2573 
2574 
2575 /* Our component vector */
2576 
2578  .cmp_info = {
2579  /* component information (unspecified values initialized to 0) */
2580  .name = "perf_event",
2581  .short_name = "perf",
2582  .version = "5.0",
2583  .description = "Linux perf_event CPU counters",
2584 
2585  .default_domain = PAPI_DOM_USER,
2586  .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_SUPERVISOR,
2587  .default_granularity = PAPI_GRN_THR,
2588  .available_granularities = PAPI_GRN_THR | PAPI_GRN_SYS,
2589 
2590  .hardware_intr = 1,
2591  .kernel_profile = 1,
2592 
2593  /* component specific cmp_info initializations */
2594  .fast_virtual_timer = 0,
2595  .attach = 1,
2596  .attach_must_ptrace = 1,
2597  .cpu = 1,
2598  .inherit = 1,
2599  .cntr_umasks = 1,
2600 
2601  .kernel_multiplex = 1,
2602  .num_mpx_cntrs = PERF_EVENT_MAX_MPX_COUNTERS,
2603 
2604 
2605  },
2606 
2607  /* sizes of framework-opaque component-private structures */
2608  .size = {
2609  .context = sizeof ( pe_context_t ),
2610  .control_state = sizeof ( pe_control_t ),
2611  .reg_value = sizeof ( int ),
2612  .reg_alloc = sizeof ( int ),
2613  },
2614 
2615  /* function pointers in this component */
2616  .init_component = _pe_init_component,
2617  .shutdown_component = _pe_shutdown_component,
2618  .init_thread = _pe_init_thread,
2619  .init_control_state = _pe_init_control_state,
2620  .dispatch_timer = _pe_dispatch_timer,
2621 
2622  /* function pointers from the shared perf_event lib */
2623  .start = _pe_start,
2624  .stop = _pe_stop,
2625  .read = _pe_read,
2626  .shutdown_thread = _pe_shutdown_thread,
2627  .ctl = _pe_ctl,
2628  .update_control_state = _pe_update_control_state,
2629  .set_domain = _pe_set_domain,
2630  .reset = _pe_reset,
2631  .set_overflow = _pe_set_overflow,
2632  .set_profile = _pe_set_profile,
2633  .stop_profiling = _pe_stop_profiling,
2634  .write = _pe_write,
2635 
2636 
2637  /* from counter name mapper */
2638  .ntv_enum_events = _pe_ntv_enum_events,
2639  .ntv_name_to_code = _pe_ntv_name_to_code,
2640  .ntv_code_to_name = _pe_ntv_code_to_name,
2641  .ntv_code_to_descr = _pe_ntv_code_to_descr,
2642  .ntv_code_to_info = _pe_ntv_code_to_info,
2643 };
#define PAPI_OK
Definition: fpapi.h:105
i inherit inherit
ssize_t read(int fd, void *buf, size_t count)
Definition: appio.c:225
static int _pe_init_component(int cidx)
Definition: perf_event.c:2457
long long counts[PERF_EVENT_MAX_MPX_COUNTERS]
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:637
int _papi_hwi_get_ntv_idx(unsigned int papi_evt_code)
_papi_int_inherit_t inherit
static int process_smpl_buf(int evt_idx, ThreadInfo_t **thr, int cidx)
Definition: perf_event.c:1905
int errno
int close(int fd)
Definition: appio.c:175
#define PAPI_GRANUL
Definition: fpapi.h:52
#define PAPI_ENOMEM
Definition: fpapi.h:107
static const char * name
Definition: fork_overflow.c:31
#define PAPI_CPU_ATTACH
Definition: papi.h:458
#define PAPI_DOM_KERNEL
Definition: fpapi.h:22
#define PAPI_EINVAL
Definition: fpapi.h:106
#define PERF_EVENT_MAX_MPX_COUNTERS
Definition: perf_event_lib.h:5
EventSetInfo_t * ESI
static int close_pe_events(pe_context_t *ctx, pe_control_t *ctl)
Definition: perf_event.c:916
#define PAPI_GRN_THR
Definition: fpapi.h:67
struct native_event_t * native_events
unsigned int granularity
#define PAPI_DEF_ITIMER_NS
Definition: papi.h:456
#define _papi_getcpu()
Definition: linux-common.h:46
EventSetInfo_t * ESI
int _papi_libpfm4_init(papi_vector_t *my_vector)
static int _pe_write(hwd_context_t *ctx, hwd_control_state_t *ctl, long long *from)
Definition: perf_event.c:1062
#define PAPI_ENOSUPP
Definition: fpapi.h:123
#define PAPI_INSTR_ADDRESS
Definition: papi.h:454
#define PAPI_PROFIL_DATA_EAR
Definition: papi.h:405
static int _pe_set_domain(hwd_control_state_t *ctl, int domain)
Definition: perf_event.c:986
_papi_int_addr_range_t address_range
#define READ_BUFFER_SIZE
Definition: perf_event.c:494
int default_granularity
Definition: papi.h:646
static int _pe_shutdown_thread(hwd_context_t *ctx)
Definition: perf_event.c:1001
static unsigned long long mmap_read_self(void *addr, unsigned long long *en, unsigned long long *ru)
Definition: perf_helpers.h:158
static int _pe_stop(hwd_context_t *ctx, hwd_control_state_t *ctl)
Definition: perf_event.c:1459
#define PAPI_DATA_ADDRESS
Definition: papi.h:453
int(* read)(hwd_context_t *, hwd_control_state_t *, long long **, int)
Definition: papi_vector.h:30
int fd
Definition: iozone.c:1291
#define PAPI_REFRESH_VALUE
Definition: perf_event.c:83
static int bug_format_group(void)
Definition: perf_event.c:175
static int _pe_ntv_code_to_descr(unsigned int EventCode, char *ntv_descr, int len)
Definition: perf_event.c:1856
EventSetInfo_t * ESI
static int set_irange(hwd_context_t *ctx, hwd_control_state_t *current_state, _papi_int_option_t *option)
Definition: perfmon-ia64.c:919
#define PAPI_MULTIPLEX
Definition: fpapi.h:48
static int _pe_read_multiplexed(pe_control_t *pe_ctl)
Definition: perf_event.c:1153
#define PAPI_GRN_SYS
Definition: fpapi.h:71
pe_event_info_t events[PERF_EVENT_MAX_MPX_COUNTERS]
int _pe_libpfm4_ntv_enum_events(unsigned int *PapiEventCode, int modifier, int cidx, struct native_event_table_t *event_table)
int _pe_libpfm4_ntv_name_to_code(const char *name, unsigned int *event_code, int cidx, struct native_event_table_t *event_table)
#define PERF_EVENTS_RUNNING
Definition: perf_event.c:65
static int _pe_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info)
Definition: perf_event.c:1865
#define PAPI_EBUG
Definition: fpapi.h:111
#define PMU_TYPE_OS
static int find_profile_index(EventSetInfo_t *ESI, int evt_idx, int *flags, unsigned int *native_index, int *profile_index)
Definition: perf_event.c:1878
#define PAPI_EPERM
Definition: fpapi.h:120
static pid_t mygettid(void)
Definition: darwin-common.h:11
int retval
Definition: zero_fork.c:53
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
#define PAPI_MAX_SW_MPX_EVENTS
Definition: sw_multiplex.h:4
static int check_permissions(unsigned long tid, unsigned int cpu_num, unsigned int domain, unsigned int granularity, unsigned int multiplex, unsigned int inherit)
Definition: perf_event.c:433
static int set_default_domain(EventSetInfo_t *zero, int domain)
Definition: aix.c:510
unsigned int attached
#define PAPI_INHERIT
Definition: papi.h:459
static int _pe_update_control_state(hwd_control_state_t *ctl, NativeInfo_t *native, int count, hwd_context_t *ctx)
Definition: perf_event.c:1501
Return codes and api definitions.
uint32_t nr_mmap_pages
FILE * fff[MAX_EVENTS]
int _pe_libpfm4_init(papi_vector_t *component, int cidx, struct native_event_table_t *event_table, int pmu_type)
#define PAPI_DOM_OTHER
Definition: fpapi.h:23
unsigned int domain
char events[MAX_EVENTS][BUFSIZ]
int multiplex(void)
Definition: multiplex.c:35
#define PAPI_ECMP
Definition: fpapi.h:109
_papi_int_attach_t attach
#define PAPI_ESYS
Definition: fpapi.h:108
unsigned int overflow
unsigned long tid
#define PAPI_ENOCMP
Definition: fpapi.h:122
int _pe_libpfm4_ntv_code_to_name(unsigned int EventCode, char *ntv_name, int len, struct native_event_table_t *event_table)
papi_vector_t _perf_event_vector
Definition: perf_event.c:68
static int cidx
#define PAPI_DEF_MPX_NS
Definition: fpapi.h:53
_papi_int_cpu_t cpu
static int _pe_set_profile(EventSetInfo_t *ESI, int EventIndex, int threshold)
Definition: perf_event.c:2212
EventSetOverflowInfo_t overflow
#define PAPI_OVERFLOW_HARDWARE
Definition: papi.h:413
unsigned int fast_real_timer
Definition: papi.h:661
static int _pe_start(hwd_context_t *ctx, hwd_control_state_t *ctl)
Definition: perf_event.c:1413
#define PAPI_DOM_SUPERVISOR
Definition: fpapi.h:24
PAPI_os_info_t _papi_os_info
Definition: aix.c:1210
struct _ThreadInfo * master
#define PAPI_VENDOR_IBM
Definition: papi.h:351
static int _pe_rdpmc_read(hwd_context_t *ctx, hwd_control_state_t *ctl, long long **events, int flags)
Definition: perf_event.c:1096
static int pe_vendor_fixups(papi_vector_t *vector)
Definition: perf_event.c:112
static int set_up_mmap(pe_control_t *ctl, int evt_idx)
Definition: perf_event.c:625
static int pid
long long page_size
Definition: iozone.c:428
static int check_scheduability(pe_context_t *ctx, pe_control_t *ctl, int idx)
Definition: perf_event.c:504
unsigned int fast_counter_read
Definition: papi.h:660
static int exclude_guest_unsupported
Definition: perf_event.c:73
static int _pe_detect_rdpmc(void)
Definition: perf_event.c:2295
hwd_ucontext_t * ucontext
#define PAPI_DOM_USER
Definition: fpapi.h:21
char model_string[PAPI_MAX_STR_LEN]
Definition: papi.h:791
void * thread(void *arg)
Definition: kufrin.c:38
#define PERF_EVENTS_OPENED
Definition: perf_event.c:64
static int native
int cpuid_model
Definition: papi.h:794
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
#define PAPI_PROFIL_INST_EAR
Definition: papi.h:406
#define PAPI_VENDOR_MIPS
Definition: papi.h:356
_papi_int_granularity_t granularity
EventSetInfo_t * ESI
static int configure_fd_for_sampling(pe_control_t *ctl, int evt_idx)
Definition: perf_event.c:579
void PAPIERROR(char *format,...)
unsigned int multiplexed
int _papi_hwi_start_signal(int signal, int need_context, int cidx)
Definition: extras.c:403
static int check_exclude_guest(void)
Definition: perf_event.c:272
int mmtimer_setup(void)
Definition: linux-timer.c:130
unsigned int kernel_multiplex
Definition: papi.h:657
struct native_event_table_t * event_table
static int _pe_reset(hwd_context_t *ctx, hwd_control_state_t *ctl)
Definition: perf_event.c:1035
long long ret
Definition: iozone.c:1346
#define PAPI_VENDOR_ARM
Definition: papi.h:355
char name[PAPI_MAX_STR_LEN]
Definition: papi.h:630
#define PMU_TYPE_CORE
int _papi_hwi_stop_signal(int signal)
Definition: extras.c:443
static void perf_event_dump_attr(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long int flags)
Definition: perf_event.c:319
int _papi_libpfm4_shutdown(papi_vector_t *my_vector)
EventSetInfo_t * ESI
int cnt[ctr_pcp_ntv_code_to_info+1]
Definition: linux-pcp.c:215
_papi_int_multiplex_t multiplex
static long sys_perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
Definition: perf_helpers.h:21
static int close_event(pe_event_info_t *event)
Definition: perf_event.c:882
NativeInfo_t * NativeInfoArray
EventInfo_t * EventInfoArray
int cpuid_family
Definition: papi.h:793
static int threshold
#define PAPI_VENDOR_CRAY
Definition: papi.h:352
papi_mdi_t _papi_hwi_system_info
Definition: papi_internal.c:56
static int _pe_stop_profiling(ThreadInfo_t *thread, EventSetInfo_t *ESI)
Definition: perf_event.c:2081
PAPI_hw_info_t hw_info
unsigned int overflow_signal
int _pe_libpfm4_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info, struct native_event_table_t *event_table)
#define PAPI_VENDOR_INTEL
Definition: papi.h:349
int pos[PAPI_EVENTS_IN_DERIVED_EVENT]
#define PAPI_PROFIL_RANDOM
Definition: fpapi.h:76
static int _pe_read(hwd_context_t *ctx, hwd_control_state_t *ctl, long long **events, int flags)
Definition: perf_event.c:1263
static void _pe_dispatch_timer(int n, hwd_siginfo_t *info, void *uc)
Definition: perf_event.c:1930
int _pe_libpfm4_ntv_code_to_descr(unsigned int EventCode, char *ntv_descr, int len, struct native_event_table_t *event_table)
#define LINUX_VERSION(a, b, c)
Definition: linux-common.h:4
static void mmap_read(int cidx, ThreadInfo_t **thr, pe_event_info_t *pe, int profile_index)
Definition: perf_helpers.h:223
int vendor
Definition: papi.h:788
unsigned int cpu_num
static int map_perf_event_errors_to_papi(int perf_event_error)
Definition: perf_event.c:383
int _pe_libpfm4_shutdown(papi_vector_t *my_vector, struct native_event_table_t *event_table)
static int _pe_shutdown_component(void)
Definition: perf_event.c:2282
#define PAPI_OVERFLOW_FORCE_SW
Definition: papi.h:412
static int fcntl_setown_fd(int fd)
Definition: perf_event.c:216
static int set_drange(hwd_context_t *ctx, hwd_control_state_t *current_state, _papi_int_option_t *option)
Definition: perfmon-ia64.c:767
#define PAPI_DEF_ITIMER
Definition: papi.h:455
perf_event_attr_t attr
struct perf_event_attr attr
#define PAPI_ENOEVNT
Definition: fpapi.h:112
struct native_event_table_t perf_native_event_table
Definition: perf_event.c:71
#define PAPI_DETACH
Definition: fpapi.h:66
#define MAP_FAILED
Definition: iozone.c:336
static int our_cidx
Definition: perf_event.c:72
inline_static ThreadInfo_t * _papi_hwi_lookup_thread(int custom_tid)
Definition: threads.h:92
#define PAPI_NATIVE_AND_MASK
#define PAPI_PROFIL_FORCE_SW
Definition: papi.h:404
#define PAPI_ATTACH
Definition: fpapi.h:62
static int _pe_ntv_name_to_code(const char *name, unsigned int *event_code)
Definition: perf_event.c:1840
unsigned int inherit
struct papi_vectors * _papi_hwd[]
#define PAPI_DOMAIN
Definition: fpapi.h:50
int _linux_detect_nmi_watchdog()
Definition: linux-common.c:719
#define F_OWNER_TID
Definition: linux-common.h:28
#define PAPI_GRN_SYS_CPU
Definition: fpapi.h:72
_papi_int_domain_t domain
char * caddr_t
static int _pe_read_nogroup(pe_control_t *pe_ctl)
Definition: perf_event.c:1225
static int _pe_ctl(hwd_context_t *ctx, int code, _papi_int_option_t *option)
Definition: perf_event.c:1631
hwd_siginfo_t * si
#define PAPI_ECNFLCT
Definition: fpapi.h:113
#define PAPI_OVERFLOWING
Definition: fpapi.h:33
static unsigned int get_read_format(unsigned int multiplex, unsigned int inherit, int format_group)
Definition: perf_event.c:241
#define F_SETOWN_EX
Definition: linux-common.h:25
int _papi_hwi_dispatch_overflow_signal(void *papiContext, caddr_t address, int *isHardware, long long overflow_bit, int genOverflowBit, ThreadInfo_t **t, int cidx)
Definition: extras.c:216
static void mmap_write_tail(pe_event_info_t *pe, uint64_t tail)
Definition: perf_helpers.h:196
#define PAPI_ECOUNT
Definition: fpapi.h:128
EventSetInfo_t * ESI
EventSetProfileInfo_t profile
hwd_control_state_t * ctl_state
#define PAPI_GRN_PROCG
Definition: fpapi.h:70
static int _pe_handle_paranoid(papi_vector_t *component)
Definition: perf_event.c:2370
#define PAPI_GRN_PROC
Definition: fpapi.h:69
static int _pe_init_thread(hwd_context_t *hwd_ctx)
Definition: perf_event.c:1012
#define GET_OVERFLOW_ADDRESS(ctx)
Definition: aix-context.h:12
static int _pe_init_control_state(hwd_control_state_t *ctl)
Definition: perf_event.c:1804
static int _pe_ntv_enum_events(unsigned int *PapiEventCode, int modifier)
Definition: perf_event.c:1833
#define PAPI_PROFILING
Definition: fpapi.h:34
static int open_pe_events(pe_context_t *ctx, pe_control_t *ctl)
Definition: perf_event.c:681
static long count
EventSetInfo_t * ESI
static int _pe_ntv_code_to_name(unsigned int EventCode, char *ntv_name, int len)
Definition: perf_event.c:1847
#define PAPI_VENDOR_AMD
Definition: papi.h:350
int i
Definition: fileop.c:140
static int _pe_set_overflow(EventSetInfo_t *ESI, int EventIndex, int threshold)
Definition: perf_event.c:2113
static uint64_t mmap_read_head(pe_event_info_t *pe)
Definition: perf_helpers.h:179
#define PAPI_MAX_STR_LEN
Definition: fpapi.h:43