PAPI  5.7.0.0
cuda_ld_preload_example.c
Go to the documentation of this file.
1 /*
2  Example of using LD_PRELOAD with the CUDA component.
3  Asim YarKhan
4 
5  This is designed to work with the simpleMultiGPU_no_counters binary
6  in the PAPI CUDA component tests directory. First trace the library
7  calls in simpleMultiGPU_no_counters binary using ltrace. Note in
8  the ltrace output that the CUDA C APIs are different from the CUDA
9  calls visible to nvcc. Then figure out an appropriate place to attach
10  the PAPI calls. The initialization is attached to the first entry
11  to cudaSetDevice. Each cudaSetDevice is also used to setup the PAPI
12  events for that device. It was harder to figure out where to attach
13  the PAPI_start. After running some tests, I attached it to the 17th
14  invocation of gettimeofday (kind of arbitrary! Sorry!). The
15  PAPI_stop was attached to the first invocation of cudaFreeHost.
16 
17 */
18 
19 #define _GNU_SOURCE
20 
21 #include <stdio.h>
22 #include <dlfcn.h>
23 
24 #include "papi.h"
25 
26 #define MAXDEVICES 5
28 int devseen[MAXDEVICES] = {0};
29 
30 static void *dl1;
33 int (*PAPI_add_named_event_ptr)(int EventSet, char *EventName);
34 int (*PAPI_start_ptr)(int EventSet);
35 int (*PAPI_stop_ptr)(int EventSet, long long * values);
38 int cudaSetDevice(int devnum, int n1, int n2, int n3, void *ptr1)
39 {
40  static int onetime = 0;
41  int retval, retval_cudaSetDevice;
42  //printf("cudaSetDevice wrapper %d\n", devnum);
43  if ( onetime==0 ) {
44  onetime=1;
45  // Load the papi library dynamically and read the relevant functions
46  dl1 = dlopen( "libpapi.so", RTLD_NOW | RTLD_GLOBAL );
47  if ( dl1==NULL ) printf("Intercept cudaSetDevice: Cannot load libpapi.so\n");
48  PAPI_library_init_ptr = dlsym( dl1, "PAPI_library_init" );
49  PAPI_create_eventset_ptr = dlsym( dl1, "PAPI_create_eventset" );
50  PAPI_add_named_event_ptr = dlsym( dl1, "PAPI_add_named_event" );
51  PAPI_start_ptr = dlsym( dl1, "PAPI_start" );
52  PAPI_stop_ptr = dlsym( dl1, "PAPI_stop" );
53  // Start using PAPI
54  printf("Intercept cudaSetDevice: Initializing PAPI on device %d\n", devnum);
56  if( retval != PAPI_VER_CURRENT ) fprintf( stdout, "PAPI_library_init failed\n" );
59  if( retval != PAPI_OK ) fprintf( stdout, "PAPI_create_eventset failed\n" );
60  }
61  int (*original_function)(int devnum, int n1, int n2, int n3, void *ptr1);
62  original_function = dlsym(RTLD_NEXT, "cudaSetDevice");
63  retval_cudaSetDevice = (*original_function)( devnum, n1, n2, n3, ptr1 );
64  if ( devseen[devnum]==0 ) {
65  devseen[devnum]=1;
66  char tmpEventName[120];
67  printf("Intercept cudaSetDevice: Attaching events for device on device %d\n", devnum);
68  snprintf( tmpEventName, 110, "cuda:::device:%d:%s", devnum, "inst_executed" );
69  retval = (PAPI_add_named_event_ptr)( EventSet, tmpEventName );
70  if (retval!=PAPI_OK) printf( "Could not add event %s\n", tmpEventName );
71  }
72  return retval_cudaSetDevice;
73 }
74 
75 
76 int gettimeofday(void *ptr1, void *ptr2)
77 {
78  static int onetime = 0;
79  onetime++;
80  // printf("gettimeofday onetime %d\n", onetime);
81  // Use above print statement to determine that the N-th gettime of day works
82  if ( onetime==17 ) {
83  printf("Intercept gettimeofday: Attaching PAPI_start to the %d th call to gettimeofday (this may need to be adjusted)\n", onetime);
84  int retval = (PAPI_start_ptr)( EventSet );
85  printf("Starting PAPI\n");
86  if( retval!=PAPI_OK ) fprintf( stdout, "PAPI_start failed\n" );
87  }
88  int (*original_function)(void *ptr1, void *ptr2);
89  original_function = dlsym(RTLD_NEXT, "gettimeofday");
90  return (*original_function)(ptr1, ptr2);
91 }
92 
93 int cudaFreeHost(void *ptr1, void *ptr2, int n1, int n2, void *ptr3)
94 {
95  static int onetime = 0;
96  long long values[10];
97  int retval, devnum;
98  onetime++;
99  if ( onetime==1 ) {
100  printf("Intercept cudaFreeHost: Used to get PAPI results\n" );
102  if( retval != PAPI_OK ) fprintf( stderr, "PAPI_stop failed\n" );
103  for( devnum = 0; devnum < MAXDEVICES && devseen[devnum]==1 ; devnum++ )
104  printf( "PAPI counterValue: cuda::device:%d:%s: %12lld \n", devnum, "inst_executed", values[devnum] );
105  }
106  int (*original_function)(void *ptr1, void *ptr2, int n1, int n2, void *ptr3);
107  original_function = dlsym(RTLD_NEXT, "cudaFreeHost");
108  return (*original_function)(ptr1, ptr2, n1, n2, ptr3);
109 }
110 
int devseen[MAXDEVICES]
#define PAPI_OK
Definition: fpapi.h:105
int(* PAPI_add_named_event_ptr)(int EventSet, char *EventName)
long long onetime
Definition: iozone.c:1290
int(* PAPI_stop_ptr)(int EventSet, long long *values)
int cudaSetDevice(int devnum, int n1, int n2, int n3, void *ptr1)
#define PAPI_VER_CURRENT
Definition: fpapi.h:14
int EventSet
int retval
Definition: zero_fork.c:53
Return codes and api definitions.
#define PAPI_VERSION
Definition: fpapi.h:15
int cudaFreeHost(void *ptr1, void *ptr2, int n1, int n2, void *ptr3)
#define PAPI_VERSION_MAJOR(x)
Definition: papi.h:217
#define PAPI_VERSION_REVISION(x)
Definition: papi.h:219
#define PAPI_NULL
Definition: fpapi.h:13
#define PAPI_VERSION_MINOR(x)
Definition: papi.h:218
char version[]
Definition: fileop.c:134
int(* PAPI_create_eventset_ptr)(int *EventSet)
int gettimeofday(void *ptr1, void *ptr2)
int(* PAPI_library_init_ptr)(int version)
int(* PAPI_start_ptr)(int EventSet)
#define MAXDEVICES
static long long values[NUM_EVENTS]
Definition: init_fini.c:10
static void * dl1