PAPI  5.0.1.0
linux-cuda.h
Go to the documentation of this file.
00001 /****************************/
00002 /* THIS IS OPEN SOURCE CODE */
00003 /****************************/
00004 
00018 #ifndef _PAPI_CUDA_H
00019 #define _PAPI_CUDA_H
00020 
00021 /* Headers required by CuPTI */
00022 #include "cupti_events.h"
00023 #include <cuda_runtime_api.h>
00024 
/*
 * CHECK_CU_ERROR(err, cufunc)
 *
 * Tests the status of a CUDA driver API call.  If `err` differs from
 * CUDA_SUCCESS, a diagnostic naming `cufunc` is printed and the ENCLOSING
 * function returns -1, so the macro may only be used inside functions whose
 * return type accepts -1.
 *
 *   err    - status value to test; evaluated exactly once
 *   cufunc - C string naming the API function that produced `err`
 *
 * The body is wrapped in do { } while (0) so that the macro expands to one
 * statement and can be followed by ';' inside an unbraced if/else.
 */
#define CHECK_CU_ERROR(err, cufunc)                                           \
    do {                                                                      \
        int papi_cu_status_ = (int) (err);                                    \
        if (papi_cu_status_ != (int) CUDA_SUCCESS) {                          \
            printf ("Error %d for CUDA Driver API function '%s'. cuptiQuery failed\n", \
                    papi_cu_status_, (cufunc));                               \
            return -1;                                                        \
        }                                                                     \
    } while (0)
00032 
/*
 * CHECK_CUPTI_ERROR(err, cuptifunc)
 *
 * Tests the status of a CuPTI API call.  If `err` differs from CUPTI_SUCCESS,
 * a diagnostic naming `cuptifunc` is printed and the ENCLOSING function
 * returns -1, so the macro may only be used inside functions whose return
 * type accepts -1.
 *
 *   err       - status value to test; evaluated exactly once
 *   cuptifunc - C string naming the API function that produced `err`
 *
 * The body is wrapped in do { } while (0) so that the macro expands to one
 * statement and can be followed by ';' inside an unbraced if/else.
 */
#define CHECK_CUPTI_ERROR(err, cuptifunc)                                     \
    do {                                                                      \
        int papi_cupti_status_ = (int) (err);                                 \
        if (papi_cupti_status_ != (int) CUPTI_SUCCESS) {                      \
            printf ("Error %d for CUPTI API function '%s'. cuptiQuery failed\n", \
                    papi_cupti_status_, (cuptifunc));                         \
            return -1;                                                        \
        }                                                                     \
    } while (0)
00040 
00041 
00042 
/*************************  DEFINES SECTION  ***********************************
 *******************************************************************************/

/* Fixed upper bound on the number of counters; it sizes the counts[] array in
 * CUDA_control_state_t.  This value assumes there will never be more events
 * than indicated here. */
#define CUDA_MAX_COUNTERS 512
00048 
00049 typedef struct EventData
00050 {
00051     CUpti_EventID eventId;             // CuPTI event id 
00052     char name[PAPI_MIN_STR_LEN];       // event name
00053     char desc[PAPI_2MAX_STR_LEN];      // short desc of the event
00054 } EventData_t;
00055 
00056 
00057 typedef struct DomainData
00058 {
00059     CUpti_EventDomainID domainId;      // CuPTI domain id
00060     char name[PAPI_MIN_STR_LEN];       // domain name
00061     uint32_t eventCount;               // number of events per domain
00062     EventData_t *event;
00063 } DomainData_t;
00064 
00065 
00066 typedef struct DeviceData
00067 {
00068     CUdevice dev;                      // CUDA device
00069     char name[PAPI_MIN_STR_LEN];       // device name
00070     uint32_t domainCount;              // number of domains per device
00071     DomainData_t *domain;
00072 } DeviceData_t;
00073 
00074 
/* Bookkeeping for the events that have been added to the CuPTI eventGroup. */
typedef struct AddedEvents
{
    int count;                         /* number of events that have been
                                          added to the CuPTI eventGroup */
    int *list;                         /* list of the added events */
} AddedEvents_t;
00080 
00081 
00083 typedef struct CUDA_register
00084 {
00085     /* This is used by the framework.It likes it to be !=0 to do somehting */
00086     unsigned int selector;
00087     /* This is the information needed to locate a CUDA event */
00088     CUpti_EventID eventId;
00089 } CUDA_register_t;
00090 
00091 
00093 typedef struct CUDA_native_event_entry
00094 {
00095     CUDA_register_t resources;
00096     char name[PAPI_MAX_STR_LEN];
00097     char description[PAPI_2MAX_STR_LEN];
00098 } CUDA_native_event_entry_t;
00099 
00100 
00101 typedef struct CUDA_reg_alloc
00102 {
00103     CUDA_register_t ra_bits;
00104 } CUDA_reg_alloc_t;
00105 
00106 
00107 typedef struct CUDA_control_state
00108 {
00109     CUpti_EventGroup eventGroup;
00110     AddedEvents_t addedEvents;
00111     long long counts[CUDA_MAX_COUNTERS];
00112     int ncounter;
00113     int old_count;
00114 } CUDA_control_state_t;
00115 
00116 /* Holds per-thread information */
00117 typedef struct CUDA_context
00118 {
00119     CUDA_control_state_t state;
00120 } CUDA_context_t;
00121 
00122  
00123 /*************************  GLOBALS SECTION  ***********************************
00124  *******************************************************************************/
00125 
00126 static int enumEventDomains( CUdevice dev, int deviceId );
00127 #ifdef CUDA_4_0
00128 static int enumEvents( CUdevice dev, int domainId, int eventCount );
00129 #else
00130 static int enumEvents( int domainId, int eventCount );
00131 #endif
00132 
00133 /* This table contains the CUDA native events */
00134 static CUDA_native_event_entry_t *cuda_native_table;
00135 /* number of events in the table */
00136 static int NUM_EVENTS = 0;
00137 static int deviceCount = 0;
00138 static int totalDomainCount = 0;
00139 static int totalEventCount = 0;
00140 static int currentDeviceID;            /* determine the actual device the user code is running on */
00141 static int CUDA_FREED = 0;
00142 
00143 /* 
00144  * Why are device and cuCtx globals?
00145  *
00146  * Starting in CUDA 4.0, multiple CPU threads can access the same CUDA context.
00147  * This is a much easier programming model then pre-4.0 as threads - using the 
00148  * same context - can share memory, data, etc. 
00149  * It's possible to create a different context for each thread, but then we are
00150  * likely running into a limitation that only one context can be profiled at a time.
00151  * ==> and we don't want this. That's why CUDA context creation is done in 
00152  * CUDA_init_component() (called only by main thread) rather than CUDA_init_thread() 
00153  * or CUDA_init_control_state() (both called by each thread).
00154  */
00155 
00156 static DeviceData_t *device;
00157 static CUcontext cuCtx;
00158 
00159 #endif /* _PAPI_CUDA_H */
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines