|
PAPI
5.3.0.0
|
This file has the source code for a component that enables PAPI-C to access hardware monitoring counters for GPU devices through the CUPTI library. More...

Go to the source code of this file.
Definition in file linux-cuda.c.
| #define CUDAAPI __attribute__((weak)) |
| #define CUDARTAPI __attribute__((weak)) |
| #define CUPTIAPI __attribute__((weak)) |
| static int createNativeEvents | ( | void | ) | [static] |
Definition at line 380 of file linux-cuda.c.
{
    /*
     * Populate cuda_native_table with one entry per (device, domain, event)
     * triple found in the device table. Each entry is named
     * "<device>:<domain>:<event>", with blanks in the device name replaced by
     * underscores.
     *
     * Returns the number of table entries written.
     */
    int deviceId, id = 0;
    uint32_t domainId, eventId;
    size_t i, devNameLen;

    /* create events for every GPU device and every domain per device */
    for ( deviceId = 0; deviceId < deviceCount; deviceId++ ) {
        /* for the event names, replace blanks in the device name with underscores */
        devNameLen = strlen( device[deviceId].name );
        for ( i = 0; i < devNameLen; i++ )
            if ( device[deviceId].name[i] == ' ' )
                device[deviceId].name[i] = '_';

        for ( domainId = 0; domainId < device[deviceId].domainCount;
              domainId++ ) {
            for ( eventId = 0;
                  eventId < device[deviceId].domain[domainId].eventCount;
                  eventId++ ) {
                /* Save native event data. The composed name is bounded by the
                   destination size so that three long components cannot
                   overrun the entry.
                   NOTE(review): assumes `name` is a fixed-size char array
                   inside the table entry -- confirm against the header. */
                snprintf( cuda_native_table[id].name,
                          sizeof ( cuda_native_table[id].name ),
                          "%s:%s:%s",
                          device[deviceId].name,
                          device[deviceId].domain[domainId].name,
                          device[deviceId].domain[domainId].event[eventId].name );

                /* strncpy() does not terminate on truncation; do it explicitly */
                strncpy( cuda_native_table[id].description,
                         device[deviceId].domain[domainId].event[eventId].desc,
                         PAPI_2MAX_STR_LEN - 1 );
                cuda_native_table[id].description[PAPI_2MAX_STR_LEN - 1] = '\0';

                /* The selector has to be !=0 . Starts with 1 */
                cuda_native_table[id].resources.selector = id + 1;

                /* store event ID */
                cuda_native_table[id].resources.eventId =
                    device[deviceId].domain[domainId].event[eventId].eventId;

                /* increment the table index counter */
                id++;
            }
        }
    }

    /* Return the number of events created */
    return id;
}

| int CUDA_cleanup_eventset | ( | hwd_control_state_t * | ctrl | ) |
Definition at line 1050 of file linux-cuda.c.
{
    /*
     * Deliberately a no-op that always reports success.
     *
     * TODO: update_control_state() is called after cleanup_eventset(); if the
     * eventGroup were destroyed here, that call would operate on an already
     * destroyed eventset. The teardown below therefore stays compiled out.
     */
#if 0
    CUDA_control_state_t *state = ( CUDA_control_state_t * ) ctrl;
    CUptiResult status = CUPTI_SUCCESS;

    /* Disabling the eventGroup also frees the perfmon hardware on the GPU */
    status = (*cuptiEventGroupDisablePtr)( state->eventGroup );
    CHECK_CUPTI_ERROR( status, "cuptiEventGroupDisable" );

    /* Let CuPTI release the eventGroup before leaving */
    status = (*cuptiEventGroupDestroyPtr)( state->eventGroup );
    CHECK_CUPTI_ERROR( status, "cuptiEventGroupDestroy" );
#endif
    ( void ) ctrl;

    return PAPI_OK;
}
| int CUDA_ctl | ( | hwd_context_t * | ctx, |
| int | code, | ||
| _papi_int_option_t * | option | ||
| ) |
Definition at line 930 of file linux-cuda.c.
{
    /* No control options are handled by this component: every argument is
       ignored and success is reported unconditionally. */
    ( void ) option;
    ( void ) code;
    ( void ) ctx;

    return PAPI_OK;
}
| int CUDA_init_component | ( | int | cidx | ) |
Definition at line 513 of file linux-cuda.c.
{
    /*
     * Component initialisation.
     *
     * Steps: resolve the CUDA/CUDART/CUPTI entry points, enumerate devices and
     * their events, obtain (or create) a CUDA context for the current device,
     * then build the native event table.
     *
     * cidx: component index assigned by the framework; stored in
     *       _cuda_vector.cmp_info.CmpIdx on success.
     *
     * Returns PAPI_OK on success, PAPI_ENOSUPP on any failure (with
     * cmp_info.disabled_reason filled in where a message is available).
     */
    CUresult cuErr = CUDA_SUCCESS;

    SUBDBG ("Entry: cidx: %d\n", cidx);

    /* link in all the cuda libraries and resolve the symbols we need to use */
    if ( linkCudaLibraries( ) != PAPI_OK ) {
        SUBDBG ("Dynamic link of CUDA libraries failed, component will be disabled.\n");
        SUBDBG ("See disable reason in papi_component_avail output for more details.\n");
        return ( PAPI_ENOSUPP );
    }

    /* Enumerate devices/domains/events; the result sizes the event table */
    NUM_EVENTS = detectDevice( );
    if ( NUM_EVENTS < 0 ) {
        strncpy( _cuda_vector.cmp_info.disabled_reason, "Call to detectDevice failed.", PAPI_MAX_STR_LEN );
        return ( PAPI_ENOSUPP );
    }

    /* TODO: works only for one device right now;
       need to find out if user can use 2 or more devices at same time */

    /* want create a CUDA context for either the default device or
       the device specified with cudaSetDevice() in user code */
    if ( CUDA_SUCCESS != (*cudaGetDevicePtr)( &currentDeviceID ) ) {
        strncpy( _cuda_vector.cmp_info.disabled_reason, "No NVIDIA GPU's found.", PAPI_MAX_STR_LEN );
        return ( PAPI_ENOSUPP );
    }

    if ( getenv( "PAPI_VERBOSE" ) ) {
        printf( "DEVICE USED: %s (%d)\n", device[currentDeviceID].name,
                currentDeviceID );
    }

    /* get the CUDA context from the calling CPU thread */
    cuErr = (*cuCtxGetCurrentPtr)( &cuCtx );

    /* if no CUDA context is bound to the calling CPU thread yet, create one */
    if ( cuErr != CUDA_SUCCESS || cuCtx == NULL ) {
        cuErr = (*cuCtxCreatePtr)( &cuCtx, 0, device[currentDeviceID].dev );
        CHECK_CU_ERROR( cuErr, "cuCtxCreate" );
    }

    /* cuCtxGetCurrent() can return a non-null context that is not valid
       because the context has not yet been initialized.
       Here is a workaround:
       cudaFree(NULL) forces the context to be initialized
       if cudaFree(NULL) returns success then we are able to use the context in subsequent calls
       if cudaFree(NULL) returns an error (or subsequent cupti* calls) then the context is not usable,
       and will never be useable */
    if ( CUDA_SUCCESS != (*cudaFreePtr)( NULL ) ) {
        strncpy( _cuda_vector.cmp_info.disabled_reason, "Problem initializing CUDA context.", PAPI_MAX_STR_LEN );
        return ( PAPI_ENOSUPP );
    }

    /* Create dynamic event table */
    cuda_native_table = ( CUDA_native_event_entry_t * )
        malloc( sizeof ( CUDA_native_event_entry_t ) * NUM_EVENTS );
    if ( cuda_native_table == NULL ) {
        perror( "malloc(): Failed to allocate memory to events table" );
        strncpy( _cuda_vector.cmp_info.disabled_reason, "Failed to allocate memory to events table.", PAPI_MAX_STR_LEN );
        return ( PAPI_ENOSUPP );
    }

    if ( NUM_EVENTS != createNativeEvents( ) ) {
        strncpy( _cuda_vector.cmp_info.disabled_reason, "Error creating CUDA event list.", PAPI_MAX_STR_LEN );
        /* do not leak the table; reset the pointer so a later free() is harmless */
        free( cuda_native_table );
        cuda_native_table = NULL;
        return ( PAPI_ENOSUPP );
    }

    /* Export the component id */
    _cuda_vector.cmp_info.CmpIdx = cidx;

    /* Number of events */
    _cuda_vector.cmp_info.num_native_events = NUM_EVENTS;

    return ( PAPI_OK );
}

| int CUDA_init_control_state | ( | hwd_control_state_t * | ctrl | ) |
Definition at line 786 of file linux-cuda.c.
{
    /*
     * Prepare a control state: allocate and zero the list that records which
     * native events were added to the CuPTI eventGroup, then create the
     * eventGroup on the component's CUDA context.
     *
     * Returns PAPI_OK, or PAPI_ENOSUPP if the list cannot be allocated.
     */
    CUDA_control_state_t *state = ( CUDA_control_state_t * ) ctrl;
    CUptiResult status = CUPTI_SUCCESS;
    int slot = 0;

    /* one slot per native event known to the component */
    state->addedEvents.list = malloc( sizeof ( int ) * NUM_EVENTS );
    if ( state->addedEvents.list == NULL ) {
        perror
            ( "malloc(): Failed to allocate memory to table of events that are added to CuPTI eventGroup" );
        return ( PAPI_ENOSUPP );
    }

    /* start with an all-zero event list */
    while ( slot < NUM_EVENTS ) {
        state->addedEvents.list[slot] = 0;
        slot++;
    }

    status = (*cuptiEventGroupCreatePtr)( cuCtx, &state->eventGroup, 0 );
    CHECK_CUPTI_ERROR( status, "cuptiEventGroupCreate" );

    return PAPI_OK;
}
| int CUDA_init_thread | ( | hwd_context_t * | ctx | ) |
Definition at line 489 of file linux-cuda.c.
{
    /* Nothing is set up per thread; the context argument is unused. */
    ( void ) ctx;

    return ( PAPI_OK );
}
| int CUDA_ntv_code_to_bits | ( | unsigned int | EventCode, |
| hwd_register_t * | bits | ||
| ) |
Definition at line 1136 of file linux-cuda.c.
{
    /*
     * Copy the register description (selector + CuPTI event id) of a native
     * event into the caller's buffer.
     *
     * Returns PAPI_OK; PAPI_EINVAL if bits is NULL; PAPI_ENOEVNT if EventCode
     * does not index an entry of the native event table.
     */
    int index = ( int ) EventCode;

    if ( bits == NULL )
        return ( PAPI_EINVAL );

    /* reject codes outside the table instead of reading past its end */
    if ( cuda_native_table == NULL || index < 0 || index >= NUM_EVENTS )
        return ( PAPI_ENOEVNT );

    memcpy( ( CUDA_register_t * ) bits,
            &( cuda_native_table[index].resources ),
            sizeof ( CUDA_register_t ) );

    return ( PAPI_OK );
}
| int CUDA_ntv_code_to_descr | ( | unsigned int | EventCode, |
| char * | name, | ||
| int | len | ||
| ) |
Definition at line 1123 of file linux-cuda.c.
{
    /*
     * Copy the description of a native event into name (at most len bytes,
     * always NUL-terminated).
     *
     * Returns PAPI_OK; PAPI_EINVAL if name is NULL or len is not positive;
     * PAPI_ENOEVNT if EventCode does not index an entry of the event table.
     */
    int index = ( int ) EventCode;

    if ( name == NULL || len <= 0 )
        return ( PAPI_EINVAL );

    /* reject codes outside the table instead of reading past its end */
    if ( cuda_native_table == NULL || index < 0 || index >= NUM_EVENTS )
        return ( PAPI_ENOEVNT );

    strncpy( name, cuda_native_table[index].description, len );
    /* strncpy() leaves the buffer unterminated when the source fills it */
    name[len - 1] = '\0';

    return ( PAPI_OK );
}
| int CUDA_ntv_code_to_name | ( | unsigned int | EventCode, |
| char * | name, | ||
| int | len | ||
| ) |
Definition at line 1110 of file linux-cuda.c.
{
    /*
     * Copy the name of a native event into name (at most len bytes, always
     * NUL-terminated).
     *
     * Returns PAPI_OK; PAPI_EINVAL if name is NULL or len is not positive;
     * PAPI_ENOEVNT if EventCode does not index an entry of the event table.
     */
    int index = ( int ) EventCode;

    if ( name == NULL || len <= 0 )
        return ( PAPI_EINVAL );

    /* reject codes outside the table instead of reading past its end */
    if ( cuda_native_table == NULL || index < 0 || index >= NUM_EVENTS )
        return ( PAPI_ENOEVNT );

    strncpy( name, cuda_native_table[index].name, len );
    /* strncpy() leaves the buffer unterminated when the source fills it */
    name[len - 1] = '\0';

    return ( PAPI_OK );
}
| int CUDA_ntv_enum_events | ( | unsigned int * | EventCode, |
| int | modifier | ||
| ) |
Definition at line 1077 of file linux-cuda.c.
{
    /*
     * Step through the native event codes.
     *
     * PAPI_ENUM_FIRST  : store code 0 in *EventCode.
     * PAPI_ENUM_EVENTS : advance *EventCode by one, or report PAPI_ENOEVNT
     *                    once the last table entry has been reached.
     * Any other modifier yields PAPI_EINVAL.
     */
    int current;

    if ( modifier == PAPI_ENUM_FIRST ) {
        *EventCode = 0;
        return ( PAPI_OK );
    }

    if ( modifier == PAPI_ENUM_EVENTS ) {
        current = *EventCode;
        /* already at (or beyond) the final entry: nothing left to enumerate */
        if ( !( current < NUM_EVENTS - 1 ) )
            return ( PAPI_ENOEVNT );
        *EventCode = *EventCode + 1;
        return ( PAPI_OK );
    }

    return ( PAPI_EINVAL );
}
| int CUDA_read | ( | hwd_context_t * | ctx, |
| hwd_control_state_t * | ctrl, | ||
| long_long ** | events, | ||
| int | flags | ||
| ) |
Definition at line 856 of file linux-cuda.c.
{
    /*
     * Fetch the current counter values through getEventValue() and expose the
     * control state's counts array to the caller via *events.
     *
     * Returns PAPI_OK, or PAPI_ENOSUPP when getEventValue() reports failure.
     */
    CUDA_control_state_t *state = ( CUDA_control_state_t * ) ctrl;
    int rc;

    ( void ) flags;
    ( void ) ctx;

    rc = getEventValue( state->counts, state->eventGroup, state->addedEvents );
    if ( rc != 0 )
        return ( PAPI_ENOSUPP );

    *events = state->counts;

    return ( PAPI_OK );
}

| int CUDA_reset | ( | hwd_context_t * | ctx, |
| hwd_control_state_t * | ctrl | ||
| ) |
Definition at line 1033 of file linux-cuda.c.
{
    /* Ask CuPTI to set every event of this control state's eventGroup back to
       zero. The context argument is unused. */
    CUDA_control_state_t *state = ( CUDA_control_state_t * ) ctrl;
    CUptiResult status = CUPTI_SUCCESS;

    ( void ) ctx;

    status = (*cuptiEventGroupResetAllEventsPtr)( state->eventGroup );
    CHECK_CUPTI_ERROR( status, "cuptiEventGroupResetAllEvents" );

    return PAPI_OK;
}
| int CUDA_set_domain | ( | hwd_control_state_t * | cntrl, |
| int | domain | ||
| ) |
Definition at line 1008 of file linux-cuda.c.
{
    /*
     * Validate a counting-domain request. The request is accepted when at
     * least one of PAPI_DOM_USER, PAPI_DOM_KERNEL or PAPI_DOM_OTHER is set;
     * nothing is stored in the control state.
     *
     * Returns PAPI_OK when accepted, PAPI_EINVAL otherwise.
     */
    const int accepted = PAPI_DOM_USER | PAPI_DOM_KERNEL | PAPI_DOM_OTHER;

    ( void ) cntrl;

    return ( domain & accepted ) ? ( PAPI_OK ) : ( PAPI_EINVAL );
}
| int CUDA_shutdown_component | ( | void | ) |
Definition at line 887 of file linux-cuda.c.
{
    /*
     * Release everything the component owns: the device/domain/event tables,
     * the native event table, the CUDA context and the dynamically loaded
     * libraries.
     *
     * Returns PAPI_OK, or PAPI_ENOSUPP if destroying the CUDA context failed.
     * The libraries are closed in either case.
     */
    CUresult cuErr = CUDA_SUCCESS;
    int retval = PAPI_OK;

    /* if running a threaded application, we need to make sure that
       a thread doesn't free the same memory location(s) more than once */
    if ( CUDA_FREED == 0 ) {
        uint32_t j;
        int i;

        CUDA_FREED = 1;

        /* deallocate all the memory */
        if ( device != NULL ) {
            for ( i = 0; i < deviceCount; i++ ) {
                /* a device whose domain table was never allocated has
                   nothing to walk */
                if ( device[i].domain == NULL )
                    continue;
                for ( j = 0; j < device[i].domainCount; j++ )
                    free( device[i].domain[j].event );
                free( device[i].domain );
            }
            free( device );
            device = NULL;
        }
        free( cuda_native_table );
        cuda_native_table = NULL;

        /* destroy floating CUDA context; remember a failure but keep going so
           that the library handles below are still released */
        if ( cuCtxDestroyPtr != NULL ) {
            cuErr = (*cuCtxDestroyPtr)( cuCtx );
            if ( cuErr != CUDA_SUCCESS )
                retval = PAPI_ENOSUPP;    // Not supported
        }
    }

    // close the dynamic libraries needed by this component (opened in the init substrate call);
    // handles are cleared so that a repeated shutdown does not close them twice
    if ( dl1 != NULL ) {
        dlclose( dl1 );
        dl1 = NULL;
    }
    if ( dl2 != NULL ) {
        dlclose( dl2 );
        dl2 = NULL;
    }
    if ( dl3 != NULL ) {
        dlclose( dl3 );
        dl3 = NULL;
    }

    return ( retval );
}
| int CUDA_shutdown_thread | ( | hwd_context_t * | ctx | ) |
Definition at line 876 of file linux-cuda.c.
{
    /* Release the per-context list of added events. The pointer is cleared
       afterwards so that a repeated shutdown cannot free it a second time. */
    CUDA_context_t *CUDA_ctx = ( CUDA_context_t * ) ctx;

    free( CUDA_ctx->state.addedEvents.list );
    CUDA_ctx->state.addedEvents.list = NULL;

    return ( PAPI_OK );
}
| int CUDA_start | ( | hwd_context_t * | ctx, |
| hwd_control_state_t * | ctrl | ||
| ) |
Definition at line 817 of file linux-cuda.c.
{
    /*
     * Begin counting: clear the accumulated counts of the control state,
     * enable its CuPTI eventGroup, then have CuPTI zero all events of that
     * group. The context argument is unused.
     */
    CUDA_control_state_t *state = ( CUDA_control_state_t * ) ctrl;
    CUptiResult status = CUPTI_SUCCESS;
    int slot = 0;

    ( void ) ctx;

    /* every accumulated value starts from 0 */
    while ( slot < NUM_EVENTS ) {
        state->counts[slot] = 0;
        slot++;
    }

    status = (*cuptiEventGroupEnablePtr)( state->eventGroup );
    CHECK_CUPTI_ERROR( status, "cuptiEventGroupEnable" );

    /* zero all events held by the CuPTI eventGroup */
    status = (*cuptiEventGroupResetAllEventsPtr)( state->eventGroup );
    CHECK_CUPTI_ERROR( status, "cuptiEventGroupResetAllEvents" );

    return PAPI_OK;
}
| int CUDA_stop | ( | hwd_context_t * | ctx, |
| hwd_control_state_t * | ctrl | ||
| ) |
Definition at line 843 of file linux-cuda.c.
{
    /* Stopping performs no work in this component; both arguments are
       ignored and success is always reported. */
    ( void ) ctrl;
    ( void ) ctx;

    return PAPI_OK;
}
| int CUDA_update_control_state | ( | hwd_control_state_t * | ptr, |
| NativeInfo_t * | native, | ||
| int | count, | ||
| hwd_context_t * | ctx | ||
| ) |
Definition at line 946 of file linux-cuda.c.
{
    /*
     * Rebuild the CuPTI eventGroup of a control state from the given list of
     * native events.
     *
     * ptr    : control state whose eventGroup is rebuilt
     * native : array of `count` native events; ni_position of each entry is
     *          set to its native event index
     * count  : number of entries in native
     * ctx    : unused
     *
     * Returns PAPI_OK, or PAPI_ENOSUPP if an event belongs to a device other
     * than the one currently in use.
     */
    CUDA_control_state_t *CUDA_ptr = ( CUDA_control_state_t * ) ptr;
    CUptiResult cuptiErr = CUPTI_SUCCESS;
    int index, i;

    ( void ) ctx;

    /* Disable the CUDA eventGroup;
       it also frees the perfmon hardware on the GPU */
    cuptiErr = (*cuptiEventGroupDisablePtr)( CUDA_ptr->eventGroup );
    CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupDisable" );

    cuptiErr = (*cuptiEventGroupRemoveAllEventsPtr)( CUDA_ptr->eventGroup );
    CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupRemoveAllEvents" );

    /* Record the number of added events up front: the group is empty at this
       point, so the count must also be refreshed when count == 0 (setting it
       only inside the loop would leave a stale value behind). */
    CUDA_ptr->addedEvents.count = count;

    /* add the events to the eventset */
    for ( i = 0; i < count; i++ ) {
        index = native[i].ni_event;
        native[i].ni_position = index;

        /* store events, that have been added to the CuPTI eventGroup
           in a separate place (addedEvents).
           Needed, so that we can read the values for the added events only */
        CUDA_ptr->addedEvents.list[i] = index;

        /* if this device name is different from the actual device the code is running on, then exit */
        if ( 0 != strncmp( device[currentDeviceID].name,
                           cuda_native_table[index].name,
                           strlen( device[currentDeviceID].name ) ) ) {
            fprintf( stderr, "Device %s is used -- BUT event %s is collected. \n ---> ERROR: Specify events for the device that is used!\n\n",
                     device[currentDeviceID].name, cuda_native_table[index].name );
            return ( PAPI_ENOSUPP );    // Not supported
        }

        /* Add events to the CuPTI eventGroup */
        cuptiErr =
            (*cuptiEventGroupAddEventPtr)( CUDA_ptr->eventGroup,
                                           cuda_native_table[index].resources.
                                           eventId );
        CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupAddEvent" );
    }

    return ( PAPI_OK );
}
| static int enumEventDomains | ( | CUdevice | dev, |
| int | deviceId | ||
| ) | [static] |
Definition at line 185 of file linux-cuda.c.
{
    /*
     * Enumerate the CuPTI event domains of one device and record, for each
     * domain, its id, name and event count in device[deviceId].domain[];
     * the events of every domain are then enumerated through enumEvents().
     *
     * dev      : CUDA device handle handed to the CuPTI calls
     * deviceId : index of the device in the device table
     *
     * Returns 0 on success, -1 on failure.
     *
     * NOTE(review): if CHECK_CUPTI_ERROR returns from this function, the
     * temporary domainId buffer is still leaked on those paths -- confirm
     * against the macro definition.
     */
    CUptiResult err = CUPTI_SUCCESS;
    CUpti_EventDomainID *domainId = NULL;
    uint32_t id = 0;
    size_t size = 0;

    device[deviceId].domainCount = 0;

    /* get number of domains for device dev */
    err = (*cuptiDeviceGetNumEventDomainsPtr)( dev, &device[deviceId].domainCount );
    CHECK_CUPTI_ERROR( err, "cuptiDeviceGetNumEventDomains" );

    if ( device[deviceId].domainCount == 0 ) {
        printf( "No domain is exposed by dev = %d\n", dev );
        return -1;
    }

    /* CuPTI domain struct */
    size = sizeof ( CUpti_EventDomainID ) * device[deviceId].domainCount;
    domainId = ( CUpti_EventDomainID * ) malloc( size );
    if ( domainId == NULL ) {
        perror( "malloc(): Failed to allocate memory to CuPTI domain ID" );
        return -1;
    }
    memset( domainId, 0, size );

    /* PAPI domain struct */
    device[deviceId].domain =
        ( DomainData_t * ) malloc( sizeof ( DomainData_t ) *
                                   device[deviceId].domainCount );
    if ( device[deviceId].domain == NULL ) {
        perror( "malloc(): Failed to allocate memory to PAPI domain struct" );
        free( domainId );
        return -1;
    }

    /* Enumerates the event domains for a device dev */
    err = (*cuptiDeviceEnumEventDomainsPtr)( dev, &size, domainId );
    CHECK_CUPTI_ERROR( err, "cuptiDeviceEnumEventDomains" );

    /* enum domains */
    for ( id = 0; id < device[deviceId].domainCount; id++ ) {
        device[deviceId].domain[id].domainId = domainId[id];

        /* query domain name */
        size = PAPI_MIN_STR_LEN;
#ifdef CUDA_4_0
        err = cuptiEventDomainGetAttribute( dev,
                                            device[deviceId].domain[id].
                                            domainId,
                                            CUPTI_EVENT_DOMAIN_ATTR_NAME, &size,
                                            ( void * ) device[deviceId].
                                            domain[id].name );
        CHECK_CUPTI_ERROR( err, "cuptiEventDomainGetAttribute" );

        /* query num of events available in the domain */
        size = sizeof ( device[deviceId].domain[id].eventCount );
        err = cuptiEventDomainGetAttribute( dev,
                                            device[deviceId].domain[id].
                                            domainId,
                                            CUPTI_EVENT_DOMAIN_MAX_EVENTS,
                                            &size,
                                            ( void * ) &device[deviceId].
                                            domain[id].eventCount );
        CHECK_CUPTI_ERROR( err, "cuptiEventDomainGetAttribute" );

        /* enumerate the events for the domain[id] on the device dev */
        if ( 0 != enumEvents( dev, deviceId, id ) ) {
            /* the scratch id buffer is ours to release on this path */
            free( domainId );
            return -1;
        }
#else
        err = (*cuptiDeviceGetEventDomainAttributePtr)( dev,
                                                        device[deviceId].domain[id].domainId,
                                                        CUPTI_EVENT_DOMAIN_ATTR_NAME, &size,
                                                        ( void * ) device[deviceId].domain[id].name );
        CHECK_CUPTI_ERROR( err, "cuptiDeviceGetEventDomainAttribute" );

        /* query num of events available in the domain */
        err = (*cuptiEventDomainGetNumEventsPtr)( device[deviceId].domain[id].domainId,
                                                  &device[deviceId].domain[id].eventCount );
        CHECK_CUPTI_ERROR( err, "cuptiEventDomainGetNumEvents" );

        /* enumerate the events for the domain[id] on the device deviceId */
        if ( 0 != enumEvents( deviceId, id ) ) {
            /* the scratch id buffer is ours to release on this path */
            free( domainId );
            return -1;
        }
#endif
    }

    totalDomainCount += device[deviceId].domainCount;
    free( domainId );

    return 0;
}

| static int enumEvents | ( | int | deviceId, |
| int | domainId | ||
| ) | [static] |
Definition at line 287 of file linux-cuda.c.
{
/*
 * Enumerate the events of one domain of one device and record the id, name
 * and short description of each event in
 * device[deviceId].domain[domainId].event[]. The number of events is taken
 * from the domain's eventCount and added to totalEventCount on success.
 *
 * deviceId : index into the device table
 * domainId : index into that device's domain table
 *
 * Returns 0 on success, -1 if an allocation fails.
 *
 * NOTE(review): the CUDA_4_0 branches below pass a `dev` argument that is not
 * among the two parameters listed for this function -- confirm before
 * building with CUDA_4_0 defined.
 */
CUptiResult err = CUPTI_SUCCESS;
CUpti_EventID *eventId = NULL;
size_t size = 0;
uint32_t id = 0;
/* CuPTI event struct: temporary array receiving the event ids of the domain */
size =
sizeof ( CUpti_EventID ) * device[deviceId].domain[domainId].eventCount;
eventId = ( CUpti_EventID * ) malloc( size );
if ( eventId == NULL ) {
perror( "malloc(): Failed to allocate memory to CuPTI event ID" );
return -1;
}
memset( eventId, 0, size );
/* PAPI event struct: per-domain event table kept in the device table */
device[deviceId].domain[domainId].event =
( EventData_t * ) malloc( sizeof ( EventData_t ) *
device[deviceId].domain[domainId].
eventCount );
if ( device[deviceId].domain[domainId].event == NULL ) {
perror( "malloc(): Failed to allocate memory to PAPI event struct" );
/* release the temporary id array before bailing out */
free(eventId);
return -1;
}
/* enumerate the events for the domain[domainId] on the device[deviceId] */
#ifdef CUDA_4_0
err =
(*cuptiEventDomainEnumEventsPtr)( dev,
( CUpti_EventDomainID ) device[deviceId].
domain[domainId].domainId, &size, eventId );
#else
err =
(*cuptiEventDomainEnumEventsPtr)( ( CUpti_EventDomainID ) device[deviceId].
domain[domainId].domainId, &size, eventId );
#endif
/* NOTE(review): if CHECK_CUPTI_ERROR returns from this function, eventId is
   not freed on that path -- confirm against the macro definition */
CHECK_CUPTI_ERROR( err, "cuptiEventDomainEnumEvents" );
/* query event info */
for ( id = 0; id < device[deviceId].domain[domainId].eventCount; id++ ) {
device[deviceId].domain[domainId].event[id].eventId = eventId[id];
/* query event name; size is the capacity offered for the name */
size = PAPI_MIN_STR_LEN;
#ifdef CUDA_4_0
err = (*cuptiEventGetAttributePtr)( dev,
device[deviceId].domain[domainId].
event[id].eventId, CUPTI_EVENT_ATTR_NAME,
&size,
( uint8_t * ) device[deviceId].
domain[domainId].event[id].name );
#else
err = (*cuptiEventGetAttributePtr)( device[deviceId].domain[domainId].
event[id].eventId, CUPTI_EVENT_ATTR_NAME,
&size,
( uint8_t * ) device[deviceId].
domain[domainId].event[id].name );
#endif
CHECK_CUPTI_ERROR( err, "cuptiEventGetAttribute" );
/* query event description; size is the capacity offered for the description */
size = PAPI_2MAX_STR_LEN;
#ifdef CUDA_4_0
err = (*cuptiEventGetAttributePtr)( dev,
device[deviceId].domain[domainId].
event[id].eventId,
CUPTI_EVENT_ATTR_SHORT_DESCRIPTION, &size,
( uint8_t * ) device[deviceId].
domain[domainId].event[id].desc );
#else
err = (*cuptiEventGetAttributePtr)( device[deviceId].domain[domainId].
event[id].eventId,
CUPTI_EVENT_ATTR_SHORT_DESCRIPTION, &size,
( uint8_t * ) device[deviceId].
domain[domainId].event[id].desc );
#endif
CHECK_CUPTI_ERROR( err, "cuptiEventGetAttribute" );
}
/* account for this domain's events in the global total */
totalEventCount += device[deviceId].domain[domainId].eventCount;
free( eventId );
return 0;
}

| static int getEventValue | ( | long long * | counts, |
| CUpti_EventGroup | eventGroup, | ||
| AddedEvents_t | addedEvents | ||
| ) | [static] |
Definition at line 437 of file linux-cuda.c.
{
    /*
     * Read all events of a CuPTI eventGroup and accumulate the values into
     * counts[], indexed by native event index.
     *
     * counts      : accumulation array, indexed by native event index
     * eventGroup  : CuPTI eventGroup to read
     * addedEvents : the native event indices that were added to the group
     *
     * Returns 0 on success, -1 if a buffer cannot be allocated or CuPTI
     * returns a different number of events than were added.
     *
     * NOTE(review): if CHECK_CUPTI_ERROR returns from this function, the two
     * temporary buffers are still leaked on that path -- confirm against the
     * macro definition.
     */
    CUptiResult cuptiErr = CUPTI_SUCCESS;
    size_t events_read = 0, bufferSizeBytes, arraySizeBytes, i;
    uint64_t *counterDataBuffer;
    CUpti_EventID *eventIDArray;
    int j;

    bufferSizeBytes = addedEvents.count * sizeof ( uint64_t );
    arraySizeBytes = addedEvents.count * sizeof ( CUpti_EventID );

    counterDataBuffer = ( uint64_t * ) malloc( bufferSizeBytes );
    eventIDArray = ( CUpti_EventID * ) malloc( arraySizeBytes );
    if ( counterDataBuffer == NULL || eventIDArray == NULL ) {
        perror( "malloc(): Failed to allocate memory to CuPTI read buffers" );
        free( counterDataBuffer );
        free( eventIDArray );
        return -1;
    }

    /* read counter data for the specified event from the CuPTI eventGroup */
    cuptiErr = (*cuptiEventGroupReadAllEventsPtr)( eventGroup,
                                                   CUPTI_EVENT_READ_FLAG_NONE,
                                                   &bufferSizeBytes,
                                                   counterDataBuffer, &arraySizeBytes,
                                                   eventIDArray, &events_read );
    CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupReadAllEvents" );

    if ( events_read != ( size_t ) addedEvents.count ) {
        /* do not leak the scratch buffers on the mismatch path */
        free( counterDataBuffer );
        free( eventIDArray );
        return -1;
    }

    /* Since there is no guarantee that returned counter values are in the same
       order as the counters in the PAPI addedEvents.list, we need to map the
       CUpti_EventID to PAPI event ID values.
       According to CuPTI doc: counter return values of counterDataBuffer
       correspond to the return event IDs in eventIDArray */
    for ( i = 0; i < events_read; i++ )
        for ( j = 0; j < addedEvents.count; j++ )
            if ( cuda_native_table[addedEvents.list[j]].resources.eventId ==
                 eventIDArray[i] )
                // since cuptiEventGroupReadAllEvents() resets counter values to 0;
                // we have to accumulate ourselves
                counts[addedEvents.list[j]] = counts[addedEvents.list[j]] + counterDataBuffer[i];

    free( counterDataBuffer );
    free( eventIDArray );

    return 0;
}

| static int linkCudaLibraries | ( | ) | [static] |
Definition at line 598 of file linux-cuda.c.
{
    /*
     * Open libcuda.so, libcudart.so and libcupti.so and resolve every entry
     * point the component calls through its function pointers.
     *
     * On the first failure the reason is copied into
     * _cuda_vector.cmp_info.disabled_reason and PAPI_ENOSUPP is returned;
     * PAPI_OK is returned once every symbol has been resolved.
     */

/* Record why the component is disabled and leave the function. */
#define CUDA_LINK_FAIL( msg )                                                         \
    do {                                                                              \
        strncpy( _cuda_vector.cmp_info.disabled_reason, msg, PAPI_MAX_STR_LEN );      \
        return ( PAPI_ENOSUPP );                                                      \
    } while ( 0 )

/* Resolve one symbol from an open handle; dlerror() decides success. */
#define CUDA_LOAD_SYM( fptr, handle, symbol, msg )                                    \
    do {                                                                              \
        fptr = dlsym( handle, symbol );                                               \
        if ( dlerror( ) != NULL )                                                     \
            CUDA_LINK_FAIL( msg );                                                    \
    } while ( 0 )

    /* Attempt to guess if we were statically linked to libc, if so bail */
    if ( _dl_non_dynamic_init != NULL )
        CUDA_LINK_FAIL( "The cuda component does not support statically linking to libc." );

    /* ---- CUDA driver library ---- */
    dl1 = dlopen( "libcuda.so", RTLD_NOW | RTLD_GLOBAL );
    if ( !dl1 )
        CUDA_LINK_FAIL( "CUDA library libcuda.so not found." );

    CUDA_LOAD_SYM( cuCtxCreatePtr, dl1, "cuCtxCreate_v2", "CUDA function cuCtxCreate not found." );
    CUDA_LOAD_SYM( cuCtxDestroyPtr, dl1, "cuCtxDestroy_v2", "CUDA function cuCtxDestroy not found." );
    CUDA_LOAD_SYM( cuCtxGetCurrentPtr, dl1, "cuCtxGetCurrent", "CUDA function cuCtxGetCurrent not found." );
    CUDA_LOAD_SYM( cuDeviceGetPtr, dl1, "cuDeviceGet", "CUDA function cuDeviceGet not found." );
    CUDA_LOAD_SYM( cuDeviceGetCountPtr, dl1, "cuDeviceGetCount", "CUDA function cuDeviceGetCount not found." );
    CUDA_LOAD_SYM( cuDeviceGetNamePtr, dl1, "cuDeviceGetName", "CUDA function cuDeviceGetName not found." );
    CUDA_LOAD_SYM( cuInitPtr, dl1, "cuInit", "CUDA function cuInit not found." );

    /* ---- CUDA runtime library ---- */
    dl2 = dlopen( "libcudart.so", RTLD_NOW | RTLD_GLOBAL );
    if ( !dl2 )
        CUDA_LINK_FAIL( "CUDA runtime library libcudart.so not found." );

    CUDA_LOAD_SYM( cudaFreePtr, dl2, "cudaFree", "CUDART function cudaFree not found." );
    CUDA_LOAD_SYM( cudaGetDevicePtr, dl2, "cudaGetDevice", "CUDART function cudaGetDevice not found." );
    CUDA_LOAD_SYM( cudaRuntimeGetVersionPtr, dl2, "cudaRuntimeGetVersion", "CUDART function cudaRuntimeGetVersion not found." );
    CUDA_LOAD_SYM( cudaDriverGetVersionPtr, dl2, "cudaDriverGetVersion", "CUDART function cudaDriverGetVersion not found." );

    /* ---- CUPTI library ---- */
    dl3 = dlopen( "libcupti.so", RTLD_NOW | RTLD_GLOBAL );
    if ( !dl3 )
        CUDA_LINK_FAIL( "CUDA runtime library libcupti.so not found." );

    CUDA_LOAD_SYM( cuptiDeviceEnumEventDomainsPtr, dl3, "cuptiDeviceEnumEventDomains", "CUPTI function cuptiDeviceEnumEventDomains not found." );
    CUDA_LOAD_SYM( cuptiDeviceGetEventDomainAttributePtr, dl3, "cuptiDeviceGetEventDomainAttribute", "CUPTI function cuptiDeviceGetEventDomainAttribute not found." );
    CUDA_LOAD_SYM( cuptiDeviceGetNumEventDomainsPtr, dl3, "cuptiDeviceGetNumEventDomains", "CUPTI function cuptiDeviceGetNumEventDomains not found." );
    CUDA_LOAD_SYM( cuptiEventDomainEnumEventsPtr, dl3, "cuptiEventDomainEnumEvents", "CUPTI function cuptiEventDomainEnumEvents not found." );
    CUDA_LOAD_SYM( cuptiEventDomainGetNumEventsPtr, dl3, "cuptiEventDomainGetNumEvents", "CUPTI function cuptiEventDomainGetNumEvents not found." );
    CUDA_LOAD_SYM( cuptiEventGetAttributePtr, dl3, "cuptiEventGetAttribute", "CUPTI function cuptiEventGetAttribute not found." );
    CUDA_LOAD_SYM( cuptiEventGroupAddEventPtr, dl3, "cuptiEventGroupAddEvent", "CUPTI function cuptiEventGroupAddEvent not found." );
    CUDA_LOAD_SYM( cuptiEventGroupCreatePtr, dl3, "cuptiEventGroupCreate", "CUPTI function cuptiEventGroupCreate not found." );
    CUDA_LOAD_SYM( cuptiEventGroupDestroyPtr, dl3, "cuptiEventGroupDestroy", "CUPTI function cuptiEventGroupDestroy not found." );
    CUDA_LOAD_SYM( cuptiEventGroupDisablePtr, dl3, "cuptiEventGroupDisable", "CUPTI function cuptiEventGroupDisable not found." );
    CUDA_LOAD_SYM( cuptiEventGroupEnablePtr, dl3, "cuptiEventGroupEnable", "CUPTI function cuptiEventGroupEnable not found." );
    CUDA_LOAD_SYM( cuptiEventGroupReadAllEventsPtr, dl3, "cuptiEventGroupReadAllEvents", "CUPTI function cuptiEventGroupReadAllEvents not found." );
    CUDA_LOAD_SYM( cuptiEventGroupRemoveAllEventsPtr, dl3, "cuptiEventGroupRemoveAllEvents", "CUPTI function cuptiEventGroupRemoveAllEvents not found." );
    CUDA_LOAD_SYM( cuptiEventGroupResetAllEventsPtr, dl3, "cuptiEventGroupResetAllEvents", "CUPTI function cuptiEventGroupResetAllEvents not found." );

#undef CUDA_LOAD_SYM
#undef CUDA_LINK_FAIL

    return ( PAPI_OK );
}

Definition at line 1151 of file linux-cuda.c.
| void(* _dl_non_dynamic_init)(void) |
Definition at line 41 of file linux-cuda.c.
{
    /*
     * Initialise CUDA, build the device table and enumerate the event domains
     * (and through them the events) of every distinct device type.
     *
     * Returns the total number of events found (totalEventCount), or
     * PAPI_ENOSUPP if CUDA cannot be initialised, no device is present, the
     * device table cannot be allocated or domain enumeration fails.
     *
     * The table is zero-initialised and keeps one entry per device. A device
     * skipped because it repeats the previous device's name keeps
     * domainCount == 0 and domain == NULL, so loops over its domains are
     * no-ops and freeing its domain pointer is harmless. deviceCount is no
     * longer reduced by the number of skipped devices: shrinking it while the
     * skipped entries stay in place would make loops bounded by deviceCount
     * visit a skipped entry and miss a later, enumerated one.
     */
    CUresult err;
    int id;
    char deviceName_tmp[PAPI_MIN_STR_LEN] = "init";

    totalEventCount = 0;

    /* CUDA initialization */
    err = (*cuInitPtr)( 0 );
    if ( err != CUDA_SUCCESS ) {
        SUBDBG ("Info: Error from cuInit(): %d\n", err);
        return ( PAPI_ENOSUPP );
    }

    /* How many gpgpu devices do we have? */
    err = (*cuDeviceGetCountPtr)( &deviceCount );
    CHECK_CU_ERROR( err, "cuDeviceGetCount" );
    if ( deviceCount == 0 )
        return ( PAPI_ENOSUPP );

    /* allocate zeroed memory for device data table */
    device = ( DeviceData_t * ) calloc( ( size_t ) deviceCount, sizeof ( DeviceData_t ) );
    if ( device == NULL ) {
        perror( "malloc(): Failed to allocate memory to CUDA device table" );
        return ( PAPI_ENOSUPP );
    }

    /* What are the devices? Get Name and # of domains per device */
    for ( id = 0; id < deviceCount; id++ ) {
        err = (*cuDeviceGetPtr)( &device[id].dev, id );
        CHECK_CU_ERROR( err, "cuDeviceGet" );

        err = (*cuDeviceGetNamePtr)( device[id].name, PAPI_MIN_STR_LEN, device[id].dev );
        CHECK_CU_ERROR( err, "cuDeviceGetName" );
        SUBDBG ("Cuda deviceName: %s\n", device[id].name);

        /* Skip device if there are multiple of the same type
           and if it has been already added to the list; its entry keeps
           the handle and name but no domains */
        if ( 0 == strcmp( deviceName_tmp, device[id].name ) )
            continue;

        strcpy( deviceName_tmp, device[id].name );

        /* enumerate the domains on the device */
        if ( 0 != enumEventDomains( device[id].dev, id ) )
            return ( PAPI_ENOSUPP );
    }

    /* return number of events provided via CuPTI */
    return totalEventCount;
}