PAPI  5.0.1.0
linux-cuda.c File Reference

This file has the source code for a component that enables PAPI-C to access hardware monitoring counters for GPU devices through the CUPTI library. More...

Include dependency graph for linux-cuda.c:

Go to the source code of this file.

Functions

static int detectDevice (void)
static int enumEventDomains (CUdevice dev, int deviceId)
static int enumEvents (int deviceId, int domainId)
static int createNativeEvents (void)
static int getEventValue (long long *counts, CUpti_EventGroup eventGroup, AddedEvents_t addedEvents)
int CUDA_init_thread (hwd_context_t *ctx)
int CUDA_init_component ()
int CUDA_init_control_state (hwd_control_state_t *ctrl)
int CUDA_start (hwd_context_t *ctx, hwd_control_state_t *ctrl)
int CUDA_stop (hwd_context_t *ctx, hwd_control_state_t *ctrl)
int CUDA_read (hwd_context_t *ctx, hwd_control_state_t *ctrl, long_long **events, int flags)
int CUDA_shutdown_thread (hwd_context_t *ctx)
int CUDA_shutdown_component (void)
int CUDA_ctl (hwd_context_t *ctx, int code, _papi_int_option_t *option)
int CUDA_update_control_state (hwd_control_state_t *ptr, NativeInfo_t *native, int count, hwd_context_t *ctx)
int CUDA_set_domain (hwd_control_state_t *cntrl, int domain)
int CUDA_reset (hwd_context_t *ctx, hwd_control_state_t *ctrl)
int CUDA_cleanup_eventset (hwd_control_state_t *ctrl)
int CUDA_ntv_enum_events (unsigned int *EventCode, int modifier)
int CUDA_ntv_code_to_name (unsigned int EventCode, char *name, int len)
int CUDA_ntv_code_to_descr (unsigned int EventCode, char *name, int len)
int CUDA_ntv_code_to_bits (unsigned int EventCode, hwd_register_t *bits)

Variables

papi_vector_t _cuda_vector

Detailed Description

Author:
Heike Jagode (in collaboration with Robert Dietrich, TU Dresden) jagode@eecs.utk.edu

Definition in file linux-cuda.c.


Function Documentation

static int createNativeEvents ( void  ) [static]

Definition at line 289 of file linux-cuda.c.

{
    int deviceId, id = 0;
    uint32_t domainId, eventId;
    int cuptiDomainId;
    int i;
    int devNameLen;

    /* component name and description */
    strcpy( _cuda_vector.cmp_info.short_name, "CUDA" );
    strcpy( _cuda_vector.cmp_info.description,
            "CuPTI provides the API for monitoring CUDA hardware events" );

    /* create events for every GPU device and every domain per device  */
    for ( deviceId = 0; deviceId < deviceCount; deviceId++ ) {
        /* for the event names, replace blanks in the device name with underscores */
        devNameLen = strlen( device[deviceId].name );
        for ( i = 0; i < devNameLen; i++ )
            if ( device[deviceId].name[i] == ' ' )
                device[deviceId].name[i] = '_';

        for ( domainId = 0; domainId < device[deviceId].domainCount;
              domainId++ ) {
            cuptiDomainId = device[deviceId].domain[domainId].domainId;

            for ( eventId = 0;
                  eventId < device[deviceId].domain[domainId].eventCount;
                  eventId++ ) {
                /* Save native event data */
                sprintf( cuda_native_table[id].name,
                         "%s:%s:%s",
                         device[deviceId].name,
                         device[deviceId].domain[domainId].name,
                         device[deviceId].domain[domainId].event[eventId].
                         name );

                strncpy( cuda_native_table[id].description,
                         device[deviceId].domain[domainId].event[eventId].desc,
                         PAPI_2MAX_STR_LEN );

                /* The selector has to be !=0 . Starts with 1 */
                cuda_native_table[id].resources.selector = id + 1;

                /* store event ID */
                cuda_native_table[id].resources.eventId =
                    device[deviceId].domain[domainId].event[eventId].eventId;

                /* increment the table index counter */
                id++;
            }
        }
    }

    /* Return the number of events created */
    return id;
}

Here is the caller graph for this function:

Definition at line 753 of file linux-cuda.c.

{
    CUDA_control_state_t * CUDA_ctrl = ( CUDA_control_state_t * ) ctrl;
    CUptiResult cuptiErr = CUPTI_SUCCESS;

    /* Disable the CUDA eventGroup; 
       it also frees the perfmon hardware on the GPU */
    cuptiErr = cuptiEventGroupDisable( CUDA_ctrl->eventGroup );
    CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupDisable" );

    /* Call the CuPTI cleaning function before leaving */
    cuptiErr = cuptiEventGroupDestroy( CUDA_ctrl->eventGroup );
    CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupDestroy" );

    return ( PAPI_OK );
}
int CUDA_ctl ( hwd_context_t ctx,
int  code,
_papi_int_option_t option 
)

Definition at line 628 of file linux-cuda.c.

{
    ( void ) ctx;
    ( void ) code;
    ( void ) option;
    return ( PAPI_OK );
}

Definition at line 429 of file linux-cuda.c.

{
    CUresult cuErr = CUDA_SUCCESS;
    
    /* Create dynamic event table */
    NUM_EVENTS = detectDevice(  );

    /* TODO: works only for one device right now; 
     need to find out if user can use 2 or more devices at same time */
    
    /* want create a CUDA context for either the default device or
     the device specified with cudaSetDevice() in user code */
    if ( CUDA_SUCCESS != cudaGetDevice( &currentDeviceID ) )
        return ( PAPI_ENOSUPP );
    
    if ( getenv( "PAPI_VERBOSE" ) ) {
        printf( "DEVICE USED: %s (%d)\n", device[currentDeviceID].name,
               currentDeviceID );
    }
    
    /* get the CUDA context from the calling CPU thread */
    cuErr = cuCtxGetCurrent( &cuCtx );
    
    /* if no CUDA context is bound to the calling CPU thread yet, create one */
    if ( cuErr != CUDA_SUCCESS || cuCtx == NULL ) {
        cuErr = cuCtxCreate( &cuCtx, 0, device[currentDeviceID].dev );
        CHECK_CU_ERROR( cuErr, "cuCtxCreate" );
    }
    
    /* cuCtxGetCurrent() can return a non-null context that is not valid 
       because the context has not yet been initialized.
       Here is a workaround: 
       cudaFree(NULL) forces the context to be initialized
       if cudaFree(NULL) returns success then we are able to use the context in subsequent calls
       if cudaFree(NULL) returns an error (or subsequent cupti* calls) then the context is not usable,
       and will never be useable */
    if ( CUDA_SUCCESS != cudaFree( NULL ) )
        return ( PAPI_ENOSUPP );
        
    /* Create dynamic event table */
    cuda_native_table = ( CUDA_native_event_entry_t * )
        malloc( sizeof ( CUDA_native_event_entry_t ) * NUM_EVENTS );
    if ( cuda_native_table == NULL ) {
        perror( "malloc(): Failed to allocate memory to events table" );
        return ( PAPI_ENOSUPP );
    }

    if ( NUM_EVENTS != createNativeEvents(  ) ) 
        return ( PAPI_ENOSUPP );
    
    return ( PAPI_OK );
}

Here is the call graph for this function:

Definition at line 488 of file linux-cuda.c.

{
    CUDA_control_state_t * CUDA_ctrl = ( CUDA_control_state_t * ) ctrl;
    CUptiResult cuptiErr = CUPTI_SUCCESS;
    int i;

    /* allocate memory for the list of events that are added to the CuPTI eventGroup */
    CUDA_ctrl->addedEvents.list = malloc( sizeof ( int ) * NUM_EVENTS );
    if ( CUDA_ctrl->addedEvents.list == NULL ) {
        perror
        ( "malloc(): Failed to allocate memory to table of events that are added to CuPTI eventGroup" );
        return ( PAPI_ENOSUPP );
    }
    
    /* initialize the event list */
    for ( i = 0; i < NUM_EVENTS; i++ )
        CUDA_ctrl->addedEvents.list[i] = 0;

    
    
    cuptiErr = cuptiEventGroupCreate( cuCtx, &CUDA_ctrl->eventGroup, 0 );
    CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupCreate" );
    
    return PAPI_OK;
}

Definition at line 403 of file linux-cuda.c.

{
    CUDA_context_t * CUDA_ctx = ( CUDA_context_t * ) ctx;
    /* Initialize number of events in EventSet for update_control_state() */
    CUDA_ctx->state.old_count = 0;
    
    return PAPI_OK;
}
int CUDA_ntv_code_to_bits ( unsigned int  EventCode,
hwd_register_t bits 
)

Definition at line 834 of file linux-cuda.c.

{
    int index = EventCode;

    memcpy( ( CUDA_register_t * ) bits,
            &( cuda_native_table[index].resources ),
            sizeof ( CUDA_register_t ) );

    return ( PAPI_OK );
}
int CUDA_ntv_code_to_descr ( unsigned int  EventCode,
char *  name,
int  len 
)

Definition at line 821 of file linux-cuda.c.

{
    int index = EventCode;

    strncpy( name, cuda_native_table[index].description, len );
    return ( PAPI_OK );
}
int CUDA_ntv_code_to_name ( unsigned int  EventCode,
char *  name,
int  len 
)

Definition at line 808 of file linux-cuda.c.

{
    int index = EventCode;

    strncpy( name, cuda_native_table[index].name, len );
    return ( PAPI_OK );
}
int CUDA_ntv_enum_events ( unsigned int *  EventCode,
int  modifier 
)

Definition at line 775 of file linux-cuda.c.

{

    switch ( modifier ) {
    case PAPI_ENUM_FIRST:
        *EventCode = 0;

        return ( PAPI_OK );
        break;

    case PAPI_ENUM_EVENTS:
    {
        int index = *EventCode;

        if ( index < NUM_EVENTS - 1 ) {
            *EventCode = *EventCode + 1;
            return ( PAPI_OK );
        } else
            return ( PAPI_ENOEVNT );

        break;
    }
    default:
        return ( PAPI_EINVAL );
    }
    return ( PAPI_EINVAL );
}
int CUDA_read ( hwd_context_t ctx,
hwd_control_state_t ctrl,
long_long **  events,
int  flags 
)

Definition at line 558 of file linux-cuda.c.

{
    ( void ) ctx;
    ( void ) flags;
    CUDA_control_state_t * CUDA_ctrl = ( CUDA_control_state_t * ) ctrl;


    if ( 0 != getEventValue( CUDA_ctrl->counts, CUDA_ctrl->eventGroup, CUDA_ctrl->addedEvents ) )
        return ( PAPI_ENOSUPP );

    *events = CUDA_ctrl->counts;

    return ( PAPI_OK );
}

Here is the call graph for this function:

int CUDA_reset ( hwd_context_t ctx,
hwd_control_state_t ctrl 
)

Definition at line 736 of file linux-cuda.c.

{
    ( void ) ctx;
    CUDA_control_state_t * CUDA_ctrl = ( CUDA_control_state_t * ) ctrl;
    CUptiResult cuptiErr = CUPTI_SUCCESS;

    /* Resets all events in the CuPTI eventGroup to zero */
    cuptiErr = cuptiEventGroupResetAllEvents( CUDA_ctrl->eventGroup );
    CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupResetAllEvents" );

    return ( PAPI_OK );
}
int CUDA_set_domain ( hwd_control_state_t cntrl,
int  domain 
)

Definition at line 711 of file linux-cuda.c.

{
    int found = 0;
    ( void ) cntrl;

    if ( PAPI_DOM_USER & domain )
        found = 1;

    if ( PAPI_DOM_KERNEL & domain )
        found = 1;

    if ( PAPI_DOM_OTHER & domain )
        found = 1;

    if ( !found )
        return ( PAPI_EINVAL );

    return ( PAPI_OK );
}
int CUDA_shutdown_component ( void  )

Definition at line 589 of file linux-cuda.c.

{
    CUresult cuErr = CUDA_SUCCESS;
    
    /* if running a threaded application, we need to make sure that 
       a thread doesn't free the same memory location(s) more than once */
    if ( CUDA_FREED == 0 ) {
        uint32_t j;
        int i;
        
        CUDA_FREED = 1;

        /* deallocate all the memory */
        for ( i = 0; i < deviceCount; i++ ) {
            for ( j = 0; j < device[i].domainCount; j++ )
                free( device[i].domain[j].event );
            
            free( device[i].domain );
        }

        free( device );
        free( cuda_native_table );
        
        /* destroy floating CUDA context */
        cuErr = cuCtxDestroy( cuCtx );
        if ( cuErr != CUDA_SUCCESS )
            return ( PAPI_ENOSUPP );            // Not supported
    }

    
    return ( PAPI_OK );
}

Definition at line 578 of file linux-cuda.c.

{
    CUDA_context_t *CUDA_ctx = (CUDA_context_t*)ctx;
    free( CUDA_ctx->state.addedEvents.list );
    return (PAPI_OK);
}
int CUDA_start ( hwd_context_t ctx,
hwd_control_state_t ctrl 
)

Definition at line 519 of file linux-cuda.c.

{
    ( void ) ctx;
    int i;
    CUDA_control_state_t * CUDA_ctrl = ( CUDA_control_state_t * ) ctrl;
    CUptiResult cuptiErr = CUPTI_SUCCESS;
    
    // reset all event values to 0
    for ( i = 0; i < NUM_EVENTS; i++ )
        CUDA_ctrl->counts[i] = 0;

    cuptiErr = cuptiEventGroupEnable( CUDA_ctrl->eventGroup );
    CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupEnable" );

    /* Resets all events in the CuPTI eventGroup to zero */
    cuptiErr = cuptiEventGroupResetAllEvents( CUDA_ctrl->eventGroup );
    CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupResetAllEvents" );

    return ( PAPI_OK );
}
int CUDA_stop ( hwd_context_t ctx,
hwd_control_state_t ctrl 
)

Definition at line 545 of file linux-cuda.c.

{
    ( void ) ctx;
    ( void ) ctrl;

    return ( PAPI_OK );
}
int CUDA_update_control_state ( hwd_control_state_t ptr,
NativeInfo_t native,
int  count,
hwd_context_t ctx 
)

Definition at line 644 of file linux-cuda.c.

{
    ( void ) ctx;
    CUDA_control_state_t * CUDA_ptr = ( CUDA_control_state_t * ) ptr;
    int index;
    CUptiResult cuptiErr = CUPTI_SUCCESS;

    cuptiErr = cuptiEventGroupDisable( CUDA_ptr->eventGroup );
    CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupDisable" );
    
    /* Remove or Add events */
    if ( CUDA_ptr->old_count > count ) {
        cuptiErr =
            cuptiEventGroupRemoveEvent( CUDA_ptr->eventGroup,
                                        cuda_native_table[CUDA_ptr->addedEvents.list[0]].
                                        resources.eventId );

        /* Keep track of events in EventGroup if an event is removed */
        CUDA_ptr->old_count = count;
    } else {
        index = native[count - 1].ni_event;
        native[count - 1].ni_position = index;

        /* store events, that have been added to the CuPTI eveentGroup 
           in a seperate place (addedEvents).
           Needed, so that we can read the values for the added events only */
        CUDA_ptr->addedEvents.count = count;
        CUDA_ptr->addedEvents.list[count - 1] = index;

        /* if this device name is different from the actual device the code is running on, then exit */
        if ( 0 != strncmp( device[currentDeviceID].name,
                           cuda_native_table[index].name,
                           strlen( device[currentDeviceID].name ) ) ) {
            fprintf( stderr, "Device %s is used -- BUT event %s is collected. \n ---> ERROR: Specify events for the device that is used!\n\n",
                  device[currentDeviceID].name, cuda_native_table[index].name );
            
            return ( PAPI_ENOSUPP );    // Not supported 
        }

        /* Add events to the CuPTI eventGroup */
        cuptiErr =
            cuptiEventGroupAddEvent( CUDA_ptr->eventGroup,
                                     cuda_native_table[index].resources.
                                     eventId );
        CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupAddEvent" );

        /* Keep track of events in EventGroup if an event is removed */
        CUDA_ptr->old_count = count;
    }

    return ( PAPI_OK );
}
static int detectDevice ( void  ) [static]

Definition at line 34 of file linux-cuda.c.

{
    CUresult err;
    int skipDevice = 0;
    int id;
    char deviceName_tmp[PAPI_MIN_STR_LEN] = "init";

    totalEventCount = 0;

    /* CUDA initialization  */
    err = cuInit( 0 );
    if ( err != CUDA_SUCCESS ) 
        return ( PAPI_ENOSUPP );

    /* How many gpgpu devices do we have? */
    err = cuDeviceGetCount( &deviceCount );
    CHECK_CU_ERROR( err, "cuDeviceGetCount" );
    if ( deviceCount == 0 )
        return ( PAPI_ENOSUPP );

    /* allocate memory for device data table */
    device = ( DeviceData_t * ) malloc( sizeof ( DeviceData_t ) * deviceCount );
    if ( device == NULL ) {
        perror( "malloc(): Failed to allocate memory to CUDA device table" );
        return ( PAPI_ENOSUPP );
    }

    /* What are the devices? Get Name and # of domains per device */
    for ( id = 0; id < deviceCount; id++ ) {
        err = cuDeviceGet( &device[id].dev, id );
        CHECK_CU_ERROR( err, "cuDeviceGet" );

        err =
            cuDeviceGetName( device[id].name, PAPI_MIN_STR_LEN,
                             device[id].dev );
        CHECK_CU_ERROR( err, "cuDeviceGetName" );

        /* Skip device if there are multiple of the same type 
           and if it has been already added to the list */
        if ( 0 == strcmp( deviceName_tmp, device[id].name ) ) {
            skipDevice++;
            continue;
        }

        strcpy( deviceName_tmp, device[id].name );

        /* enumerate the domains on the device */
        if ( 0 != enumEventDomains( device[id].dev, id ) )
            return ( PAPI_ENOSUPP );
    }

    deviceCount = deviceCount - skipDevice;

    /* return number of events provided via CuPTI */
    return totalEventCount;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int enumEventDomains ( CUdevice  dev,
int  deviceId 
) [static]

Definition at line 96 of file linux-cuda.c.

{
    CUptiResult err = CUPTI_SUCCESS;
    CUpti_EventDomainID *domainId = NULL;
    uint32_t id = 0;
    size_t size = 0;

    device[deviceId].domainCount = 0;

    /* get number of domains for device dev */
    err = cuptiDeviceGetNumEventDomains( dev, &device[deviceId].domainCount );
    CHECK_CUPTI_ERROR( err, "cuptiDeviceGetNumEventDomains" );

    if ( device[deviceId].domainCount == 0 ) {
        printf( "No domain is exposed by dev = %d\n", dev );
        return -1;
    }

    /* CuPTI domain struct */
    size = sizeof ( CUpti_EventDomainID ) * device[deviceId].domainCount;
    domainId = ( CUpti_EventDomainID * ) malloc( size );
    if ( domainId == NULL ) {
        perror( "malloc(): Failed to allocate memory to CuPTI domain ID" );
        return -1;
    }
    memset( domainId, 0, size );

    /* PAPI domain struct */
    device[deviceId].domain =
        ( DomainData_t * ) malloc( sizeof ( DomainData_t ) *
                                   device[deviceId].domainCount );
    if ( device[deviceId].domain == NULL ) {
        perror( "malloc(): Failed to allocate memory to PAPI domain struct" );
        return -1;
    }

    /* Enumerates the event domains for a device dev */
    err = cuptiDeviceEnumEventDomains( dev, &size, domainId );
    CHECK_CUPTI_ERROR( err, "cuptiDeviceEnumEventDomains" );

    /* enum domains */
    for ( id = 0; id < device[deviceId].domainCount; id++ ) {
        device[deviceId].domain[id].domainId = domainId[id];

        /* query domain name */
        size = PAPI_MIN_STR_LEN;
#ifdef CUDA_4_0
        err = cuptiEventDomainGetAttribute( dev,
                                           device[deviceId].domain[id].
                                           domainId,
                                           CUPTI_EVENT_DOMAIN_ATTR_NAME, &size,
                                           ( void * ) device[deviceId].
                                           domain[id].name );
        CHECK_CUPTI_ERROR( err, "cuptiEventDomainGetAttribute" );
        
        /* query num of events avaialble in the domain */
        size = sizeof ( device[deviceId].domain[id].eventCount );
        err = cuptiEventDomainGetAttribute( dev,
                                           device[deviceId].domain[id].
                                           domainId,
                                           CUPTI_EVENT_DOMAIN_MAX_EVENTS,
                                           &size,
                                           ( void * ) &device[deviceId].
                                           domain[id].eventCount );
        CHECK_CUPTI_ERROR( err, "cuptiEventDomainGetAttribute" );
        
        /* enumerate the events for the domain[id] on the device dev */
        if ( 0 != enumEvents( dev, deviceId, id ) )
            return -1;
#else
        err = cuptiDeviceGetEventDomainAttribute( dev,
                                                  device[deviceId].domain[id].domainId,
                                                  CUPTI_EVENT_DOMAIN_ATTR_NAME, &size,
                                                  ( void * ) device[deviceId].domain[id].name );
        CHECK_CUPTI_ERROR( err, "cuptiDeviceGetEventDomainAttribute" );

        /* query num of events avaialble in the domain */
        err = cuptiEventDomainGetNumEvents( device[deviceId].domain[id].domainId,
                                            &device[deviceId].domain[id].eventCount );
        CHECK_CUPTI_ERROR( err, "cuptiEventDomainGetNumEvents" );

        /* enumerate the events for the domain[id] on the device deviceId */
        if ( 0 != enumEvents( deviceId, id ) )
            return -1;
#endif
    }

    totalDomainCount += device[deviceId].domainCount;
    free( domainId );
    return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

static int enumEvents ( int  deviceId,
int  domainId 
) [static]

Definition at line 197 of file linux-cuda.c.

{
    CUptiResult err = CUPTI_SUCCESS;
    CUpti_EventID *eventId = NULL;
    size_t size = 0;
    uint32_t id = 0;

    /* CuPTI event struct */
    size =
        sizeof ( CUpti_EventID ) * device[deviceId].domain[domainId].eventCount;
    eventId = ( CUpti_EventID * ) malloc( size );
    if ( eventId == NULL ) {
        perror( "malloc(): Failed to allocate memory to CuPTI event ID" );
        return -1;
    }
    memset( eventId, 0, size );

    /* PAPI event struct */
    device[deviceId].domain[domainId].event =
        ( EventData_t * ) malloc( sizeof ( EventData_t ) *
                                  device[deviceId].domain[domainId].
                                  eventCount );
    if ( device[deviceId].domain[domainId].event == NULL ) {
        perror( "malloc(): Failed to allocate memory to PAPI event struct" );
        return -1;
    }

    /* enumerate the events for the domain[domainId] on the device[deviceId] */
#ifdef CUDA_4_0
    err =
    cuptiEventDomainEnumEvents( dev,
                               ( CUpti_EventDomainID ) device[deviceId].
                               domain[domainId].domainId, &size, eventId );
#else
    err =
        cuptiEventDomainEnumEvents( ( CUpti_EventDomainID ) device[deviceId].
                                    domain[domainId].domainId, &size, eventId );
#endif
    CHECK_CUPTI_ERROR( err, "cuptiEventDomainEnumEvents" );

    /* query event info */
    for ( id = 0; id < device[deviceId].domain[domainId].eventCount; id++ ) {
        device[deviceId].domain[domainId].event[id].eventId = eventId[id];

        /* query event name */
        size = PAPI_MIN_STR_LEN;
#ifdef CUDA_4_0
        err = cuptiEventGetAttribute( dev,
                                     device[deviceId].domain[domainId].
                                     event[id].eventId, CUPTI_EVENT_ATTR_NAME,
                                     &size,
                                     ( uint8_t * ) device[deviceId].
                                     domain[domainId].event[id].name );     
#else
        err = cuptiEventGetAttribute( device[deviceId].domain[domainId].
                                      event[id].eventId, CUPTI_EVENT_ATTR_NAME,
                                      &size,
                                      ( uint8_t * ) device[deviceId].
                                      domain[domainId].event[id].name );
#endif
        CHECK_CUPTI_ERROR( err, "cuptiEventGetAttribute" );

        /* query event description */
        size = PAPI_2MAX_STR_LEN;
#ifdef CUDA_4_0
        err = cuptiEventGetAttribute( dev,
                                     device[deviceId].domain[domainId].
                                     event[id].eventId,
                                     CUPTI_EVENT_ATTR_SHORT_DESCRIPTION, &size,
                                     ( uint8_t * ) device[deviceId].
                                     domain[domainId].event[id].desc );     
#else
        err = cuptiEventGetAttribute( device[deviceId].domain[domainId].
                                      event[id].eventId,
                                      CUPTI_EVENT_ATTR_SHORT_DESCRIPTION, &size,
                                      ( uint8_t * ) device[deviceId].
                                      domain[domainId].event[id].desc );
#endif
        CHECK_CUPTI_ERROR( err, "cuptiEventGetAttribute" );
    }

    totalEventCount += device[deviceId].domain[domainId].eventCount;
    free( eventId );
    return 0;
}

Here is the caller graph for this function:

static int getEventValue ( long long *  counts,
CUpti_EventGroup  eventGroup,
AddedEvents_t  addedEvents 
) [static]

Definition at line 351 of file linux-cuda.c.

{
    CUptiResult cuptiErr = CUPTI_SUCCESS;
    size_t events_read, bufferSizeBytes, arraySizeBytes, i;
    uint64_t *counterDataBuffer;
    CUpti_EventID *eventIDArray;
    int j;

    bufferSizeBytes = addedEvents.count * sizeof ( uint64_t );
    counterDataBuffer = ( uint64_t * ) malloc( bufferSizeBytes );

    arraySizeBytes = addedEvents.count * sizeof ( CUpti_EventID );
    eventIDArray = ( CUpti_EventID * ) malloc( arraySizeBytes );

    /* read counter data for the specified event from the CuPTI eventGroup */
    cuptiErr = cuptiEventGroupReadAllEvents( eventGroup,
                                             CUPTI_EVENT_READ_FLAG_NONE,
                                             &bufferSizeBytes,
                                             counterDataBuffer, &arraySizeBytes,
                                             eventIDArray, &events_read );
    CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupReadAllEvents" );

    if ( events_read != ( size_t ) addedEvents.count )
        return -1;

    /* Since there is no guarantee that returned counter values are in the same 
       order as the counters in the PAPI addedEvents.list, we need to map the
       CUpti_EventID to PAPI event ID values.
       According to CuPTI doc: counter return values of counterDataBuffer 
       correspond to the return event IDs in eventIDArray */
    for ( i = 0; i < events_read; i++ )
        for ( j = 0; j < addedEvents.count; j++ )
            if ( cuda_native_table[addedEvents.list[j]].resources.eventId ==
                 eventIDArray[i] )
                // since cuptiEventGroupReadAllEvents() resets counter values to 0;
                // we have to accumulate ourselves 
                counts[addedEvents.list[j]] = counts[addedEvents.list[j]] + counterDataBuffer[i];

    free( counterDataBuffer );
    free( eventIDArray );
    return 0;
}

Here is the caller graph for this function:


Variable Documentation

Definition at line 24 of file linux-cuda.c.

 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines