PAPI  5.0.1.0
linux-nvml.c File Reference

This is an NVML component; it demonstrates the component interface and implements two counters, nvmlDeviceGetPowerUsage and nvmlDeviceGetTemperature, from the NVIDIA Management Library. Please refer to the NVML documentation for details about nvmlDeviceGetPowerUsage and nvmlDeviceGetTemperature. Power is reported in mW and temperature in Celsius. More...

Include dependency graph for linux-nvml.c:

Go to the source code of this file.

Data Structures

struct  nvml_control_state_t
struct  nvml_context_t

Defines

#define NVML_MAX_COUNTERS   100

Functions

unsigned long long getClockSpeed (nvmlDevice_t dev, nvmlClockType_t which_one)
unsigned long long getEccLocalErrors (nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one)
unsigned long long getFanSpeed (nvmlDevice_t dev)
unsigned long long getMaxClockSpeed (nvmlDevice_t dev, nvmlClockType_t which_one)
unsigned long long getMemoryInfo (nvmlDevice_t dev, int which_one)
unsigned long long getPState (nvmlDevice_t dev)
unsigned long long getPowerUsage (nvmlDevice_t dev)
unsigned long long getTemperature (nvmlDevice_t dev)
unsigned long long getTotalEccErrors (nvmlDevice_t dev, nvmlEccBitType_t bits)
unsigned long long getUtilization (nvmlDevice_t dev, int which_one)
static void nvml_hardware_reset ()
static int nvml_hardware_read (long long *value, int which_one)
int _papi_nvml_init_thread (hwd_context_t *ctx)
static int detectDevices ()
static void createNativeEvents ()
int _papi_nvml_init_component (int cidx)
int _papi_nvml_init_control_state (hwd_control_state_t *ctl)
int _papi_nvml_update_control_state (hwd_control_state_t *ctl, NativeInfo_t *native, int count, hwd_context_t *ctx)
int _papi_nvml_start (hwd_context_t *ctx, hwd_control_state_t *ctl)
int _papi_nvml_stop (hwd_context_t *ctx, hwd_control_state_t *ctl)
int _papi_nvml_read (hwd_context_t *ctx, hwd_control_state_t *ctl, long long **events, int flags)
int _papi_nvml_write (hwd_context_t *ctx, hwd_control_state_t *ctl, long long *events)
int _papi_nvml_reset (hwd_context_t *ctx, hwd_control_state_t *ctl)
int _papi_nvml_shutdown_component ()
int _papi_nvml_shutdown_thread (hwd_context_t *ctx)
int _papi_nvml_ctl (hwd_context_t *ctx, int code, _papi_int_option_t *option)
int _papi_nvml_set_domain (hwd_control_state_t *cntrl, int domain)
int _papi_nvml_ntv_enum_events (unsigned int *EventCode, int modifier)
int _papi_nvml_ntv_code_to_name (unsigned int EventCode, char *name, int len)
int _papi_nvml_ntv_code_to_descr (unsigned int EventCode, char *descr, int len)

Variables

papi_vector_t _nvml_vector
static nvml_native_event_entry_t * nvml_native_table
static int device_count = 0
static int num_events = 0
static nvmlDevice_t * devices
static int * features

Detailed Description

Author:
Kiran Kumar Kasichayanula kkasicha@utk.edu
James Ralph ralph@eecs.utk.edu

Definition in file linux-nvml.c.


Define Documentation

#define NVML_MAX_COUNTERS   100

Definition at line 45 of file linux-nvml.c.


Function Documentation

int _papi_nvml_ctl ( hwd_context_t * ctx,
int  code,
_papi_int_option_t * option 
)

This function sets various options in the component

Parameters:
code  valid values are PAPI_SET_DEFDOM, PAPI_SET_DOMAIN, PAPI_SETDEFGRN, PAPI_SET_GRANUL and PAPI_SET_INHERIT

Definition at line 1084 of file linux-nvml.c.

{
        /* No option is acted upon yet, so every argument is deliberately unused. */
        (void) option;
        (void) code;
        (void) ctx;

        SUBDBG( "nvml_ctl..." );

        /* FIXME: more state may need to be set up here, such as which
           counters are active and how they are mapped. */

        return PAPI_OK;
}
int _papi_nvml_init_component ( int  cidx)

Initializes the hardware counters, sets up the function vector table and gathers hardware information. This routine is called when the PAPI process is initialized (i.e. from PAPI_library_init).

Definition at line 821 of file linux-nvml.c.

{
        /*
         * Bring up NVML and CUDA, check that both report the same number of
         * devices, allocate the per-device tables, detect the supported
         * features and build the native event table.
         *
         * cidx is the component index stored in the component info.
         * Always returns PAPI_OK; on any failure the component is disabled by
         * recording a reason in disabled_reason and exporting zero counters.
         */
        nvmlReturn_t ret;
        cudaError_t cuerr;

        int cuda_count = 0;
        unsigned int nvml_count = 0;

        ret = nvmlInit();
        if ( NVML_SUCCESS != ret ) {
                strcpy(_nvml_vector.cmp_info.disabled_reason, "The NVIDIA managament library failed to initialize.");
                goto disable;
        }

        cuerr = cuInit( 0 );
        if ( CUDA_SUCCESS != cuerr ) {
                strcpy(_nvml_vector.cmp_info.disabled_reason, "The CUDA library failed to initialize.");
                goto disable;
        }

        /* Figure out the number of CUDA devices in the system */
        ret = nvmlDeviceGetCount( &nvml_count );
        if ( NVML_SUCCESS != ret ) {
                strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a count of devices from the NVIDIA managament library.");
                goto disable;
        }

        cuerr = cudaGetDeviceCount( &cuda_count );
        if ( CUDA_SUCCESS != cuerr ) {
                strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a device count from CUDA.");
                goto disable;
        }

        /* We can probably recover from this, when we're clever.
           cuda_count is signed, so rule out a negative value explicitly
           instead of relying on the implicit conversion to unsigned. */
        if ( cuda_count < 0 || nvml_count != (unsigned int) cuda_count ) {
                strcpy(_nvml_vector.cmp_info.disabled_reason, "Cuda and the NVIDIA managament library have different device counts.");
                goto disable;
        }

        device_count = cuda_count;

        /* A per device representation of what events are present */
        features = (int*)papi_malloc(sizeof(int) * device_count );

        /* Handles to each device */
        devices = (nvmlDevice_t*)papi_malloc(sizeof(nvmlDevice_t) * device_count);

        /* A NULL result only signals failure when something was requested;
           a zero-byte request may legitimately yield NULL. */
        if ( device_count > 0 && ( NULL == features || NULL == devices ) ) {
                if ( features ) papi_free(features);
                if ( devices ) papi_free(devices);
                features = NULL;
                devices = NULL;
                strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to allocate memory for the per device tables.");
                goto disable;
        }

        /* Figure out what events are supported on each card. */
        /* NOTE(review): papi_errorcode is not declared in this function;
           presumably it is defined at file scope - confirm. */
        if ( (papi_errorcode = detectDevices( ) ) != PAPI_OK ) {
            papi_free(features);
            papi_free(devices);
            /* Do not leave dangling pointers behind for a later shutdown. */
            features = NULL;
            devices = NULL;
            sprintf(_nvml_vector.cmp_info.disabled_reason, "An error occured in device feature detection, please check your NVIDIA Management Library and CUDA install." );
            goto disable;
        }

        /* The assumption is that if everything went swimmingly in detectDevices, 
            all nvml calls here should be fine. */
        createNativeEvents( );

        /* Export the total number of events available */
        _nvml_vector.cmp_info.num_native_events = num_events;

        /* Export the component id */
        _nvml_vector.cmp_info.CmpIdx = cidx;

        /* Export the number of 'counters' */
        _nvml_vector.cmp_info.num_cntrs = num_events;
        _nvml_vector.cmp_info.num_mpx_cntrs = num_events;

        return PAPI_OK;

disable:
        _nvml_vector.cmp_info.num_cntrs = 0;
        return PAPI_OK; 
}

Here is the call graph for this function:

int _papi_nvml_init_control_state ( hwd_control_state_t * ctl )

Set up a counter control state. In general a control state holds the hardware info for an EventSet.

Definition at line 904 of file linux-nvml.c.

{
        nvml_control_state_t *state = ( nvml_control_state_t * ) ctl;

        SUBDBG( "nvml_init_control_state... %p\n", ctl );

        /* A fresh control state starts out with every field zeroed. */
        memset( state, 0, sizeof ( *state ) );

        return PAPI_OK;
}

int _papi_nvml_init_thread ( hwd_context_t * ctx )

This is called whenever a thread is initialized.

Definition at line 389 of file linux-nvml.c.

{
        /* Nothing is set up per thread; the context is only logged. */
        (void) ctx;

        SUBDBG( "_papi_nvml_init %p...", ctx );

        return PAPI_OK;
}
int _papi_nvml_ntv_code_to_descr ( unsigned int  EventCode,
char *  descr,
int  len 
)

Takes a native event code and passes back the event description

Parameters:
EventCode  is the native event code
descr  is a pointer for the description to be copied to
len  is the size of the descr string

Definition at line 1212 of file linux-nvml.c.

{
        /*
         * Copy the description of native event EventCode into descr, which
         * holds len bytes. The result is always NUL-terminated when len > 0
         * and is truncated if it does not fit.
         *
         * Returns PAPI_ENOEVNT for a code outside the event table,
         * PAPI_EINVAL for a negative len, PAPI_OK otherwise.
         */

        /* Compare as unsigned: a code above INT_MAX would otherwise turn into
           a negative index that slips past the range check. */
        if ( num_events <= 0 || EventCode >= (unsigned int) num_events )
                return PAPI_ENOEVNT;

        /* A negative length would convert to a huge size_t inside strncpy. */
        if ( len < 0 )
                return PAPI_EINVAL;

        if ( len > 0 ) {
                strncpy( descr, nvml_native_table[EventCode].description, len - 1 );
                /* strncpy does not terminate when the source fills the buffer. */
                descr[len - 1] = '\0';
        }

        return PAPI_OK;
}
int _papi_nvml_ntv_code_to_name ( unsigned int  EventCode,
char *  name,
int  len 
)

Takes a native event code and passes back the name

Parameters:
EventCode  is the native event code
name  is a pointer for the name to be copied to
len  is the size of the name string

Definition at line 1192 of file linux-nvml.c.

{
        /*
         * Copy the name of native event EventCode into name, which holds len
         * bytes. The result is always NUL-terminated when len > 0 and is
         * truncated if it does not fit.
         *
         * Returns PAPI_ENOEVNT for a code outside the event table,
         * PAPI_EINVAL for a negative len, PAPI_OK otherwise.
         */

        /* Make sure we are in range. Compare as unsigned: a code above
           INT_MAX would otherwise turn into a negative index that slips
           past the check. */
        if ( num_events <= 0 || EventCode >= (unsigned int) num_events )
                return PAPI_ENOEVNT;

        /* A negative length would convert to a huge size_t inside strncpy. */
        if ( len < 0 )
                return PAPI_EINVAL;

        if ( len > 0 ) {
                strncpy( name, nvml_native_table[EventCode].name, len - 1 );
                /* strncpy does not terminate when the source fills the buffer. */
                name[len - 1] = '\0';
        }

        return PAPI_OK;
}
int _papi_nvml_ntv_enum_events ( unsigned int *  EventCode,
int  modifier 
)

Enumerate Native Events

Parameters:
EventCode  is the event of interest
modifier  is one of PAPI_ENUM_FIRST, PAPI_ENUM_EVENTS. If your component has attribute masks then these need to be handled here as well.

Definition at line 1151 of file linux-nvml.c.

{
        int current;

        /* PAPI_ENUM_FIRST: hand back the code of the first supported event. */
        if ( PAPI_ENUM_FIRST == modifier ) {
                *EventCode = 0;
                return PAPI_OK;
        }

        /* PAPI_ENUM_EVENTS: step to the event following *EventCode. */
        if ( PAPI_ENUM_EVENTS == modifier ) {
                current = *EventCode;

                /* The last event has no successor. */
                if ( current >= num_events - 1 )
                        return PAPI_ENOEVNT;

                /* Event codes are assumed to be densely numbered. */
                *EventCode = *EventCode + 1;
                return PAPI_OK;
        }

        /* Any other modifier is not supported. */
        return PAPI_EINVAL;
}
int _papi_nvml_read ( hwd_context_t * ctx,
hwd_control_state_t * ctl,
long long **  events,
int  flags 
)

Triggered by PAPI_read()

Definition at line 985 of file linux-nvml.c.

{
        nvml_control_state_t *state = ( nvml_control_state_t * ) ctl;
        int slot = 0;
        int status;

        (void) ctx;
        (void) flags;

        SUBDBG( "nvml_read... %p %d", ctx, flags );

        /* Refresh every counter of the event set; give up on the first failure. */
        while ( slot < state->num_events ) {
                status = nvml_hardware_read( &state->counter[slot],
                                             state->which_counter[slot] );
                if ( status != PAPI_OK )
                        return status;
                slot++;
        }

        /* Hand the caller a pointer to the values just read. */
        *events = state->counter;
        return PAPI_OK;
}

Here is the call graph for this function:

int _papi_nvml_reset ( hwd_context_t * ctx, hwd_control_state_t * ctl )

Triggered by PAPI_reset(), but only if the EventSet is currently running.

Definition at line 1033 of file linux-nvml.c.

{
        /* Neither argument influences the reset; both are only logged. */
        (void) ctl;
        (void) ctx;

        SUBDBG( "nvml_reset ctx=%p ctrl=%p...", ctx, ctl );

        /* Delegate to the hardware-level reset routine. */
        nvml_hardware_reset(  );

        return PAPI_OK;
}

Here is the call graph for this function:

int _papi_nvml_set_domain ( hwd_control_state_t * cntrl,
int  domain 
)

This function sets the bits needed to count different domains, in particular PAPI_DOM_USER, PAPI_DOM_KERNEL and PAPI_DOM_OTHER. It returns PAPI_EINVAL if none of those is specified and PAPI_OK on success. PAPI_DOM_USER: only the user context is counted. PAPI_DOM_KERNEL: only the kernel/OS context is counted. PAPI_DOM_OTHER: exception/transient mode (like user TLB misses). PAPI_DOM_ALL: all of the domains.

Definition at line 1109 of file linux-nvml.c.

{
        int matched = 0;

        (void) cntrl;

        SUBDBG( "nvml_set_domain..." );

        /* Log each recognised domain bit and remember that one was present. */
        if ( domain & PAPI_DOM_USER ) {
                SUBDBG( " PAPI_DOM_USER " );
                matched = 1;
        }
        if ( domain & PAPI_DOM_KERNEL ) {
                SUBDBG( " PAPI_DOM_KERNEL " );
                matched = 1;
        }
        if ( domain & PAPI_DOM_OTHER ) {
                SUBDBG( " PAPI_DOM_OTHER " );
                matched = 1;
        }
        if ( domain & PAPI_DOM_ALL ) {
                SUBDBG( " PAPI_DOM_ALL " );
                matched = 1;
        }

        /* A request that names no known domain is rejected. */
        return matched ? PAPI_OK : ( PAPI_EINVAL );
}

int _papi_nvml_shutdown_component ( )

Triggered by PAPI_shutdown().

Definition at line 1048 of file linux-nvml.c.

{
        /*
         * Release everything the component allocated, shut NVML down and
         * reset the device and event counts. Always returns PAPI_OK.
         */

        SUBDBG( "nvml_shutdown_component..." );

        /* Free each table only if it exists, and clear the pointer so a
           repeated shutdown cannot free it twice. */
        if ( nvml_native_table ) {
                papi_free(nvml_native_table);
                nvml_native_table = NULL;
        }
        if ( devices ) {
                papi_free(devices);
                devices = NULL;
        }
        /* The per-device feature table was previously never released. */
        if ( features ) {
                papi_free(features);
                features = NULL;
        }

        nvmlShutdown();

        device_count = 0;
        num_events = 0;

        return PAPI_OK;
}

int _papi_nvml_shutdown_thread ( hwd_context_t * ctx )

Called at thread shutdown.

Definition at line 1066 of file linux-nvml.c.

{
        /* There is no per-thread state to release; the context is only logged. */
        (void) ctx;

        SUBDBG( "nvml_shutdown_thread... %p", ctx );

        /* This is the last opportunity to clean up after the thread. */
        return PAPI_OK;
}

int _papi_nvml_start ( hwd_context_t * ctx, hwd_control_state_t * ctl )

Triggered by PAPI_start().

Definition at line 943 of file linux-nvml.c.

{
        /* Nothing has to be armed at start time; both arguments are only logged. */
        (void) ctl;
        (void) ctx;

        SUBDBG( "nvml_start %p %p...", ctx, ctl );

        /* Placeholder: anything that must happen when counting starts
           (reset, then start the counting) would go here. */

        return PAPI_OK;
}

int _papi_nvml_stop ( hwd_context_t * ctx, hwd_control_state_t * ctl )

Triggered by PAPI_stop().

Definition at line 961 of file linux-nvml.c.

{
        nvml_control_state_t *state;
        int slot = 0;
        int status;

        (void) ctx;
        (void) ctl;

        SUBDBG( "nvml_stop %p %p...", ctx, ctl );

        state = ( nvml_control_state_t * ) ctl;

        /* Take a final reading of every counter; give up on the first failure. */
        while ( slot < state->num_events ) {
                status = nvml_hardware_read( &state->counter[slot],
                                             state->which_counter[slot] );
                if ( status != PAPI_OK )
                        return status;
                slot++;
        }

        return PAPI_OK;
}

Here is the call graph for this function:

int _papi_nvml_update_control_state ( hwd_control_state_t * ctl,
NativeInfo_t * native,
int  count,
hwd_context_t * ctx 
)

Triggered by eventset operations like add or remove

Definition at line 916 of file linux-nvml.c.

{
        nvml_control_state_t *state = ( nvml_control_state_t * ) ctl;
        int slot;
        int event_index;

        (void) ctx;

        SUBDBG( "_papi_nvml_update_control_state %p %p...", ctl, ctx );

        /* An empty request leaves the control state untouched. */
        if ( 0 == count )
                return PAPI_OK;

        /* Events carry no placement constraints, so the i-th requested
           event simply occupies the i-th slot. */
        for ( slot = 0; slot < count; slot++ ) {
                event_index = native[slot].ni_event;
                state->which_counter[slot] = event_index;
                native[slot].ni_position = slot;
        }

        state->num_events = count;
        return PAPI_OK;
}
int _papi_nvml_write ( hwd_context_t * ctx,
hwd_control_state_t * ctl,
long long *  events 
)
Triggered by PAPI_write(), but only if the counters are running

Definition at line 1012 of file linux-nvml.c.

{
        /* Writing counters is not supported; every argument is unused. */
        (void) events;
        (void) ctl;
        (void) ctx;

        SUBDBG( "nvml_write... %p %p", ctx, ctl );

        /* ECC mode and compute exclusivity can be changed on the cards, but
           that is not treated as the job of a PAPI component for now,
           implementation issues aside. */
        return PAPI_OK;
}
static void createNativeEvents ( void  ) [static]

Definition at line 581 of file linux-nvml.c.

{
        /*
         * Allocate nvml_native_table (num_events zeroed entries) and fill it
         * with one entry per event supported by each uniquely named device,
         * as recorded in features[]. Event names have the form
         * "<device name with spaces replaced by '_'>:<event>".
         *
         * If the table cannot be allocated, num_events is set to 0 and the
         * function returns without creating any event. A device whose name
         * cannot be queried is skipped.
         */

/* Name, describe and type the event at *entry, then advance to the next
   slot. strncpy alone leaves the description unterminated when the text is
   PAPI_MAX_STR_LEN characters or longer, so it is terminated explicitly.
   Any entry->options value must be assigned before invoking this. */
#define NVML_ADD_EVENT( suffix, text, feature )                               \
        do {                                                                  \
                sprintf( entry->name, "%s:" suffix, sanitized_name );         \
                strncpy( entry->description, text, PAPI_MAX_STR_LEN - 1 );    \
                entry->description[PAPI_MAX_STR_LEN - 1] = '\0';              \
                entry->type = feature;                                        \
                entry++;                                                      \
        } while ( 0 )

/* Add a per-location ECC error counter for the given bit type. */
#define NVML_ADD_LOCAL_ECC( suffix, text, bit_type, location )                \
        do {                                                                  \
                entry->options.ecc_opts = (struct local_ecc){                 \
                        .bits = bit_type,                                     \
                        .which_one = location,                                \
                };                                                            \
                NVML_ADD_EVENT( suffix, text, FEATURE_ECC_LOCAL_ERRORS );     \
        } while ( 0 )

        char name[64];
        char sanitized_name[PAPI_MAX_STR_LEN];
        /* A zero-length VLA is undefined, so keep at least one row. */
        char names[device_count > 0 ? device_count : 1][64];

        int i, nameLen = 0, j;
        int isUnique = 1;

        nvml_native_event_entry_t* entry;
        nvmlReturn_t ret;

        nvml_native_table = (nvml_native_event_entry_t*) papi_malloc( 
                        sizeof(nvml_native_event_entry_t) * num_events );   
        if ( NULL == nvml_native_table ) {
                /* Without a table no event can be looked up, so export none. */
                num_events = 0;
                return;
        }
        memset( nvml_native_table, 0x0, sizeof(nvml_native_event_entry_t) * num_events );
        entry = &nvml_native_table[0];

        for (i=0; i < device_count; i++ ) {
                memset( names[i], 0x0, 64 );
                isUnique = 1;
                ret = nvmlDeviceGetName( devices[i], name, 64 );
                if ( NVML_SUCCESS != ret ) {
                        /* name is not valid here; reading it would be undefined. */
                        SUBDBG("nvmlDeviceGetName failed for device %d\n", i);
                        continue;
                }

                /* Only the first device carrying a given name gets events. */
                for (j=0; j < i; j++ ) 
                {
                        if ( 0 == strncmp( name, names[j], 64 ) )
                                isUnique = 0;
                }

                if ( !isUnique )
                        continue;

                /* Spaces are replaced so the device name can prefix event names. */
                nameLen = strlen(name);
                strncpy(sanitized_name, name, PAPI_MAX_STR_LEN );
                for (j=0; j < nameLen; j++)
                        if ( ' ' == sanitized_name[j] )
                                sanitized_name[j] = '_';

                if ( HAS_FEATURE( features[i], FEATURE_CLOCK_INFO ) ) {
                        entry->options.clock = NVML_CLOCK_GRAPHICS;
                        NVML_ADD_EVENT( "graphics_clock", "Graphics clock domain (MHz).", FEATURE_CLOCK_INFO );

                        entry->options.clock = NVML_CLOCK_SM;
                        NVML_ADD_EVENT( "sm_clock", "SM clock domain (MHz).", FEATURE_CLOCK_INFO );

                        entry->options.clock = NVML_CLOCK_MEM;
                        NVML_ADD_EVENT( "memory_clock", "Memory clock domain (MHz).", FEATURE_CLOCK_INFO );
                }

                if ( HAS_FEATURE( features[i], FEATURE_ECC_LOCAL_ERRORS ) ) {
                        NVML_ADD_LOCAL_ECC( "l1_single_ecc_errors", "L1 cache single bit ECC",
                                        NVML_SINGLE_BIT_ECC, LOCAL_ECC_L1 );
                        NVML_ADD_LOCAL_ECC( "l2_single_ecc_errors", "L2 cache single bit ECC",
                                        NVML_SINGLE_BIT_ECC, LOCAL_ECC_L2 );
                        NVML_ADD_LOCAL_ECC( "memory_single_ecc_errors", "Device memory single bit ECC",
                                        NVML_SINGLE_BIT_ECC, LOCAL_ECC_MEM );
                        NVML_ADD_LOCAL_ECC( "regfile_single_ecc_errors", "Register file single bit ECC",
                                        NVML_SINGLE_BIT_ECC, LOCAL_ECC_REGFILE );

                        /* Previously misspelled "1l_double_ecc_errors". */
                        NVML_ADD_LOCAL_ECC( "l1_double_ecc_errors", "L1 cache double bit ECC",
                                        NVML_DOUBLE_BIT_ECC, LOCAL_ECC_L1 );
                        NVML_ADD_LOCAL_ECC( "l2_double_ecc_errors", "L2 cache double bit ECC",
                                        NVML_DOUBLE_BIT_ECC, LOCAL_ECC_L2 );
                        NVML_ADD_LOCAL_ECC( "memory_double_ecc_errors", "Device memory double bit ECC",
                                        NVML_DOUBLE_BIT_ECC, LOCAL_ECC_MEM );
                        NVML_ADD_LOCAL_ECC( "regfile_double_ecc_errors", "Register file double bit ECC",
                                        NVML_DOUBLE_BIT_ECC, LOCAL_ECC_REGFILE );
                }

                if ( HAS_FEATURE( features[i], FEATURE_FAN_SPEED ) ) {
                        NVML_ADD_EVENT( "fan_speed", "The fan speed expressed as a percent of the maximum, i.e. full speed is 100%", FEATURE_FAN_SPEED );
                }

                if ( HAS_FEATURE( features[i], FEATURE_MAX_CLOCK ) ) {
                        entry->options.clock = NVML_CLOCK_GRAPHICS;
                        NVML_ADD_EVENT( "graphics_max_clock", "Maximal Graphics clock domain (MHz).", FEATURE_MAX_CLOCK );

                        entry->options.clock = NVML_CLOCK_SM;
                        NVML_ADD_EVENT( "sm_max_clock", "Maximal SM clock domain (MHz).", FEATURE_MAX_CLOCK );

                        entry->options.clock = NVML_CLOCK_MEM;
                        NVML_ADD_EVENT( "memory_max_clock", "Maximal Memory clock domain (MHz).", FEATURE_MAX_CLOCK );
                }

                if ( HAS_FEATURE( features[i], FEATURE_MEMORY_INFO ) ) {
                        entry->options.which_one = MEMINFO_TOTAL_MEMORY;
                        NVML_ADD_EVENT( "total_memory", "Total installed FB memory (in bytes).", FEATURE_MEMORY_INFO );

                        entry->options.which_one = MEMINFO_UNALLOCED;
                        NVML_ADD_EVENT( "unallocated_memory", "Uncallocated FB memory (in bytes).", FEATURE_MEMORY_INFO );

                        entry->options.which_one = MEMINFO_ALLOCED;
                        NVML_ADD_EVENT( "allocated_memory", "Allocated FB memory (in bytes). Note that the driver/GPU always sets aside a small amount of memory for bookkeeping.", FEATURE_MEMORY_INFO );
                }

                if ( HAS_FEATURE( features[i], FEATURE_PERF_STATES ) ) {
                        NVML_ADD_EVENT( "pstate", "The performance state of the device.", FEATURE_PERF_STATES );
                }

                if ( HAS_FEATURE( features[i], FEATURE_POWER ) ) {
                        NVML_ADD_EVENT( "power", "Power usage reading for the device, in miliwatts. This is the power draw for the entire board, including GPU, memory, etc.\n The reading is accurate to within a range of +/-5 watts.", FEATURE_POWER );
                }

                if ( HAS_FEATURE( features[i], FEATURE_TEMP ) ) {
                        NVML_ADD_EVENT( "temperature", "Current temperature readings for the device, in degrees C.", FEATURE_TEMP );
                }

                if ( HAS_FEATURE( features[i], FEATURE_ECC_TOTAL_ERRORS ) ) {
                        entry->options.ecc_opts = (struct local_ecc){ 
                                .bits = NVML_SINGLE_BIT_ECC, 
                        };
                        NVML_ADD_EVENT( "total_ecc_errors", "Total single bit errors.", FEATURE_ECC_TOTAL_ERRORS );

                        /* Both totals used to share the name "total_ecc_errors";
                           the double-bit one now has a name of its own. */
                        entry->options.ecc_opts = (struct local_ecc){ 
                                .bits = NVML_DOUBLE_BIT_ECC, 
                        };
                        NVML_ADD_EVENT( "total_double_ecc_errors", "Total double bit errors.", FEATURE_ECC_TOTAL_ERRORS );
                }

                if ( HAS_FEATURE( features[i], FEATURE_UTILIZATION ) ) {
                        entry->options.which_one = GPU_UTILIZATION;
                        NVML_ADD_EVENT( "gpu_utilization", "Percent of time over the past second during which one or more kernels was executing on the GPU.", FEATURE_UTILIZATION );

                        entry->options.which_one = MEMORY_UTILIZATION;
                        NVML_ADD_EVENT( "memory_utilization", "Percent of time over the past second during which global (device) memory was being read or written.", FEATURE_UTILIZATION );
                }

                /* Remember the name so later devices with the same name are skipped. */
                strncpy( names[i], name, 64); 
        }

#undef NVML_ADD_LOCAL_ECC
#undef NVML_ADD_EVENT
}

Here is the caller graph for this function:

static int detectDevices ( ) [static]

Definition at line 399 of file linux-nvml.c.

{
        /*
         * Pair every CUDA device index with its NVML handle and work out
         * which NVML queries each device supports.
         *
         * Fills devices[i] and features[i] for i in [0, device_count) and
         * adds to num_events the events contributed by the first device of
         * each distinct product name; later devices with the same name copy
         * that device's feature mask and add nothing to num_events.
         *
         * Returns PAPI_OK on success and PAPI_ESYS as soon as a handle,
         * PCI-info, bus-id or name query fails.
         */

        /* The scratch tables below are variable-length arrays, whose extent
         * must be positive; with no devices there is nothing to detect. */
        if ( device_count <= 0 ) {
                return PAPI_OK;
        }

        nvmlReturn_t ret;
        nvmlEnableState_t mode;
        nvmlDevice_t handle;
        nvmlPciInfo_t info;

        cudaError_t cuerr;

        char busId[16];
        char name[64];
        char inforomECC[16];
        char inforomPower[16];
        char names[device_count][64];
        char nvml_busIds[device_count][16];

        float ecc_version = 0.0;

        int i = 0,
            j = 0;
        int isFermi = 0;
        int isUnique = 1;

        unsigned int temp = 0;

        /* Collect the PCI bus id of every device, in NVML index order. */
        for ( i = 0; i < device_count; i++ ) {
                ret = nvmlDeviceGetHandleByIndex( i, &handle );
                if ( NVML_SUCCESS != ret ) {
                        SUBDBG("nvmlDeviceGetHandleByIndex(%d) failed\n", i);
                        return PAPI_ESYS;
                }

                ret = nvmlDeviceGetPciInfo( handle, &info );
                if ( NVML_SUCCESS != ret ) {
                        SUBDBG("nvmlDeviceGetPciInfo() failed %s\n", nvmlErrorString(ret) );
                        return PAPI_ESYS;
                }

                strncpy(nvml_busIds[i], info.busId, 16);
        }

        /* We want to key our list of nvmlDevice_ts by each device's cuda index */
        for ( i = 0; i < device_count; i++ ) {
                cuerr = cudaDeviceGetPCIBusId( busId, 16, i );
                if ( cudaSuccess != cuerr ) {
                        SUBDBG("cudaDeviceGetPCIBusId failed.\n");
                        return PAPI_ESYS;
                }
                for ( j = 0; j < device_count; j++ ) {
                        if ( !strncmp( busId, nvml_busIds[j], 16) ) {
                                ret = nvmlDeviceGetHandleByIndex( j, &devices[i] );
                                /* Only a failed lookup is an error; a successful
                                 * match simply ends the search for this device. */
                                if ( NVML_SUCCESS != ret ) {
                                        SUBDBG("nvmlDeviceGetHandleByIndex(%d, &devices[%d]) failed.\n", j, i);
                                        return PAPI_ESYS;
                                }
                                break;
                        }
                }
        }

        memset(names, 0x0, device_count*64);
        /* So for each card, check what is queryable */
        for ( i = 0; i < device_count; i++ ) {
                isFermi = 1;
                isUnique = 1;
                features[i] = 0;

                ret = nvmlDeviceGetName( devices[i], name, 64 );
                if ( NVML_SUCCESS != ret) {
                        SUBDBG("nvmlDeviceGetName failed \n");
                        return PAPI_ESYS;
                }

                for ( j = 0; j < i; j++ ) {
                        if ( 0 == strncmp( name, names[j], 64 ) ) {
                                /* if we have a match, and IF everything is sane,
                                 * devices with the same name eg Tesla C2075 share features */
                                isUnique = 0;
                                features[i] = features[j];
                        }
                }

                if ( isUnique ) {
                        /* Only parse the ECC inforom version when the query
                         * actually filled the buffer. */
                        ecc_version = 0.0;
                        ret = nvmlDeviceGetInforomVersion( devices[i], NVML_INFOROM_ECC, inforomECC, 16);
                        if ( NVML_SUCCESS != ret ) {
                                SUBDBG("nvmlGetInforomVersion carps %s\n", nvmlErrorString(ret ) );
                                isFermi = 0;
                        } else {
                                ecc_version = strtof( inforomECC, NULL );
                        }

                        ret = nvmlDeviceGetInforomVersion( devices[i], NVML_INFOROM_POWER, inforomPower, 16);
                        if ( NVML_SUCCESS != ret ) {
                                /* This implies the card is older than Fermi */
                                SUBDBG("nvmlGetInforomVersion carps %s\n", nvmlErrorString(ret ) );
                                SUBDBG("Based upon the return to nvmlGetInforomVersion, we conclude this card is older then Fermi.\n");
                                isFermi = 0;
                        }

                        /* For Tesla and Quadro products from Fermi and Kepler families. */
                        if ( isFermi ) {
                                features[i] |= FEATURE_CLOCK_INFO;
                                num_events += 3;
                        }

                        /*  For Tesla and Quadro products from Fermi and Kepler families.
                            requires NVML_INFOROM_ECC 2.0 or higher for location-based counts
                            requires NVML_INFOROM_ECC 1.0 or higher for all other ECC counts
                            requires ECC mode to be enabled. */
                        if ( isFermi ) {
                                /* Reset first so a failed query cannot reuse
                                 * the mode reported by an earlier device. */
                                mode = NVML_FEATURE_DISABLED;
                                ret = nvmlDeviceGetEccMode( devices[i], &mode, NULL );
                                if ( NVML_SUCCESS == ret && NVML_FEATURE_ENABLED == mode ) {
                                        if ( ecc_version >= 2.0 ) {
                                                features[i] |= FEATURE_ECC_LOCAL_ERRORS;
                                                num_events += 8; /* {single bit, two bit errors} x { reg, l1, l2, memory } */
                                        }
                                        if ( ecc_version >= 1.0 ) {
                                                features[i] |= FEATURE_ECC_TOTAL_ERRORS;
                                                num_events += 2; /* single bit errors, double bit errors */
                                        }
                                }
                        }

                        /* For all discrete products with dedicated fans */
                        features[i] |= FEATURE_FAN_SPEED;
                        num_events++;

                        /* For Tesla and Quadro products from Fermi and Kepler families. */
                        if ( isFermi ) {
                                features[i] |= FEATURE_MAX_CLOCK;
                                num_events += 3;
                        }

                        /* For all products */
                        features[i] |= FEATURE_MEMORY_INFO;
                        num_events += 3; /* total, free, used */

                        /* For Tesla and Quadro products from the Fermi and Kepler families. */
                        if ( isFermi ) {
                                features[i] |= FEATURE_PERF_STATES;
                                num_events++;
                        }

                        /*  For "GF11x" Tesla and Quadro products from the Fermi family
                            requires NVML_INFOROM_POWER 3.0 or higher
                            For Tesla and Quadro products from the Kepler family
                            does not require NVML_INFOROM_POWER */
                        if ( isFermi ) {
                                /* Probe the query itself rather than trusting the version. */
                                ret = nvmlDeviceGetPowerUsage( devices[i], &temp);
                                if ( NVML_SUCCESS == ret ) {
                                        features[i] |= FEATURE_POWER;
                                        num_events++;
                                }
                        }

                        /* For all discrete and S-class products. */
                        features[i] |= FEATURE_TEMP;
                        num_events++;

                        /* For Tesla and Quadro products from the Fermi and Kepler families */
                        if ( isFermi ) {
                                features[i] |= FEATURE_UTILIZATION;
                                num_events += 2;
                        }

                        /* Remember the name so later identical devices reuse this mask. */
                        strncpy( names[i], name, 64);
                }
        }
        return PAPI_OK;

}

Here is the caller graph for this function:

unsigned long long getClockSpeed ( nvmlDevice_t  dev,
nvmlClockType_t  which_one 
)

Definition at line 76 of file linux-nvml.c.

{
        /* Ask NVML for the clock reading of the `which_one` clock type on
         * `dev`. A failing query is only logged; the output variable, which
         * starts at 0, is returned either way. */
        unsigned int clock_value = 0;
        nvmlReturn_t status = nvmlDeviceGetClockInfo( dev, which_one, &clock_value );

        if ( status != NVML_SUCCESS ) {
                SUBDBG( "something went wrong %s\n", nvmlErrorString(status));
        }

        return (unsigned long long) clock_value;
}

Here is the caller graph for this function:

unsigned long long getEccLocalErrors ( nvmlDevice_t  dev,
nvmlEccBitType_t  bits,
int  which_one 
)

Definition at line 90 of file linux-nvml.c.

{
        /*
         * Return one location-specific volatile ECC error count for `dev`.
         *
         * bits      - error class handed to NVML (single- or double-bit).
         * which_one - LOCAL_ECC_REGFILE, LOCAL_ECC_L1, LOCAL_ECC_L2 or
         *             LOCAL_ECC_MEM, selecting the field to report.
         *
         * Returns (unsigned long long)-1 when the NVML query fails or
         * which_one is not one of the selectors above.
         */
        nvmlEccErrorCounts_t counts;
        nvmlReturn_t bad;

        bad = nvmlDeviceGetDetailedEccErrors( dev, bits, NVML_VOLATILE_ECC , &counts);

        if ( NVML_SUCCESS != bad ) {
                SUBDBG( "something went wrong %s\n", nvmlErrorString(bad));
                /* counts may not have been written; do not read it. */
                return (unsigned long long)-1;
        }

        switch ( which_one ) {
                case LOCAL_ECC_REGFILE:
                        return counts.registerFile;
                case LOCAL_ECC_L1:
                        return counts.l1Cache;
                case LOCAL_ECC_L2:
                        return counts.l2Cache;
                case LOCAL_ECC_MEM:
                        return counts.deviceMemory;
                default:
                        break;
        }
        return (unsigned long long)-1;
}

Here is the caller graph for this function:

unsigned long long getFanSpeed ( nvmlDevice_t  dev)

Definition at line 118 of file linux-nvml.c.

{
        /* Read the fan speed NVML reports for `dev`. A failing query is only
         * logged; the output variable, which starts at 0, is returned
         * either way. */
        unsigned int fan_speed = 0;
        nvmlReturn_t status = nvmlDeviceGetFanSpeed( dev, &fan_speed );

        if ( status != NVML_SUCCESS ) {
                SUBDBG( "something went wrong %s\n", nvmlErrorString(status));
        }

        return (unsigned long long) fan_speed;
}

Here is the caller graph for this function:

unsigned long long getMaxClockSpeed ( nvmlDevice_t  dev,
nvmlClockType_t  which_one 
)

Definition at line 133 of file linux-nvml.c.

{
        /*
         * Return the maximum clock NVML reports for the `which_one` clock
         * type on `dev`.
         *
         * This must use nvmlDeviceGetMaxClockInfo; querying
         * nvmlDeviceGetClockInfo here would just duplicate getClockSpeed and
         * report the current clock instead of the maximum.
         *
         * A failing query is only logged; the output variable, which starts
         * at 0, is returned either way.
         */
        unsigned int ret = 0;
        nvmlReturn_t bad;
        bad = nvmlDeviceGetMaxClockInfo( dev, which_one, &ret );

        if ( NVML_SUCCESS != bad ) {
                SUBDBG( "something went wrong %s\n", nvmlErrorString(bad));
        }

        return (unsigned long long) ret;
}

Here is the caller graph for this function:

unsigned long long getMemoryInfo ( nvmlDevice_t  dev,
int  which_one 
)

Definition at line 148 of file linux-nvml.c.

{
        /*
         * Return one field of the memory information NVML reports for `dev`.
         *
         * which_one - MEMINFO_TOTAL_MEMORY, MEMINFO_UNALLOCED or
         *             MEMINFO_ALLOCED, selecting total, free or used.
         *
         * Returns (unsigned long long)-1 when the NVML query fails or
         * which_one is not one of the selectors above.
         */
        nvmlMemory_t meminfo;
        nvmlReturn_t bad;
        bad = nvmlDeviceGetMemoryInfo( dev, &meminfo );

        if ( NVML_SUCCESS != bad ) {
                SUBDBG( "something went wrong %s\n", nvmlErrorString(bad));
                /* meminfo may not have been written; do not read it. */
                return (unsigned long long)-1;
        }

        switch (which_one) {
                case MEMINFO_TOTAL_MEMORY:
                        return meminfo.total;
                case MEMINFO_UNALLOCED:
                        return meminfo.free;
                case MEMINFO_ALLOCED:
                        return meminfo.used;
                default:
                        break;
        }
        return (unsigned long long)-1;
}

Here is the caller graph for this function:

unsigned long long getPowerUsage ( nvmlDevice_t  dev)

Definition at line 228 of file linux-nvml.c.

{
        /*
         * Return the power usage NVML reports for `dev`.
         *
         * The output variable starts at 0, matching the other single-value
         * getters in this file, so a failing query (which is only logged)
         * yields a defined value instead of an indeterminate one.
         */
        unsigned int power = 0;
        nvmlReturn_t bad;
        bad = nvmlDeviceGetPowerUsage( dev, &power );

        if ( NVML_SUCCESS != bad ) {
                SUBDBG( "something went wrong %s\n", nvmlErrorString(bad));
        }

        return (unsigned long long) power;
}

Here is the caller graph for this function:

unsigned long long getPState ( nvmlDevice_t  dev)

Definition at line 172 of file linux-nvml.c.

{
        /*
         * Translate the performance state NVML reports for `dev` into its
         * number: NVML_PSTATE_0 gives 0, NVML_PSTATE_1 gives 1, and so on up
         * to NVML_PSTATE_15 giving 15. NVML_PSTATE_UNKNOWN and any other
         * value give (unsigned long long)-1.
         *
         * A failing query is only logged; the state variable starts out as
         * NVML_PSTATE_15 and is translated whatever the outcome.
         */
        nvmlPstates_t pstate = NVML_PSTATE_15;
        nvmlReturn_t status = nvmlDeviceGetPerformanceState( dev, &pstate );

        if ( status != NVML_SUCCESS ) {
                SUBDBG( "something went wrong %s\n", nvmlErrorString(status));
        }

        switch ( pstate ) {
                case NVML_PSTATE_0:
                        return 0ULL;
                case NVML_PSTATE_1:
                        return 1ULL;
                case NVML_PSTATE_2:
                        return 2ULL;
                case NVML_PSTATE_3:
                        return 3ULL;
                case NVML_PSTATE_4:
                        return 4ULL;
                case NVML_PSTATE_5:
                        return 5ULL;
                case NVML_PSTATE_6:
                        return 6ULL;
                case NVML_PSTATE_7:
                        return 7ULL;
                case NVML_PSTATE_8:
                        return 8ULL;
                case NVML_PSTATE_9:
                        return 9ULL;
                case NVML_PSTATE_10:
                        return 10ULL;
                case NVML_PSTATE_11:
                        return 11ULL;
                case NVML_PSTATE_12:
                        return 12ULL;
                case NVML_PSTATE_13:
                        return 13ULL;
                case NVML_PSTATE_14:
                        return 14ULL;
                case NVML_PSTATE_15:
                        return 15ULL;
                case NVML_PSTATE_UNKNOWN:
                default:
                        /* The API docs only describe this as an unknown
                         * performance state. */
                        break;
        }

        return (unsigned long long) -1;
}

Here is the caller graph for this function:

unsigned long long getTemperature ( nvmlDevice_t  dev)

Definition at line 243 of file linux-nvml.c.

{
        /* Read the NVML_TEMPERATURE_GPU sensor of `dev`. A failing query is
         * only logged; the output variable, which starts at 0, is returned
         * either way. */
        unsigned int temperature = 0;
        nvmlReturn_t status = nvmlDeviceGetTemperature( dev, NVML_TEMPERATURE_GPU, &temperature );

        if ( status != NVML_SUCCESS ) {
                SUBDBG( "something went wrong %s\n", nvmlErrorString(status));
        }

        return (unsigned long long) temperature;
}

Here is the caller graph for this function:

unsigned long long getTotalEccErrors ( nvmlDevice_t  dev,
nvmlEccBitType_t  bits 
)

Definition at line 258 of file linux-nvml.c.

{
        /* Fetch the device-wide volatile ECC error total of the `bits` class
         * for `dev`. A failing query is only logged; the counter, which
         * starts at 0, is returned either way. */
        unsigned long long total = 0;
        nvmlReturn_t status = nvmlDeviceGetTotalEccErrors( dev, bits, NVML_VOLATILE_ECC , &total);

        if ( status != NVML_SUCCESS ) {
                SUBDBG( "something went wrong %s\n", nvmlErrorString(status));
        }

        return total;
}

Here is the caller graph for this function:

unsigned long long getUtilization ( nvmlDevice_t  dev,
int  which_one 
)

Definition at line 276 of file linux-nvml.c.

{
        /*
         * Return one of the utilization rates NVML reports for `dev`.
         *
         * which_one - GPU_UTILIZATION or MEMORY_UTILIZATION, selecting the
         *             gpu or memory field of the NVML result.
         *
         * Returns (unsigned long long)-1 when the NVML query fails or
         * which_one is neither selector.
         */
        nvmlUtilization_t util;
        nvmlReturn_t bad;
        bad = nvmlDeviceGetUtilizationRates( dev, &util );

        if ( NVML_SUCCESS != bad ) {
                SUBDBG( "something went wrong %s\n", nvmlErrorString(bad));
                /* util may not have been written; do not read it. */
                return (unsigned long long) -1;
        }

        switch (which_one) {
                case GPU_UTILIZATION:
                        return (unsigned long long) util.gpu;
                case MEMORY_UTILIZATION:
                        return (unsigned long long) util.memory;
                default:
                        break;
        }

        return (unsigned long long) -1;
}

Here is the caller graph for this function:

static int nvml_hardware_read ( long long *  value,
int  which_one 
) [static]

Code that reads event values.

Definition at line 315 of file linux-nvml.c.

{
        /*
         * Read native event `which_one` on the device that is current for
         * the calling CUDA context and store the result in *value.
         *
         * *value is preset to -1 and stays that way on every error path.
         *
         * Returns PAPI_OK after dispatching to the matching getter, or
         * PAPI_EINVAL when the current CUDA device index is outside
         * [0, device_count), the device lacks the event's feature, or the
         * event type is not recognised.
         */
        nvml_native_event_entry_t *entry;
        nvmlDevice_t handle;
        int cudaIdx = -1;

        entry = &nvml_native_table[which_one];
        *value = (long long) -1;
        /* The event is always read on whichever device CUDA currently uses. */
        cudaGetDevice( &cudaIdx );

        /* devices[] and features[] are indexed 0 .. device_count-1, so an
         * index equal to device_count is already out of range. */
        if ( cudaIdx < 0 || cudaIdx >= device_count )
                return PAPI_EINVAL;

        /* Make sure the device we are running on has the requested event */
        if ( !HAS_FEATURE( features[cudaIdx] , entry->type) )
                return PAPI_EINVAL;

        handle = devices[cudaIdx];

        switch (entry->type) {
                case FEATURE_CLOCK_INFO:
                        *value =  getClockSpeed(    handle,
                                        (nvmlClockType_t)entry->options.clock );
                        break;
                case FEATURE_ECC_LOCAL_ERRORS:
                        *value = getEccLocalErrors(     handle,
                                        (nvmlEccBitType_t)entry->options.ecc_opts.bits,
                                        (int)entry->options.ecc_opts.which_one);
                        break;
                case FEATURE_FAN_SPEED:
                        *value = getFanSpeed( handle );
                        break;
                case FEATURE_MAX_CLOCK:
                        *value = getMaxClockSpeed(  handle,
                                        (nvmlClockType_t)entry->options.clock );
                        break;
                case FEATURE_MEMORY_INFO:
                        *value = getMemoryInfo(     handle,
                                        (int)entry->options.which_one );
                        break;
                case FEATURE_PERF_STATES:
                        *value = getPState( handle );
                        break;
                case FEATURE_POWER:
                        *value = getPowerUsage( handle );
                        break;
                case FEATURE_TEMP:
                        *value = getTemperature( handle );
                        break;
                case FEATURE_ECC_TOTAL_ERRORS:
                        *value = getTotalEccErrors(     handle,
                                        (nvmlEccBitType_t)entry->options.ecc_opts.bits );
                        break;
                case FEATURE_UTILIZATION:
                        *value = getUtilization(    handle,
                                        (int)entry->options.which_one );
                        break;
                default:
                        return PAPI_EINVAL;
        }

        return PAPI_OK;


}

Here is the call graph for this function:

Here is the caller graph for this function:

static void nvml_hardware_reset ( ) [static]

Definition at line 300 of file linux-nvml.c.

{
        /* Intentionally a no-op: this function performs no reset.
         *
         * nvmlDeviceSet* and nvmlDeviceClear* calls require root/admin access,
         * so although a reset of the ECC counters could be implemented, it is
         * deliberately left out. */
        /* Sketch of what such a reset would look like:
           int i;
           for ( i=0; i < device_count; i++ )
           nvmlDeviceClearEccErrorCounts( devices[i], NVML_VOLATILE_ECC );
         */
}

Here is the caller graph for this function:


Variable Documentation

Vector that points to entry points for our component

Definition at line 42 of file linux-nvml.c.

int device_count = 0 [static]

Number of devices detected at component_init time

Definition at line 67 of file linux-nvml.c.

nvmlDevice_t* devices [static]

Definition at line 72 of file linux-nvml.c.

int* features [static]

Definition at line 73 of file linux-nvml.c.

int num_events = 0 [static]

number of events in the table

Definition at line 70 of file linux-nvml.c.

This table contains the native events

Definition at line 64 of file linux-nvml.c.

 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines