PAPI  5.0.1.0
linux-nvml.c
Go to the documentation of this file.
00001 /****************************/
00002 /* THIS IS OPEN SOURCE CODE */
00003 /****************************/
00004 
00022 #include <stdio.h>
00023 #include <string.h>
00024 #include <stdlib.h>
00025 #include <inttypes.h>
00026 #include <string.h>
00027 #include <nvml.h>
00028 /* Headers required by PAPI */
00029 #include "papi.h"
00030 #include "papi_internal.h"
00031 #include "papi_vector.h"
00032 #include "papi_memory.h"
00033 
00034 #include "linux-nvml.h"
00035 
00036 #include "nvml.h"
00037 #include "cuda.h"
00038 #include "cuda_runtime_api.h"
00039 
00040 
00041 /* Declare our vector in advance */
00042 papi_vector_t _nvml_vector;
00043 
/* Capacity of the per-control-state counter arrays.  Open question carried
 * over from the original author: with up to 25 events per card, how many
 * cards per system should be allowed for? */
#define NVML_MAX_COUNTERS 100

/** Per-eventset bookkeeping for the NVML component. */
typedef struct nvml_control_state
{
        /* Number of events held in this control state. */
        int num_events;
        /* One int slot per counter, NVML_MAX_COUNTERS in total. */
        int which_counter[NVML_MAX_COUNTERS];
        /* One long long value slot per counter. */
        long long counter[NVML_MAX_COUNTERS];
} nvml_control_state_t;

/** Component context; it wraps exactly one control state. */
typedef struct nvml_context
{
        nvml_control_state_t state;
} nvml_context_t;
00062 
00064 static nvml_native_event_entry_t *nvml_native_table;
00065 
00067 static int device_count = 0;
00068 
00070 static int num_events = 0;
00071 
00072 static nvmlDevice_t* devices;
00073 static int*          features;
00074 
00075 unsigned long long
00076 getClockSpeed( nvmlDevice_t dev, nvmlClockType_t which_one )
00077 {
00078         unsigned int ret = 0;
00079         nvmlReturn_t bad; 
00080         bad = nvmlDeviceGetClockInfo( dev, which_one, &ret );
00081 
00082         if ( NVML_SUCCESS != bad ) {
00083                 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad));
00084         }
00085 
00086         return (unsigned long long)ret;
00087 }
00088 
00089         unsigned long long
00090 getEccLocalErrors( nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one)
00091 {
00092         nvmlEccErrorCounts_t counts;
00093 
00094         nvmlReturn_t bad; 
00095         bad = nvmlDeviceGetDetailedEccErrors( dev, bits, NVML_VOLATILE_ECC , &counts);
00096 
00097         if ( NVML_SUCCESS != bad ) {
00098                 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad));
00099         }
00100 
00101 
00102         switch ( which_one ) {
00103                 case LOCAL_ECC_REGFILE:
00104                         return counts.registerFile;
00105                 case LOCAL_ECC_L1:
00106                         return counts.l1Cache;
00107                 case LOCAL_ECC_L2:
00108                         return counts.l2Cache;
00109                 case LOCAL_ECC_MEM:
00110                         return counts.deviceMemory;
00111                 default:
00112                         ;
00113         }
00114         return (unsigned long long)-1;
00115 }
00116 
00117         unsigned long long 
00118 getFanSpeed( nvmlDevice_t dev ) 
00119 {
00120         unsigned int ret = 0;
00121         nvmlReturn_t bad; 
00122         bad = nvmlDeviceGetFanSpeed( dev, &ret );
00123 
00124         if ( NVML_SUCCESS != bad ) {
00125                 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad));
00126         }
00127 
00128 
00129         return (unsigned long long)ret; 
00130 }
00131 
00132         unsigned long long
00133 getMaxClockSpeed( nvmlDevice_t dev, nvmlClockType_t which_one)
00134 {
00135         unsigned int ret = 0;
00136         nvmlReturn_t bad; 
00137         bad = nvmlDeviceGetClockInfo( dev, which_one, &ret );
00138 
00139         if ( NVML_SUCCESS != bad ) {
00140                 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad));
00141         }
00142 
00143 
00144         return (unsigned long long) ret;
00145 }
00146 
00147         unsigned long long
00148 getMemoryInfo( nvmlDevice_t dev, int which_one )
00149 {
00150         nvmlMemory_t meminfo;
00151         nvmlReturn_t bad; 
00152         bad = nvmlDeviceGetMemoryInfo( dev, &meminfo );
00153 
00154         if ( NVML_SUCCESS != bad ) {
00155                 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad));
00156         }
00157 
00158         switch (which_one) {
00159                 case MEMINFO_TOTAL_MEMORY:
00160                         return meminfo.total;
00161                 case MEMINFO_UNALLOCED:
00162                         return meminfo.free;
00163                 case MEMINFO_ALLOCED:
00164                         return meminfo.used;
00165                 default:
00166                         ;
00167         }
00168         return (unsigned long long)-1;
00169 }
00170 
00171         unsigned long long
00172 getPState( nvmlDevice_t dev ) 
00173 {
00174         unsigned int ret = 0;
00175         nvmlPstates_t state = NVML_PSTATE_15;
00176         nvmlReturn_t bad; 
00177         bad = nvmlDeviceGetPerformanceState( dev, &state );
00178 
00179         if ( NVML_SUCCESS != bad ) {
00180                 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad));
00181         }
00182 
00183 
00184         switch ( state ) {
00185                 case NVML_PSTATE_15:
00186                         ret++;
00187                 case NVML_PSTATE_14:
00188                         ret++;
00189                 case NVML_PSTATE_13:
00190                         ret++;
00191                 case NVML_PSTATE_12:
00192                         ret++;
00193                 case NVML_PSTATE_11:
00194                         ret++;
00195                 case NVML_PSTATE_10:
00196                         ret++;
00197                 case NVML_PSTATE_9:
00198                         ret++;
00199                 case NVML_PSTATE_8:
00200                         ret++;
00201                 case NVML_PSTATE_7:
00202                         ret++;
00203                 case NVML_PSTATE_6:
00204                         ret++;
00205                 case NVML_PSTATE_5:
00206                         ret++;
00207                 case NVML_PSTATE_4:
00208                         ret++;
00209                 case NVML_PSTATE_3:
00210                         ret++;
00211                 case NVML_PSTATE_2:
00212                         ret++;
00213                 case NVML_PSTATE_1:
00214                         ret++;
00215                 case NVML_PSTATE_0:
00216                         break;
00217                 case NVML_PSTATE_UNKNOWN:
00218                 default:
00219                         /* This should never happen? 
00220                          * The API docs just state Unknown performance state... */
00221                         return (unsigned long long) -1;
00222         }
00223 
00224         return (unsigned long long)ret;
00225 }
00226 
00227         unsigned long long
00228 getPowerUsage( nvmlDevice_t dev )
00229 {
00230         unsigned int power;
00231         nvmlReturn_t bad; 
00232         bad = nvmlDeviceGetPowerUsage( dev, &power );
00233 
00234         if ( NVML_SUCCESS != bad ) {
00235                 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad));
00236         }
00237 
00238 
00239         return (unsigned long long) power;
00240 }
00241 
00242         unsigned long long
00243 getTemperature( nvmlDevice_t dev )
00244 {
00245         unsigned int ret = 0;
00246         nvmlReturn_t bad; 
00247         bad = nvmlDeviceGetTemperature( dev, NVML_TEMPERATURE_GPU, &ret );
00248 
00249         if ( NVML_SUCCESS != bad ) {
00250                 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad));
00251         }
00252 
00253 
00254         return (unsigned long long)ret;
00255 }
00256 
00257         unsigned long long
00258 getTotalEccErrors( nvmlDevice_t dev, nvmlEccBitType_t bits) 
00259 {
00260         unsigned long long counts = 0;
00261         nvmlReturn_t bad; 
00262         bad = nvmlDeviceGetTotalEccErrors( dev, bits, NVML_VOLATILE_ECC , &counts);
00263 
00264         if ( NVML_SUCCESS != bad ) {
00265                 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad));
00266         }
00267 
00268 
00269         return counts;
00270 }
00271 
00272 /*  0 => gpu util
00273     1 => memory util
00274  */
00275         unsigned long long
00276 getUtilization( nvmlDevice_t dev, int which_one )
00277 {
00278         nvmlUtilization_t util;
00279         nvmlReturn_t bad; 
00280         bad = nvmlDeviceGetUtilizationRates( dev, &util );
00281 
00282         if ( NVML_SUCCESS != bad ) {
00283                 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad));
00284         }
00285 
00286 
00287         switch (which_one) {
00288                 case GPU_UTILIZATION:
00289                         return (unsigned long long) util.gpu;
00290                 case MEMORY_UTILIZATION:
00291                         return (unsigned long long) util.memory;
00292                 default:
00293                         ;
00294         }
00295 
00296         return (unsigned long long) -1;
00297 }
00298 
/**
 * Reset hook for the NVML counters; intentionally does nothing.
 *
 * The nvmlDeviceSet* and nvmlDeviceClear* calls require root/admin access,
 * so although the ECC counters could be reset by calling
 * nvmlDeviceClearEccErrorCounts( ..., NVML_VOLATILE_ECC ) once per device,
 * this component passes.
 */
static void
nvml_hardware_reset(  )
{
}
00310 
00312 /*   You might replace this with code that accesses       */
00313 /*   hardware or reads values from the operatings system. */
00314         static int 
00315 nvml_hardware_read( long long *value, int which_one)
00316         //, nvml_context_t *ctx)
00317 {
00318         nvml_native_event_entry_t *entry;
00319         nvmlDevice_t handle;
00320         int cudaIdx = -1;
00321 
00322         entry = &nvml_native_table[which_one];
00323         *value = (long long) -1;
00324         /* replace entry->resources with the current cuda_device->nvml device */
00325         cudaGetDevice( &cudaIdx );
00326 
00327         if ( cudaIdx < 0 || cudaIdx > device_count )
00328             return PAPI_EINVAL;
00329 
00330         /* Make sure the device we are running on has the requested event */
00331         if ( !HAS_FEATURE( features[cudaIdx] , entry->type) ) 
00332                 return PAPI_EINVAL;
00333 
00334         handle = devices[cudaIdx];
00335 
00336         switch (entry->type) {
00337                 case FEATURE_CLOCK_INFO:
00338                         *value =  getClockSpeed(    handle, 
00339                                         (nvmlClockType_t)entry->options.clock );
00340                         break;
00341                 case FEATURE_ECC_LOCAL_ERRORS:
00342                         *value = getEccLocalErrors(     handle, 
00343                                         (nvmlEccBitType_t)entry->options.ecc_opts.bits, 
00344                                         (int)entry->options.ecc_opts.which_one);
00345                         break;
00346                 case FEATURE_FAN_SPEED:
00347                         *value = getFanSpeed( handle );
00348                         break;
00349                 case FEATURE_MAX_CLOCK:
00350                         *value = getMaxClockSpeed(  handle, 
00351                                         (nvmlClockType_t)entry->options.clock );
00352                         break;
00353                 case FEATURE_MEMORY_INFO:
00354                         *value = getMemoryInfo(     handle, 
00355                                         (int)entry->options.which_one );
00356                         break;
00357                 case FEATURE_PERF_STATES:
00358                         *value = getPState( handle );
00359                         break;
00360                 case FEATURE_POWER:
00361                         *value = getPowerUsage( handle );
00362                         break;
00363                 case FEATURE_TEMP:
00364                         *value = getTemperature( handle );
00365                         break;
00366                 case FEATURE_ECC_TOTAL_ERRORS:
00367                         *value = getTotalEccErrors(     handle, 
00368                                         (nvmlEccBitType_t)entry->options.ecc_opts.bits );
00369                         break;
00370                 case FEATURE_UTILIZATION:
00371                         *value = getUtilization(    handle, 
00372                                         (int)entry->options.which_one );
00373                         break;
00374                 default:
00375                         return PAPI_EINVAL;
00376         }
00377 
00378         return PAPI_OK;
00379 
00380 
00381 }
00382 
00383 /********************************************************************/
00384 /* Below are the functions required by the PAPI component interface */
00385 /********************************************************************/
00386 
00388         int
00389 _papi_nvml_init_thread( hwd_context_t * ctx )
00390 {
00391         (void) ctx;
00392 
00393         SUBDBG( "_papi_nvml_init %p...", ctx );
00394 
00395         return PAPI_OK;
00396 }
00397 
00398         static int 
00399 detectDevices( ) 
00400 {
00401         nvmlReturn_t ret;
00402         nvmlEnableState_t mode = NVML_FEATURE_DISABLED;
00403         nvmlDevice_t handle;
00404         nvmlPciInfo_t info;
00405 
00406         cudaError_t cuerr;
00407 
00408         char busId[16];
00409         char name[64];
00410         char inforomECC[16];
00411         char inforomPower[16];
00412         char names[device_count][64];
00413         char nvml_busIds[device_count][16];
00414 
00415         float ecc_version = 0.0, power_version = 0.0;
00416 
00417         int i = 0,
00418             j = 0;
00419         int isTesla = 0;
00420         int isFermi = 0;
00421         int isUnique = 1;
00422 
00423         unsigned int temp = 0;
00424 
00425 
00426         /* list of nvml pci_busids */
00427     for (i=0; i < device_count; i++) {
00428         ret = nvmlDeviceGetHandleByIndex( i, &handle ); 
00429         if ( NVML_SUCCESS != ret ) {
00430             SUBDBG("nvmlDeviceGetHandleByIndex(%d) failed\n", i);
00431             return PAPI_ESYS;
00432         }
00433 
00434         ret = nvmlDeviceGetPciInfo( handle, &info );
00435         if ( NVML_SUCCESS != ret ) {
00436             SUBDBG("nvmlDeviceGetPciInfo() failed %s\n", nvmlErrorString(ret) );
00437             return PAPI_ESYS;
00438         }
00439 
00440         strncpy(nvml_busIds[i], info.busId, 16);
00441         
00442     }
00443 
00444     /* We want to key our list of nvmlDevice_ts by each device's cuda index */
00445     for (i=0; i < device_count; i++) {
00446             cuerr = cudaDeviceGetPCIBusId( busId, 16, i );
00447             if ( CUDA_SUCCESS != cuerr ) {
00448                 SUBDBG("cudaDeviceGetPCIBusId failed.\n");
00449                 return PAPI_ESYS;
00450             }
00451             for (j=0; j < device_count; j++ ) {
00452                     if ( !strncmp( busId, nvml_busIds[j], 16) ) {
00453                             ret = nvmlDeviceGetHandleByIndex(j, &devices[i] );
00454                             if ( NVML_SUCCESS != ret ) {
00455                                 SUBDBG("nvmlDeviceGetHandleByIndex(%d, &devices[%d]) failed.\n", j, i);
00456                             }
00457                                 return PAPI_ESYS;
00458                             break;
00459                     }
00460             }   
00461     }
00462 
00463         memset(names, 0x0, device_count*64);
00464         /* So for each card, check whats querable */
00465         for (i=0; i < device_count; i++ ) {
00466                 isTesla=0;
00467                 isFermi=1;
00468                 isUnique = 1;
00469                 features[i] = 0;
00470 
00471                 ret = nvmlDeviceGetName( devices[i], name, 64 );
00472                 if ( NVML_SUCCESS != ret) {
00473                     SUBDBG("nvmlDeviceGetName failed \n");
00474                     return PAPI_ESYS;
00475                 }
00476 
00477                 for (j=0; j < i; j++ ) 
00478                         if ( 0 == strncmp( name, names[j], 64 ) ) {
00479                                 /* if we have a match, and IF everything is sane, 
00480                                  * devices with the same name eg Tesla C2075 share features */
00481                                 isUnique = 0;
00482                                 features[i] = features[j];
00483 
00484                         }
00485 
00486                 if ( isUnique ) {
00487                         ret = nvmlDeviceGetInforomVersion( devices[i], NVML_INFOROM_ECC, inforomECC, 16);
00488                         if ( NVML_SUCCESS != ret ) {
00489                                 SUBDBG("nvmlGetInforomVersion carps %s\n", nvmlErrorString(ret ) );
00490                                 isFermi = 0;
00491                         }
00492                         ret = nvmlDeviceGetInforomVersion( devices[i], NVML_INFOROM_POWER, inforomPower, 16);
00493                         if ( NVML_SUCCESS != ret ) {
00494                                 /* This implies the card is older then Fermi */
00495                                 SUBDBG("nvmlGetInforomVersion carps %s\n", nvmlErrorString(ret ) );
00496                                 SUBDBG("Based upon the return to nvmlGetInforomVersion, we conclude this card is older then Fermi.\n");
00497                                 isFermi = 0;
00498                         } 
00499 
00500                         ecc_version = strtof(inforomECC, NULL );
00501                         power_version = strtof( inforomPower, NULL);
00502 
00503                         ret = nvmlDeviceGetName( devices[i], name, 64 );
00504                         isTesla = ( NULL == strstr(name, "Tesla") ) ? 0:1;
00505 
00506                         /* For Tesla and Quadro products from Fermi and Kepler families. */
00507                         if ( isFermi ) {
00508                                 features[i] |= FEATURE_CLOCK_INFO;
00509                                 num_events += 3;
00510                         }
00511 
00512                         /*  For Tesla and Quadro products from Fermi and Kepler families. 
00513                             requires NVML_INFOROM_ECC 2.0 or higher for location-based counts
00514                             requires NVML_INFOROM_ECC 1.0 or higher for all other ECC counts
00515                             requires ECC mode to be enabled. */
00516                         if ( isFermi ) {
00517                                 ret = nvmlDeviceGetEccMode( devices[i], &mode, NULL );
00518                                 if ( NVML_FEATURE_ENABLED == mode) {
00519                                         if ( ecc_version >= 2.0 ) {
00520                                                 features[i] |= FEATURE_ECC_LOCAL_ERRORS;
00521                                                 num_events += 8; /* {single bit, two bit errors} x { reg, l1, l2, memory } */
00522                                         } 
00523                                         if ( ecc_version >= 1.0 ) {
00524                                                 features[i] |= FEATURE_ECC_TOTAL_ERRORS;
00525                                                 num_events += 2; /* single bit errors, double bit errors */
00526                                         }
00527                                 }   
00528                         }
00529 
00530                         /* For all discrete products with dedicated fans */
00531                         features[i] |= FEATURE_FAN_SPEED;
00532                         num_events++;
00533 
00534                         /* For Tesla and Quadro products from Fermi and Kepler families. */
00535                         if ( isFermi ) {
00536                                 features[i] |= FEATURE_MAX_CLOCK;
00537                                 num_events += 3;
00538                         }
00539 
00540                         /* For all products */
00541                         features[i] |= FEATURE_MEMORY_INFO;
00542                         num_events += 3; /* total, free, used */
00543 
00544                         /* For Tesla and Quadro products from the Fermi and Kepler families. */
00545                         if ( isFermi ) {
00546                                 features[i] |= FEATURE_PERF_STATES;
00547                                 num_events++;
00548                         }
00549 
00550                         /*  For "GF11x" Tesla and Quadro products from the Fermi family
00551                             requires NVML_INFOROM_POWER 3.0 or higher
00552                             For Tesla and Quadro products from the Kepler family
00553                             does not require NVML_INFOROM_POWER */
00554                         if ( isFermi ) {
00555                                 ret = nvmlDeviceGetPowerUsage( devices[i], &temp);
00556                                 if ( NVML_SUCCESS == ret ) {
00557                                         features[i] |= FEATURE_POWER;
00558                                         num_events++;
00559                                 }
00560                         }
00561 
00562                         /* For all discrete and S-class products. */
00563                         features[i] |= FEATURE_TEMP;
00564                         num_events++;
00565 
00566                         /* For Tesla and Quadro products from the Fermi and Kepler families */
00567                         if (isFermi) {
00568                                 features[i] |= FEATURE_UTILIZATION;
00569                                 num_events += 2;
00570                         }
00571 
00572                         strncpy( names[i], name, 64); 
00573 
00574                 }
00575         }
00576         return PAPI_OK;
00577 
00578 }
00579 
00580         static void
00581 createNativeEvents( )
00582 {
00583         char name[64];
00584         char sanitized_name[PAPI_MAX_STR_LEN];
00585         char names[device_count][64];
00586 
00587         int i, nameLen = 0, j;
00588         int isUnique = 1;
00589 
00590         nvml_native_event_entry_t* entry;
00591         nvmlReturn_t ret;
00592 
00593         nvml_native_table = (nvml_native_event_entry_t*) papi_malloc( 
00594                         sizeof(nvml_native_event_entry_t) * num_events );   
00595         memset( nvml_native_table, 0x0, sizeof(nvml_native_event_entry_t) * num_events );
00596         entry = &nvml_native_table[0];
00597 
00598         for (i=0; i < device_count; i++ ) {
00599                 memset( names[i], 0x0, 64 );
00600                 isUnique = 1;
00601                 ret = nvmlDeviceGetName( devices[i], name, 64 );
00602 
00603                 for (j=0; j < i; j++ ) 
00604                 {
00605                         if ( 0 == strncmp( name, names[j], 64 ) )
00606                                 isUnique = 0;
00607                 }
00608 
00609                 if ( isUnique ) {
00610                         nameLen = strlen(name);
00611                         strncpy(sanitized_name, name, PAPI_MAX_STR_LEN );
00612                         for (j=0; j < nameLen; j++)
00613                                 if ( ' ' == sanitized_name[j] )
00614                                         sanitized_name[j] = '_';
00615 
00616 
00617 
00618                         if ( HAS_FEATURE( features[i], FEATURE_CLOCK_INFO ) ) {
00619                                 sprintf( entry->name, "%s:graphics_clock", sanitized_name );
00620                                 strncpy(entry->description,"Graphics clock domain (MHz).", PAPI_MAX_STR_LEN );
00621                                 entry->options.clock = NVML_CLOCK_GRAPHICS;
00622                                 entry->type = FEATURE_CLOCK_INFO;
00623                                 entry++;
00624 
00625                                 sprintf( entry->name, "%s:sm_clock", sanitized_name);
00626                                 strncpy(entry->description,"SM clock domain (MHz).", PAPI_MAX_STR_LEN);
00627                                 entry->options.clock = NVML_CLOCK_SM;
00628                                 entry->type = FEATURE_CLOCK_INFO;
00629                                 entry++;
00630 
00631                                 sprintf( entry->name, "%s:memory_clock", sanitized_name);
00632                                 strncpy(entry->description,"Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
00633                                 entry->options.clock = NVML_CLOCK_MEM;
00634                                 entry->type = FEATURE_CLOCK_INFO;
00635                                 entry++;
00636                         }   
00637 
00638                         if ( HAS_FEATURE( features[i], FEATURE_ECC_LOCAL_ERRORS ) ) { 
00639                                 sprintf(entry->name, "%s:l1_single_ecc_errors", sanitized_name);
00640                                 strncpy(entry->description,"L1 cache single bit ECC", PAPI_MAX_STR_LEN);
00641                                 entry->options.ecc_opts = (struct local_ecc){
00642                                         .bits = NVML_SINGLE_BIT_ECC,
00643                                                 .which_one = LOCAL_ECC_L1,
00644                                 };
00645                                 entry->type = FEATURE_ECC_LOCAL_ERRORS;
00646                                 entry++;
00647 
00648                                 sprintf(entry->name, "%s:l2_single_ecc_errors", sanitized_name);
00649                                 strncpy(entry->description,"L2 cache single bit ECC", PAPI_MAX_STR_LEN);
00650                                 entry->options.ecc_opts = (struct local_ecc){
00651                                         .bits = NVML_SINGLE_BIT_ECC,
00652                                                 .which_one = LOCAL_ECC_L2,
00653                                 };
00654                                 entry->type = FEATURE_ECC_LOCAL_ERRORS;
00655                                 entry++;
00656 
00657                                 sprintf(entry->name, "%s:memory_single_ecc_errors", sanitized_name);
00658                                 strncpy(entry->description,"Device memory single bit ECC", PAPI_MAX_STR_LEN);
00659                                 entry->options.ecc_opts = (struct local_ecc){
00660                                         .bits = NVML_SINGLE_BIT_ECC,
00661                                                 .which_one = LOCAL_ECC_MEM,
00662                                 };
00663                                 entry->type = FEATURE_ECC_LOCAL_ERRORS;
00664                                 entry++;
00665 
00666                                 sprintf(entry->name, "%s:regfile_single_ecc_errors", sanitized_name);
00667                                 strncpy(entry->description,"Register file single bit ECC", PAPI_MAX_STR_LEN);
00668                                 entry->options.ecc_opts = (struct local_ecc){
00669                                         .bits = NVML_SINGLE_BIT_ECC,
00670                                                 .which_one = LOCAL_ECC_REGFILE,
00671                                 };
00672                                 entry->type = FEATURE_ECC_LOCAL_ERRORS;
00673                                 entry++;
00674 
00675                                 sprintf(entry->name, "%s:1l_double_ecc_errors", sanitized_name);
00676                                 strncpy(entry->description,"L1 cache double bit ECC", PAPI_MAX_STR_LEN);
00677                                 entry->options.ecc_opts = (struct local_ecc){
00678                                         .bits = NVML_DOUBLE_BIT_ECC,
00679                                                 .which_one = LOCAL_ECC_L1,
00680                                 };
00681                                 entry->type = FEATURE_ECC_LOCAL_ERRORS;
00682                                 entry++;
00683 
00684                                 sprintf(entry->name, "%s:l2_double_ecc_errors", sanitized_name);
00685                                 strncpy(entry->description,"L2 cache double bit ECC", PAPI_MAX_STR_LEN);
00686                                 entry->options.ecc_opts = (struct local_ecc){
00687                                         .bits = NVML_DOUBLE_BIT_ECC,
00688                                                 .which_one = LOCAL_ECC_L2,
00689                                 };
00690                                 entry->type = FEATURE_ECC_LOCAL_ERRORS;
00691                                 entry++;
00692 
00693                                 sprintf(entry->name, "%s:memory_double_ecc_errors", sanitized_name);
00694                                 strncpy(entry->description,"Device memory double bit ECC", PAPI_MAX_STR_LEN);
00695                                 entry->options.ecc_opts = (struct local_ecc){
00696                                         .bits = NVML_DOUBLE_BIT_ECC,
00697                                                 .which_one = LOCAL_ECC_MEM,
00698                                 };
00699                                 entry->type = FEATURE_ECC_LOCAL_ERRORS;
00700                                 entry++;
00701 
00702                                 sprintf(entry->name, "%s:regfile_double_ecc_errors", sanitized_name);
00703                                 strncpy(entry->description,"Register file double bit ECC", PAPI_MAX_STR_LEN);
00704                                 entry->options.ecc_opts = (struct local_ecc){
00705                                         .bits = NVML_DOUBLE_BIT_ECC,
00706                                                 .which_one = LOCAL_ECC_REGFILE,
00707                                 };
00708                                 entry->type = FEATURE_ECC_LOCAL_ERRORS;
00709                                 entry++;
00710                         }
00711 
00712                         if ( HAS_FEATURE( features[i], FEATURE_FAN_SPEED ) ) {
00713                                 sprintf( entry->name, "%s:fan_speed", sanitized_name);
00714                                 strncpy(entry->description,"The fan speed expressed as a percent of the maximum, i.e. full speed is 100%", PAPI_MAX_STR_LEN);
00715                                 entry->type = FEATURE_FAN_SPEED;
00716                                 entry++;
00717                         }
00718 
00719                         if ( HAS_FEATURE( features[i], FEATURE_MAX_CLOCK ) ) {
00720                                 sprintf( entry->name, "%s:graphics_max_clock", sanitized_name);
00721                                 strncpy(entry->description,"Maximal Graphics clock domain (MHz).", PAPI_MAX_STR_LEN);
00722                                 entry->options.clock = NVML_CLOCK_GRAPHICS;
00723                                 entry->type = FEATURE_MAX_CLOCK;
00724                                 entry++;
00725 
00726                                 sprintf( entry->name, "%s:sm_max_clock", sanitized_name);
00727                                 strncpy(entry->description,"Maximal SM clock domain (MHz).", PAPI_MAX_STR_LEN);
00728                                 entry->options.clock = NVML_CLOCK_SM;
00729                                 entry->type = FEATURE_MAX_CLOCK;
00730                                 entry++;
00731 
00732                                 sprintf( entry->name, "%s:memory_max_clock", sanitized_name);
00733                                 strncpy(entry->description,"Maximal Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
00734                                 entry->options.clock = NVML_CLOCK_MEM;
00735                                 entry->type = FEATURE_MAX_CLOCK;
00736                                 entry++;
00737                         }
00738 
00739                         if ( HAS_FEATURE( features[i], FEATURE_MEMORY_INFO ) ) {
00740                                 sprintf( entry->name, "%s:total_memory", sanitized_name);
00741                                 strncpy(entry->description,"Total installed FB memory (in bytes).", PAPI_MAX_STR_LEN);
00742                                 entry->options.which_one = MEMINFO_TOTAL_MEMORY;
00743                                 entry->type = FEATURE_MEMORY_INFO;
00744                                 entry++;
00745 
00746                                 sprintf( entry->name, "%s:unallocated_memory", sanitized_name);
00747                                 strncpy(entry->description,"Uncallocated FB memory (in bytes).", PAPI_MAX_STR_LEN);
00748                                 entry->options.which_one = MEMINFO_UNALLOCED;
00749                                 entry->type = FEATURE_MEMORY_INFO;
00750                                 entry++;
00751 
00752                                 sprintf( entry->name, "%s:allocated_memory", sanitized_name);
00753                                 strncpy(entry->description, "Allocated FB memory (in bytes). Note that the driver/GPU always sets aside a small amount of memory for bookkeeping.", PAPI_MAX_STR_LEN);
00754                                 entry->options.which_one = MEMINFO_ALLOCED;
00755                                 entry->type = FEATURE_MEMORY_INFO;
00756                                 entry++;
00757                         }
00758 
00759                         if ( HAS_FEATURE( features[i], FEATURE_PERF_STATES ) ) {
00760                                 sprintf( entry->name, "%s:pstate", sanitized_name);
00761                                 strncpy(entry->description,"The performance state of the device.", PAPI_MAX_STR_LEN);
00762                                 entry->type = FEATURE_PERF_STATES;
00763                                 entry++;
00764                         }
00765 
00766                         if ( HAS_FEATURE( features[i], FEATURE_POWER ) ) {
00767                                 sprintf( entry->name, "%s:power", sanitized_name);
00768                                 strncpy(entry->description,"Power usage reading for the device, in miliwatts. This is the power draw for the entire board, including GPU, memory, etc.\n The reading is accurate to within a range of +/-5 watts.", PAPI_MAX_STR_LEN);
00769                                 entry->type = FEATURE_POWER;
00770                                 entry++;
00771                         }
00772 
00773                         if ( HAS_FEATURE( features[i], FEATURE_TEMP ) ) {
00774                                 sprintf( entry->name, "%s:temperature", sanitized_name);
00775                                 strncpy(entry->description,"Current temperature readings for the device, in degrees C.", PAPI_MAX_STR_LEN);
00776                                 entry->type = FEATURE_TEMP;
00777                                 entry++;
00778                         }
00779 
00780                         if ( HAS_FEATURE( features[i], FEATURE_ECC_TOTAL_ERRORS ) ) {
00781                                 sprintf( entry->name, "%s:total_ecc_errors", sanitized_name);
00782                                 strncpy(entry->description,"Total single bit errors.", PAPI_MAX_STR_LEN);
00783                                 entry->options.ecc_opts = (struct local_ecc){ 
00784                                         .bits = NVML_SINGLE_BIT_ECC, 
00785                                 };
00786                                 entry->type = FEATURE_ECC_TOTAL_ERRORS;
00787                                 entry++;
00788 
00789                                 sprintf( entry->name, "%s:total_ecc_errors", sanitized_name);
00790                                 strncpy(entry->description,"Total double bit errors.", PAPI_MAX_STR_LEN);
00791                                 entry->options.ecc_opts = (struct local_ecc){ 
00792                                         .bits = NVML_DOUBLE_BIT_ECC, 
00793                                 };
00794                                 entry->type = FEATURE_ECC_TOTAL_ERRORS;
00795                                 entry++;
00796                         }
00797 
00798                         if ( HAS_FEATURE( features[i], FEATURE_UTILIZATION ) ) {
00799                                 sprintf( entry->name, "%s:gpu_utilization", sanitized_name);
00800                                 strncpy(entry->description,"Percent of time over the past second during which one or more kernels was executing on the GPU.", PAPI_MAX_STR_LEN);
00801                                 entry->options.which_one = GPU_UTILIZATION;
00802                                 entry->type = FEATURE_UTILIZATION;
00803                                 entry++;
00804 
00805                                 sprintf( entry->name, "%s:memory_utilization", sanitized_name);
00806                                 strncpy(entry->description,"Percent of time over the past second during which global (device) memory was being read or written.", PAPI_MAX_STR_LEN);
00807                                 entry->options.which_one = MEMORY_UTILIZATION;
00808                                 entry->type = FEATURE_UTILIZATION;
00809                                 entry++;
00810                         }
00811                         strncpy( names[i], name, 64); 
00812                 }
00813         }
00814 }
00815 
00820         int
00821 _papi_nvml_init_component( int cidx )
00822 {
00823         nvmlReturn_t ret;
00824         cudaError_t cuerr;
00825 
00826         int cuda_count = 0;
00827         unsigned int nvml_count = 0;
00828 
00829         ret = nvmlInit();
00830         if ( NVML_SUCCESS != ret ) {
00831                 strcpy(_nvml_vector.cmp_info.disabled_reason, "The NVIDIA managament library failed to initialize.");
00832                 goto disable;
00833         }
00834 
00835         cuerr = cuInit( 0 );
00836         if ( CUDA_SUCCESS != cuerr ) {
00837                 strcpy(_nvml_vector.cmp_info.disabled_reason, "The CUDA library failed to initialize.");
00838                 goto disable;
00839         }
00840 
00841         /* Figure out the number of CUDA devices in the system */
00842         ret = nvmlDeviceGetCount( &nvml_count );
00843         if ( NVML_SUCCESS != ret ) {
00844                 strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a count of devices from the NVIDIA managament library.");
00845                 goto disable;
00846         }
00847 
00848         cuerr = cudaGetDeviceCount( &cuda_count );
00849         if ( CUDA_SUCCESS != cuerr ) {
00850                 strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a device count from CUDA.");
00851                 goto disable;
00852         }
00853 
00854         /* We can probably recover from this, when we're clever */
00855         if ( nvml_count != cuda_count ) {
00856                 strcpy(_nvml_vector.cmp_info.disabled_reason, "Cuda and the NVIDIA managament library have different device counts.");
00857                 goto disable;
00858         }
00859 
00860         device_count = cuda_count;
00861 
00862         /* A per device representation of what events are present */
00863         features = (int*)papi_malloc(sizeof(int) * device_count );
00864 
00865         /* Handles to each device */
00866         devices = (nvmlDevice_t*)papi_malloc(sizeof(nvmlDevice_t) * device_count);
00867 
00868         /* Figure out what events are supported on each card. */
00869         if ( (papi_errorcode = detectDevices( ) ) != PAPI_OK ) {
00870             papi_free(features);
00871             papi_free(devices);
00872             sprintf(_nvml_vector.cmp_info.disabled_reason, "An error occured in device feature detection, please check your NVIDIA Management Library and CUDA install." );
00873             goto disable;
00874         }
00875 
00876         /* The assumption is that if everything went swimmingly in detectDevices, 
00877             all nvml calls here should be fine. */
00878         createNativeEvents( );
00879 
00880         /* Export the total number of events available */
00881         _nvml_vector.cmp_info.num_native_events = num_events;
00882 
00883         /* Export the component id */
00884         _nvml_vector.cmp_info.CmpIdx = cidx;
00885 
00886         /* Export the number of 'counters' */
00887         _nvml_vector.cmp_info.num_cntrs = num_events;
00888         _nvml_vector.cmp_info.num_mpx_cntrs = num_events;
00889 
00890         return PAPI_OK;
00891 
00892 disable:
00893         _nvml_vector.cmp_info.num_cntrs = 0;
00894         return PAPI_OK; 
00895 }
00896 
00897 
00903         int
00904 _papi_nvml_init_control_state( hwd_control_state_t * ctl )
00905 {
00906         SUBDBG( "nvml_init_control_state... %p\n", ctl );
00907         nvml_control_state_t *nvml_ctl = ( nvml_control_state_t * ) ctl;
00908         memset( nvml_ctl, 0, sizeof ( nvml_control_state_t ) );
00909 
00910         return PAPI_OK;
00911 }
00912 
00913 
00915         int
00916 _papi_nvml_update_control_state( hwd_control_state_t *ctl, 
00917                 NativeInfo_t *native,
00918                 int count, 
00919                 hwd_context_t *ctx )
00920 {
00921         int i, index;
00922 
00923         nvml_control_state_t *nvml_ctl = ( nvml_control_state_t * ) ctl;   
00924         (void) ctx;
00925 
00926         SUBDBG( "_papi_nvml_update_control_state %p %p...", ctl, ctx );
00927 
00928         /* if no events, return */
00929         if (count==0) return PAPI_OK;
00930 
00931         for( i = 0; i < count; i++ ) {
00932                 index = native[i].ni_event;
00933                 nvml_ctl->which_counter[i]=index;
00934                 /* We have no constraints on event position, so any event */
00935                 /* can be in any slot.                                    */
00936                 native[i].ni_position = i;
00937         }
00938         nvml_ctl->num_events=count;
00939         return PAPI_OK;
00940 }
00942         int
00943 _papi_nvml_start( hwd_context_t *ctx, hwd_control_state_t *ctl )
00944 {
00945 
00946         (void) ctx;
00947         (void) ctl;
00948 
00949         SUBDBG( "nvml_start %p %p...", ctx, ctl );
00950         /* anything that would need to be set at counter start time */
00951 
00952         /* reset */
00953         /* start the counting */
00954 
00955         return PAPI_OK;
00956 }
00957 
00958 
00960         int
00961 _papi_nvml_stop( hwd_context_t *ctx, hwd_control_state_t *ctl )
00962 {
00963         int i;
00964         (void) ctx;
00965         (void) ctl;
00966         int ret;
00967         SUBDBG( "nvml_stop %p %p...", ctx, ctl );
00968 
00969         nvml_control_state_t* nvml_ctl = ( nvml_control_state_t*) ctl;
00970 
00971         for (i=0;i<nvml_ctl->num_events;i++) {
00972                 if ( PAPI_OK != 
00973                                 ( ret = nvml_hardware_read( &nvml_ctl->counter[i], 
00974                                                             nvml_ctl->which_counter[i]) ))
00975                         return ret;
00976 
00977         }
00978 
00979         return PAPI_OK;
00980 }
00981 
00982 
00984         int
00985 _papi_nvml_read( hwd_context_t *ctx, hwd_control_state_t *ctl,
00986                 long long **events, int flags )
00987 {
00988 
00989         (void) ctx;
00990         (void) flags;
00991         int i;
00992         int ret;
00993         nvml_control_state_t* nvml_ctl = ( nvml_control_state_t*) ctl;   
00994 
00995         SUBDBG( "nvml_read... %p %d", ctx, flags );
00996 
00997         for (i=0;i<nvml_ctl->num_events;i++) {
00998                 if ( PAPI_OK != 
00999                                 ( ret = nvml_hardware_read( &nvml_ctl->counter[i], 
01000                                                             nvml_ctl->which_counter[i]) ))
01001                         return ret;
01002 
01003         }
01004         /* return pointer to the values we read */
01005         *events = nvml_ctl->counter;    
01006         return PAPI_OK;
01007 }
01008 
01010 /*    otherwise, the updated state is written to ESI->hw_start      */
01011         int
01012 _papi_nvml_write( hwd_context_t *ctx, hwd_control_state_t *ctl,
01013                 long long *events )
01014 {
01015 
01016         (void) ctx;
01017         (void) ctl;
01018         (void) events;
01019 
01020         SUBDBG( "nvml_write... %p %p", ctx, ctl );
01021 
01022         /* You can change ECC mode and compute exclusivity modes on the cards */
01023         /* But I don't see this as a function of a PAPI component at this time */
01024         /* All implementation issues aside. */
01025         return PAPI_OK;
01026 }
01027 
01028 
01030 /*  If the eventset is not currently running, then the saved value in the   */
01031 /*  EventSet is set to zero without calling this routine.                   */
01032         int
01033 _papi_nvml_reset( hwd_context_t * ctx, hwd_control_state_t * ctl )
01034 {
01035         (void) ctx;
01036         (void) ctl;
01037 
01038         SUBDBG( "nvml_reset ctx=%p ctrl=%p...", ctx, ctl );
01039 
01040         /* Reset the hardware */
01041         nvml_hardware_reset(  );
01042 
01043         return PAPI_OK;
01044 }
01045 
01047         int
01048 _papi_nvml_shutdown_component()
01049 {
01050 
01051         SUBDBG( "nvml_shutdown_component..." );
01052 
01053         papi_free(nvml_native_table);
01054         papi_free(devices);
01055 
01056         nvmlShutdown();
01057 
01058         device_count = 0;
01059         num_events = 0;
01060 
01061         return PAPI_OK;
01062 }
01063 
01065         int
01066 _papi_nvml_shutdown_thread( hwd_context_t *ctx )
01067 {
01068 
01069         (void) ctx;
01070 
01071         SUBDBG( "nvml_shutdown_thread... %p", ctx );
01072 
01073         /* Last chance to clean up thread */
01074 
01075         return PAPI_OK;
01076 }
01077 
01078 
01079 
01083         int
01084 _papi_nvml_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option )
01085 {
01086 
01087         (void) ctx;
01088         (void) code;
01089         (void) option;
01090 
01091         SUBDBG( "nvml_ctl..." );
01092 
01093         /* FIXME.  This should maybe set up more state, such as which counters are active and */
01094         /*         counter mappings. */
01095 
01096         return PAPI_OK;
01097 }
01098 
01108         int
01109 _papi_nvml_set_domain( hwd_control_state_t * cntrl, int domain )
01110 {
01111         (void) cntrl;
01112 
01113         int found = 0;
01114         SUBDBG( "nvml_set_domain..." );
01115 
01116         if ( PAPI_DOM_USER & domain ) {
01117                 SUBDBG( " PAPI_DOM_USER " );
01118                 found = 1;
01119         }
01120         if ( PAPI_DOM_KERNEL & domain ) {
01121                 SUBDBG( " PAPI_DOM_KERNEL " );
01122                 found = 1;
01123         }
01124         if ( PAPI_DOM_OTHER & domain ) {
01125                 SUBDBG( " PAPI_DOM_OTHER " );
01126                 found = 1;
01127         }
01128         if ( PAPI_DOM_ALL & domain ) {
01129                 SUBDBG( " PAPI_DOM_ALL " );
01130                 found = 1;
01131         }
01132         if ( !found )
01133                 return ( PAPI_EINVAL );
01134 
01135         return PAPI_OK;
01136 }
01137 
01138 
01139 /**************************************************************/
01140 /* Naming functions, used to translate event numbers to names */
01141 /**************************************************************/
01142 
01143 
01150         int
01151 _papi_nvml_ntv_enum_events( unsigned int *EventCode, int modifier )
01152 {
01153         int index;
01154 
01155         switch ( modifier ) {
01156 
01157                 /* return EventCode of first event */
01158                 case PAPI_ENUM_FIRST:
01159                         /* return the first event that we support */
01160 
01161                         *EventCode = 0;
01162                         return PAPI_OK;
01163 
01164                         /* return EventCode of next available event */
01165                 case PAPI_ENUM_EVENTS:
01166                         index = *EventCode;
01167 
01168                         /* Make sure we are in range */
01169                         if ( index < num_events - 1 ) {
01170 
01171                                 /* This assumes a non-sparse mapping of the events */
01172                                 *EventCode = *EventCode + 1;
01173                                 return PAPI_OK;
01174                         } else {
01175                                 return PAPI_ENOEVNT;
01176                         }
01177                         break;
01178 
01179                 default:
01180                         return PAPI_EINVAL;
01181         }
01182 
01183         return PAPI_EINVAL;
01184 }
01185 
01191         int
01192 _papi_nvml_ntv_code_to_name( unsigned int EventCode, char *name, int len )
01193 {
01194         int index;
01195 
01196         index = EventCode;
01197 
01198         /* Make sure we are in range */
01199         if (index >= num_events) return PAPI_ENOEVNT;
01200 
01201         strncpy( name, nvml_native_table[index].name, len );
01202 
01203         return PAPI_OK;
01204 }
01205 
01211         int
01212 _papi_nvml_ntv_code_to_descr( unsigned int EventCode, char *descr, int len )
01213 {
01214         int index;
01215         index = EventCode;
01216 
01217         if (index >= num_events) return PAPI_ENOEVNT;
01218 
01219         strncpy( descr, nvml_native_table[index].description, len );
01220 
01221         return PAPI_OK;
01222 }
01223 
01225 papi_vector_t _nvml_vector = {
01226         .cmp_info = {
01227                 /* default component information */
01228                 /* (unspecified values are initialized to 0) */
01229 
01230                 .name = "nvml",
01231                 .short_name="nvml",
01232                 .version = "1.0",
01233                 .support_version = "n/a",
01234                 .kernel_version = "n/a",
01235 
01236                 .num_preset_events = 0,
01237                 .num_native_events = 0, /* set by init_component */
01238                 .default_domain = PAPI_DOM_USER,
01239                 .available_domains = PAPI_DOM_USER,
01240                 .default_granularity = PAPI_GRN_THR,
01241                 .available_granularities = PAPI_GRN_THR,
01242                 .hardware_intr_sig = PAPI_INT_SIGNAL,
01243 
01244 
01245                 /* component specific cmp_info initializations */
01246                 .hardware_intr = 0,
01247                 .precise_intr = 0,
01248                 .posix1b_timers = 0,
01249                 .kernel_profile = 0,
01250                 .kernel_multiplex = 0,
01251                 .fast_counter_read = 0,
01252                 .fast_real_timer = 0,
01253                 .fast_virtual_timer = 0,
01254                 .attach = 0,
01255                 .attach_must_ptrace = 0,
01256                 .cntr_umasks = 0,
01257                 .cpu = 0,
01258                 .inherit = 0,
01259         },
01260 
01261         /* sizes of framework-opaque component-private structures */
01262         .size = {
01263              .context = sizeof ( nvml_context_t ),
01264              .control_state = sizeof ( nvml_control_state_t ),
01265              .reg_value = sizeof ( nvml_register_t ),
01266                      // .reg_alloc = sizeof ( nvml_reg_alloc_t ),
01267         },
01268 
01269         /* function pointers */
01270 
01271         /* Used for general PAPI interactions */
01272         .start =                _papi_nvml_start,
01273         .stop =                 _papi_nvml_stop,
01274         .read =                 _papi_nvml_read,
01275         .reset =                _papi_nvml_reset,   
01276         .write =                _papi_nvml_write,
01277         .init_component =       _papi_nvml_init_component,  
01278         .init_thread =          _papi_nvml_init_thread,
01279         .init_control_state =   _papi_nvml_init_control_state,
01280         .update_control_state = _papi_nvml_update_control_state,
01281         .ctl =                  _papi_nvml_ctl, 
01282         .shutdown_thread =      _papi_nvml_shutdown_thread,
01283         .shutdown_component =   _papi_nvml_shutdown_component,
01284         .set_domain =           _papi_nvml_set_domain,
01285         .cleanup_eventset =     NULL,
01286         /* called in add_native_events() */
01287         .allocate_registers =   NULL,
01288 
01289         /* Used for overflow/profiling */
01290         .dispatch_timer =       NULL,
01291         .get_overflow_address = NULL,
01292         .stop_profiling =       NULL,
01293         .set_overflow =         NULL,
01294         .set_profile =          NULL,
01295 
01296         /* Name Mapping Functions */
01297         .ntv_enum_events =   _papi_nvml_ntv_enum_events,
01298         .ntv_name_to_code  = NULL,
01299         .ntv_code_to_name =  _papi_nvml_ntv_code_to_name,
01300         .ntv_code_to_descr = _papi_nvml_ntv_code_to_descr,
01301 
01302 };
01303 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines