PAPI  5.3.0.0
linux-nvml.c
Go to the documentation of this file.
00001 /****************************/
00002 /* THIS IS OPEN SOURCE CODE */
00003 /****************************/
00004 
00020 #include <dlfcn.h>
00021 
00022 #include <stdio.h>
00023 #include <string.h>
00024 #include <stdlib.h>
00025 #include <inttypes.h>
00026 #include <string.h>
00027 /* Headers required by PAPI */
00028 #include "papi.h"
00029 #include "papi_internal.h"
00030 #include "papi_vector.h"
00031 #include "papi_memory.h"
00032 
00033 #include "linux-nvml.h"
00034 
00035 #include "nvml.h"
00036 #include "cuda.h"
00037 #include "cuda_runtime_api.h"
00038 
00039 void (*_dl_non_dynamic_init)(void) __attribute__((weak));
00040 
00041 /*****  CHANGE PROTOTYPES TO DECLARE CUDA AND NVML LIBRARY SYMBOLS AS WEAK  *****
00042  *  This is done so that a version of PAPI built with the nvml component can    *
00043  *  be installed on a system which does not have the cuda libraries installed.  *
00044  *                                                                              *
00045  *  If this is done without these prototypes, then all papi services on the     *
00046  *  system without the cuda libraries installed will fail.  The PAPI libraries  *
00047  *  contain references to the cuda libraries which are not installed.  The      *
00048  *  load of PAPI commands fails because the cuda library references can not be  *
00049  *  resolved.                                                                   *
00050  *                                                                              *
00051  *  This also defines pointers to the cuda library functions that we call.      *
00052  *  These function pointers will be resolved with dlopen/dlsym calls at         *
00053  *  component initialization time.  The component then calls the cuda library   *
00054  *  functions through these function pointers.                                  *
00055  ********************************************************************************/
00056 #undef CUDAAPI
00057 #define CUDAAPI __attribute__((weak))
00058 CUresult CUDAAPI cuInit(unsigned int);
00059 
00060 CUresult (*cuInitPtr)(unsigned int);
00061 
00062 #undef CUDARTAPI
00063 #define CUDARTAPI __attribute__((weak))
00064 cudaError_t CUDARTAPI cudaGetDevice(int *);
00065 cudaError_t CUDARTAPI cudaGetDeviceCount(int *);
00066 cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *, int, int);
00067 
00068 cudaError_t (*cudaGetDevicePtr)(int *);
00069 cudaError_t (*cudaGetDeviceCountPtr)(int *);
00070 cudaError_t (*cudaDeviceGetPCIBusIdPtr)(char *, int, int);
00071 
00072 #undef DECLDIR
00073 #define DECLDIR __attribute__((weak))
00074 nvmlReturn_t DECLDIR nvmlDeviceGetClockInfo                (nvmlDevice_t, nvmlClockType_t, unsigned int *);
00075 const char*  DECLDIR nvmlErrorString                       (nvmlReturn_t);
00076 nvmlReturn_t DECLDIR nvmlDeviceGetDetailedEccErrors        (nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, nvmlEccErrorCounts_t *);
00077 nvmlReturn_t DECLDIR nvmlDeviceGetFanSpeed                 (nvmlDevice_t, unsigned int *);
00078 nvmlReturn_t DECLDIR nvmlDeviceGetMemoryInfo               (nvmlDevice_t, nvmlMemory_t *);
00079 nvmlReturn_t DECLDIR nvmlDeviceGetPerformanceState         (nvmlDevice_t, nvmlPstates_t *);
00080 nvmlReturn_t DECLDIR nvmlDeviceGetPowerUsage               (nvmlDevice_t, unsigned int *);
00081 nvmlReturn_t DECLDIR nvmlDeviceGetTemperature              (nvmlDevice_t, nvmlTemperatureSensors_t, unsigned int *);
00082 nvmlReturn_t DECLDIR nvmlDeviceGetTotalEccErrors           (nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, unsigned long long *);
00083 nvmlReturn_t DECLDIR nvmlDeviceGetUtilizationRates         (nvmlDevice_t, nvmlUtilization_t *);
00084 nvmlReturn_t DECLDIR nvmlDeviceGetHandleByIndex            (unsigned int, nvmlDevice_t *);
00085 nvmlReturn_t DECLDIR nvmlDeviceGetPciInfo                  (nvmlDevice_t, nvmlPciInfo_t *);
00086 nvmlReturn_t DECLDIR nvmlDeviceGetName                     (nvmlDevice_t, char *, unsigned int);
00087 nvmlReturn_t DECLDIR nvmlDeviceGetInforomVersion           (nvmlDevice_t, nvmlInforomObject_t, char *, unsigned int);
00088 nvmlReturn_t DECLDIR nvmlDeviceGetEccMode                  (nvmlDevice_t, nvmlEnableState_t *, nvmlEnableState_t *);
00089 nvmlReturn_t DECLDIR nvmlInit                              (void);
00090 nvmlReturn_t DECLDIR nvmlDeviceGetCount                    (unsigned int *);
00091 nvmlReturn_t DECLDIR nvmlShutdown                          (void);
00092 
00093 nvmlReturn_t       (*nvmlDeviceGetClockInfoPtr)            (nvmlDevice_t, nvmlClockType_t, unsigned int *);
00094 char*              (*nvmlErrorStringPtr)                   (nvmlReturn_t);
00095 nvmlReturn_t       (*nvmlDeviceGetDetailedEccErrorsPtr)    (nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, nvmlEccErrorCounts_t *);
00096 nvmlReturn_t       (*nvmlDeviceGetFanSpeedPtr)             (nvmlDevice_t, unsigned int *);
00097 nvmlReturn_t       (*nvmlDeviceGetMemoryInfoPtr)           (nvmlDevice_t, nvmlMemory_t *);
00098 nvmlReturn_t       (*nvmlDeviceGetPerformanceStatePtr)     (nvmlDevice_t, nvmlPstates_t *);
00099 nvmlReturn_t       (*nvmlDeviceGetPowerUsagePtr)           (nvmlDevice_t, unsigned int *);
00100 nvmlReturn_t       (*nvmlDeviceGetTemperaturePtr)          (nvmlDevice_t, nvmlTemperatureSensors_t, unsigned int *);
00101 nvmlReturn_t       (*nvmlDeviceGetTotalEccErrorsPtr)       (nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, unsigned long long *);
00102 nvmlReturn_t       (*nvmlDeviceGetUtilizationRatesPtr)     (nvmlDevice_t, nvmlUtilization_t *);
00103 nvmlReturn_t       (*nvmlDeviceGetHandleByIndexPtr)        (unsigned int, nvmlDevice_t *);
00104 nvmlReturn_t       (*nvmlDeviceGetPciInfoPtr)              (nvmlDevice_t, nvmlPciInfo_t *);
00105 nvmlReturn_t       (*nvmlDeviceGetNamePtr)                 (nvmlDevice_t, char *, unsigned int);
00106 nvmlReturn_t       (*nvmlDeviceGetInforomVersionPtr)       (nvmlDevice_t, nvmlInforomObject_t, char *, unsigned int);
00107 nvmlReturn_t       (*nvmlDeviceGetEccModePtr)              (nvmlDevice_t, nvmlEnableState_t *, nvmlEnableState_t *);
00108 nvmlReturn_t       (*nvmlInitPtr)                          (void);
00109 nvmlReturn_t       (*nvmlDeviceGetCountPtr)                (unsigned int *);
00110 nvmlReturn_t       (*nvmlShutdownPtr)                      (void);
00111 
00112 
00113 // file handles used to access cuda libraries with dlopen
00114 static void* dl1 = NULL;
00115 static void* dl2 = NULL;
00116 static void* dl3 = NULL;
00117 
00118 static int linkCudaLibraries ();
00119 
00120 
00121 /* Declare our vector in advance */
00122 papi_vector_t _nvml_vector;
00123 
00124 /* Up to 25 events per card; how many cards per system should we allow for?! */
00125 #define NVML_MAX_COUNTERS 100
00126 
00130 typedef struct nvml_control_state
00131 {
00132         int num_events;
00133         int which_counter[NVML_MAX_COUNTERS];
00134         long long counter[NVML_MAX_COUNTERS];   
00135 } nvml_control_state_t;
00136 
00138 typedef struct nvml_context
00139 {
00140         nvml_control_state_t state;
00141 } nvml_context_t;
00142 
00144 static nvml_native_event_entry_t *nvml_native_table=NULL;
00145 
00147 static int device_count = 0;
00148 
00150 static int num_events = 0;
00151 
00152 static nvmlDevice_t* devices=NULL;
00153 static int*          features=NULL;
00154 
00155 unsigned long long
00156 getClockSpeed( nvmlDevice_t dev, nvmlClockType_t which_one )
00157 {
00158         unsigned int ret = 0;
00159         nvmlReturn_t bad; 
00160         bad = (*nvmlDeviceGetClockInfoPtr)( dev, which_one, &ret );
00161 
00162         if ( NVML_SUCCESS != bad ) {
00163                 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
00164         }
00165 
00166         return (unsigned long long)ret;
00167 }
00168 
00169         unsigned long long
00170 getEccLocalErrors( nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one)
00171 {
00172         nvmlEccErrorCounts_t counts;
00173 
00174         nvmlReturn_t bad; 
00175         bad = (*nvmlDeviceGetDetailedEccErrorsPtr)( dev, bits, NVML_VOLATILE_ECC , &counts);
00176 
00177         if ( NVML_SUCCESS != bad ) {
00178                 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
00179         }
00180 
00181 
00182         switch ( which_one ) {
00183                 case LOCAL_ECC_REGFILE:
00184                         return counts.registerFile;
00185                 case LOCAL_ECC_L1:
00186                         return counts.l1Cache;
00187                 case LOCAL_ECC_L2:
00188                         return counts.l2Cache;
00189                 case LOCAL_ECC_MEM:
00190                         return counts.deviceMemory;
00191                 default:
00192                         ;
00193         }
00194         return (unsigned long long)-1;
00195 }
00196 
00197         unsigned long long 
00198 getFanSpeed( nvmlDevice_t dev ) 
00199 {
00200         unsigned int ret = 0;
00201         nvmlReturn_t bad; 
00202         bad = (*nvmlDeviceGetFanSpeedPtr)( dev, &ret );
00203 
00204         if ( NVML_SUCCESS != bad ) {
00205                 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
00206         }
00207 
00208 
00209         return (unsigned long long)ret; 
00210 }
00211 
00212         unsigned long long
00213 getMaxClockSpeed( nvmlDevice_t dev, nvmlClockType_t which_one)
00214 {
00215         unsigned int ret = 0;
00216         nvmlReturn_t bad; 
00217         bad = (*nvmlDeviceGetClockInfoPtr)( dev, which_one, &ret );
00218 
00219         if ( NVML_SUCCESS != bad ) {
00220                 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
00221         }
00222 
00223 
00224         return (unsigned long long) ret;
00225 }
00226 
00227         unsigned long long
00228 getMemoryInfo( nvmlDevice_t dev, int which_one )
00229 {
00230         nvmlMemory_t meminfo;
00231         nvmlReturn_t bad; 
00232         bad = (*nvmlDeviceGetMemoryInfoPtr)( dev, &meminfo );
00233 
00234         if ( NVML_SUCCESS != bad ) {
00235                 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
00236         }
00237 
00238         switch (which_one) {
00239                 case MEMINFO_TOTAL_MEMORY:
00240                         return meminfo.total;
00241                 case MEMINFO_UNALLOCED:
00242                         return meminfo.free;
00243                 case MEMINFO_ALLOCED:
00244                         return meminfo.used;
00245                 default:
00246                         ;
00247         }
00248         return (unsigned long long)-1;
00249 }
00250 
00251         unsigned long long
00252 getPState( nvmlDevice_t dev ) 
00253 {
00254         unsigned int ret = 0;
00255         nvmlPstates_t state = NVML_PSTATE_15;
00256         nvmlReturn_t bad; 
00257         bad = (*nvmlDeviceGetPerformanceStatePtr)( dev, &state );
00258 
00259         if ( NVML_SUCCESS != bad ) {
00260                 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
00261         }
00262 
00263 
00264         switch ( state ) {
00265                 case NVML_PSTATE_15:
00266                         ret++;
00267                 case NVML_PSTATE_14:
00268                         ret++;
00269                 case NVML_PSTATE_13:
00270                         ret++;
00271                 case NVML_PSTATE_12:
00272                         ret++;
00273                 case NVML_PSTATE_11:
00274                         ret++;
00275                 case NVML_PSTATE_10:
00276                         ret++;
00277                 case NVML_PSTATE_9:
00278                         ret++;
00279                 case NVML_PSTATE_8:
00280                         ret++;
00281                 case NVML_PSTATE_7:
00282                         ret++;
00283                 case NVML_PSTATE_6:
00284                         ret++;
00285                 case NVML_PSTATE_5:
00286                         ret++;
00287                 case NVML_PSTATE_4:
00288                         ret++;
00289                 case NVML_PSTATE_3:
00290                         ret++;
00291                 case NVML_PSTATE_2:
00292                         ret++;
00293                 case NVML_PSTATE_1:
00294                         ret++;
00295                 case NVML_PSTATE_0:
00296                         break;
00297                 case NVML_PSTATE_UNKNOWN:
00298                 default:
00299                         /* This should never happen? 
00300                          * The API docs just state Unknown performance state... */
00301                         return (unsigned long long) -1;
00302         }
00303 
00304         return (unsigned long long)ret;
00305 }
00306 
00307         unsigned long long
00308 getPowerUsage( nvmlDevice_t dev )
00309 {
00310         unsigned int power;
00311         nvmlReturn_t bad; 
00312         bad = (*nvmlDeviceGetPowerUsagePtr)( dev, &power );
00313 
00314         if ( NVML_SUCCESS != bad ) {
00315                 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
00316         }
00317 
00318 
00319         return (unsigned long long) power;
00320 }
00321 
00322         unsigned long long
00323 getTemperature( nvmlDevice_t dev )
00324 {
00325         unsigned int ret = 0;
00326         nvmlReturn_t bad; 
00327         bad = (*nvmlDeviceGetTemperaturePtr)( dev, NVML_TEMPERATURE_GPU, &ret );
00328 
00329         if ( NVML_SUCCESS != bad ) {
00330                 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
00331         }
00332 
00333 
00334         return (unsigned long long)ret;
00335 }
00336 
00337         unsigned long long
00338 getTotalEccErrors( nvmlDevice_t dev, nvmlEccBitType_t bits) 
00339 {
00340         unsigned long long counts = 0;
00341         nvmlReturn_t bad; 
00342         bad = (*nvmlDeviceGetTotalEccErrorsPtr)( dev, bits, NVML_VOLATILE_ECC , &counts);
00343 
00344         if ( NVML_SUCCESS != bad ) {
00345                 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
00346         }
00347 
00348 
00349         return counts;
00350 }
00351 
00352 /*  0 => gpu util
00353     1 => memory util
00354  */
00355         unsigned long long
00356 getUtilization( nvmlDevice_t dev, int which_one )
00357 {
00358         nvmlUtilization_t util;
00359         nvmlReturn_t bad; 
00360         bad = (*nvmlDeviceGetUtilizationRatesPtr)( dev, &util );
00361 
00362         if ( NVML_SUCCESS != bad ) {
00363                 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
00364         }
00365 
00366 
00367         switch (which_one) {
00368                 case GPU_UTILIZATION:
00369                         return (unsigned long long) util.gpu;
00370                 case MEMORY_UTILIZATION:
00371                         return (unsigned long long) util.memory;
00372                 default:
00373                         ;
00374         }
00375 
00376         return (unsigned long long) -1;
00377 }
00378 
/**
 * Reset hook for the nvml counters.  Intentionally a no-op.
 */
static void
nvml_hardware_reset(  )
{
        /* Nothing is reset here.  The nvmlDeviceSet* and nvmlDeviceClear*
         * calls need root/admin access, so although the ECC counters could
         * be cleared along these lines, we pass:
         *
         *     int i;
         *     for ( i = 0; i < device_count; i++ )
         *             nvmlDeviceClearEccErrorCounts( devices[i], NVML_VOLATILE_ECC );
         */
}
00390 
00392 /*   You might replace this with code that accesses       */
00393 /*   hardware or reads values from the operatings system. */
00394         static int 
00395 nvml_hardware_read( long long *value, int which_one)
00396         //, nvml_context_t *ctx)
00397 {
00398         nvml_native_event_entry_t *entry;
00399         nvmlDevice_t handle;
00400         int cudaIdx = -1;
00401 
00402         entry = &nvml_native_table[which_one];
00403         *value = (long long) -1;
00404         /* replace entry->resources with the current cuda_device->nvml device */
00405         (*cudaGetDevicePtr)( &cudaIdx );
00406 
00407         if ( cudaIdx < 0 || cudaIdx > device_count )
00408             return PAPI_EINVAL;
00409 
00410         /* Make sure the device we are running on has the requested event */
00411         if ( !HAS_FEATURE( features[cudaIdx] , entry->type) ) 
00412                 return PAPI_EINVAL;
00413 
00414         handle = devices[cudaIdx];
00415 
00416         switch (entry->type) {
00417                 case FEATURE_CLOCK_INFO:
00418                         *value =  getClockSpeed(    handle, 
00419                                         (nvmlClockType_t)entry->options.clock );
00420                         break;
00421                 case FEATURE_ECC_LOCAL_ERRORS:
00422                         *value = getEccLocalErrors(     handle, 
00423                                         (nvmlEccBitType_t)entry->options.ecc_opts.bits, 
00424                                         (int)entry->options.ecc_opts.which_one);
00425                         break;
00426                 case FEATURE_FAN_SPEED:
00427                         *value = getFanSpeed( handle );
00428                         break;
00429                 case FEATURE_MAX_CLOCK:
00430                         *value = getMaxClockSpeed(  handle, 
00431                                         (nvmlClockType_t)entry->options.clock );
00432                         break;
00433                 case FEATURE_MEMORY_INFO:
00434                         *value = getMemoryInfo(     handle, 
00435                                         (int)entry->options.which_one );
00436                         break;
00437                 case FEATURE_PERF_STATES:
00438                         *value = getPState( handle );
00439                         break;
00440                 case FEATURE_POWER:
00441                         *value = getPowerUsage( handle );
00442                         break;
00443                 case FEATURE_TEMP:
00444                         *value = getTemperature( handle );
00445                         break;
00446                 case FEATURE_ECC_TOTAL_ERRORS:
00447                         *value = getTotalEccErrors(     handle, 
00448                                         (nvmlEccBitType_t)entry->options.ecc_opts.bits );
00449                         break;
00450                 case FEATURE_UTILIZATION:
00451                         *value = getUtilization(    handle, 
00452                                         (int)entry->options.which_one );
00453                         break;
00454                 default:
00455                         return PAPI_EINVAL;
00456         }
00457 
00458         return PAPI_OK;
00459 
00460 
00461 }
00462 
00463 /********************************************************************/
00464 /* Below are the functions required by the PAPI component interface */
00465 /********************************************************************/
00466 
00468         int
00469 _papi_nvml_init_thread( hwd_context_t * ctx )
00470 {
00471         (void) ctx;
00472 
00473         SUBDBG( "Enter: ctx: %p\n", ctx );
00474 
00475         return PAPI_OK;
00476 }
00477 
00478         static int 
00479 detectDevices( ) 
00480 {
00481         nvmlReturn_t ret;
00482         nvmlEnableState_t mode = NVML_FEATURE_DISABLED;
00483         nvmlDevice_t handle;
00484         nvmlPciInfo_t info;
00485 
00486         cudaError_t cuerr;
00487 
00488         char busId[16];
00489         char name[64];
00490         char inforomECC[16];
00491         char inforomPower[16];
00492         char names[device_count][64];
00493         char nvml_busIds[device_count][16];
00494 
00495         float ecc_version = 0.0, power_version = 0.0;
00496 
00497         int i = 0,
00498             j = 0;
00499         int isTesla = 0;
00500         int isFermi = 0;
00501         int isUnique = 1;
00502 
00503         unsigned int temp = 0;
00504 
00505 
00506         /* list of nvml pci_busids */
00507     for (i=0; i < device_count; i++) {
00508         ret = (*nvmlDeviceGetHandleByIndexPtr)( i, &handle );
00509         if ( NVML_SUCCESS != ret ) {
00510             SUBDBG("nvmlDeviceGetHandleByIndex(%d) failed\n", i);
00511             return PAPI_ESYS;
00512         }
00513 
00514         ret = (*nvmlDeviceGetPciInfoPtr)( handle, &info );
00515         if ( NVML_SUCCESS != ret ) {
00516             SUBDBG("nvmlDeviceGetPciInfo() failed %s\n", (*nvmlErrorStringPtr)(ret) );
00517             return PAPI_ESYS;
00518         }
00519         strncpy(nvml_busIds[i], info.busId, 16);
00520     }
00521 
00522     /* We want to key our list of nvmlDevice_ts by each device's cuda index */
00523     for (i=0; i < device_count; i++) {
00524             cuerr = (*cudaDeviceGetPCIBusIdPtr)( busId, 16, i );
00525             if ( CUDA_SUCCESS != cuerr ) {
00526                 SUBDBG("cudaDeviceGetPCIBusId failed.\n");
00527                 return PAPI_ESYS;
00528             }
00529             for (j=0; j < device_count; j++ ) {
00530                     if ( !strncmp( busId, nvml_busIds[j], 16) ) {
00531                             ret = (*nvmlDeviceGetHandleByIndexPtr)(j, &devices[i] );
00532                             if ( NVML_SUCCESS != ret ) {
00533                                 SUBDBG("nvmlDeviceGetHandleByIndex(%d, &devices[%d]) failed.\n", j, i);
00534                                 return PAPI_ESYS;
00535                             }
00536                             break;
00537                     }
00538             }   
00539     }
00540 
00541         memset(names, 0x0, device_count*64);
00542         /* So for each card, check whats querable */
00543         for (i=0; i < device_count; i++ ) {
00544                 isTesla=0;
00545                 isFermi=1;
00546                 isUnique = 1;
00547                 features[i] = 0;
00548 
00549                 ret = (*nvmlDeviceGetNamePtr)( devices[i], name, 64 );
00550                 if ( NVML_SUCCESS != ret) {
00551                     SUBDBG("nvmlDeviceGetName failed \n");
00552                     return PAPI_ESYS;
00553                 }
00554 
00555                 for (j=0; j < i; j++ ) 
00556                         if ( 0 == strncmp( name, names[j], 64 ) ) {
00557                                 /* if we have a match, and IF everything is sane, 
00558                                  * devices with the same name eg Tesla C2075 share features */
00559                                 isUnique = 0;
00560                                 features[i] = features[j];
00561 
00562                         }
00563 
00564                 if ( isUnique ) {
00565                         ret = (*nvmlDeviceGetInforomVersionPtr)( devices[i], NVML_INFOROM_ECC, inforomECC, 16);
00566                         if ( NVML_SUCCESS != ret ) {
00567                                 SUBDBG("nvmlGetInforomVersion carps %s\n", (*nvmlErrorStringPtr)(ret ) );
00568                                 isFermi = 0;
00569                         }
00570                         ret = (*nvmlDeviceGetInforomVersionPtr)( devices[i], NVML_INFOROM_POWER, inforomPower, 16);
00571                         if ( NVML_SUCCESS != ret ) {
00572                                 /* This implies the card is older then Fermi */
00573                                 SUBDBG("nvmlGetInforomVersion carps %s\n", (*nvmlErrorStringPtr)(ret ) );
00574                                 SUBDBG("Based upon the return to nvmlGetInforomVersion, we conclude this card is older then Fermi.\n");
00575                                 isFermi = 0;
00576                         } 
00577 
00578                         ecc_version = strtof(inforomECC, NULL );
00579                         power_version = strtof( inforomPower, NULL);
00580 
00581                         ret = (*nvmlDeviceGetNamePtr)( devices[i], name, 64 );
00582                         isTesla = ( NULL == strstr(name, "Tesla") ) ? 0:1;
00583 
00584                         /* For Tesla and Quadro products from Fermi and Kepler families. */
00585                         if ( isFermi ) {
00586                                 features[i] |= FEATURE_CLOCK_INFO;
00587                                 num_events += 3;
00588                         }
00589 
00590                         /*  For Tesla and Quadro products from Fermi and Kepler families. 
00591                             requires NVML_INFOROM_ECC 2.0 or higher for location-based counts
00592                             requires NVML_INFOROM_ECC 1.0 or higher for all other ECC counts
00593                             requires ECC mode to be enabled. */
00594                         ret = (*nvmlDeviceGetEccModePtr)( devices[i], &mode, NULL );
00595                         if ( NVML_SUCCESS == ret ) {
00596                             if ( NVML_FEATURE_ENABLED == mode) {
00597                             if ( ecc_version >= 2.0 ) {
00598                                 features[i] |= FEATURE_ECC_LOCAL_ERRORS;
00599                                 num_events += 8; /* {single bit, two bit errors} x { reg, l1, l2, memory } */
00600                             }
00601                             if ( ecc_version >= 1.0 ) {
00602                                 features[i] |= FEATURE_ECC_TOTAL_ERRORS;
00603                                 num_events += 2; /* single bit errors, double bit errors */
00604                             }
00605                             }
00606                         } else {
00607                             SUBDBG("nvmlDeviceGetEccMode does not appear to be supported. (nvml\
00608 return code %d)\n", ret);
00609                         }
00610 
00611                         /* For all discrete products with dedicated fans */
00612                         features[i] |= FEATURE_FAN_SPEED;
00613                         num_events++;
00614 
00615                         /* For Tesla and Quadro products from Fermi and Kepler families. */
00616                         if ( isFermi ) {
00617                                 features[i] |= FEATURE_MAX_CLOCK;
00618                                 num_events += 3;
00619                         }
00620 
00621                         /* For all products */
00622                         features[i] |= FEATURE_MEMORY_INFO;
00623                         num_events += 3; /* total, free, used */
00624 
00625                         /* For Tesla and Quadro products from the Fermi and Kepler families. */
00626                         if ( isFermi ) {
00627                                 features[i] |= FEATURE_PERF_STATES;
00628                                 num_events++;
00629                         }
00630 
00631                         /*  For "GF11x" Tesla and Quadro products from the Fermi family
00632                             requires NVML_INFOROM_POWER 3.0 or higher
00633                             For Tesla and Quadro products from the Kepler family
00634                             does not require NVML_INFOROM_POWER */
00635                         /* Just try reading power, if it works, enable it*/
00636                         ret = (*nvmlDeviceGetPowerUsagePtr)( devices[i], &temp);
00637                         if ( NVML_SUCCESS == ret ) {
00638                             features[i] |= FEATURE_POWER;
00639                             num_events++;
00640                         } else {
00641                             SUBDBG("nvmlDeviceGetPowerUsage does not appear to be supported on\
00642 this card. (nvml return code %d)\n", ret );
00643                         }
00644 
00645                         /* For all discrete and S-class products. */
00646                         features[i] |= FEATURE_TEMP;
00647                         num_events++;
00648 
00649                         /* For Tesla and Quadro products from the Fermi and Kepler families */
00650                         if (isFermi) {
00651                                 features[i] |= FEATURE_UTILIZATION;
00652                                 num_events += 2;
00653                         }
00654 
00655                         strncpy( names[i], name, 64); 
00656 
00657                 }
00658         }
00659         return PAPI_OK;
00660 }
00661 
00662     static void
00663 createNativeEvents( )
00664 {
00665         char name[64];
00666         char sanitized_name[PAPI_MAX_STR_LEN];
00667         char names[device_count][64];
00668 
00669         int i, nameLen = 0, j;
00670         int isUnique = 1;
00671 
00672         nvml_native_event_entry_t* entry;
00673         nvmlReturn_t ret;
00674 
00675         nvml_native_table = (nvml_native_event_entry_t*) papi_malloc( 
00676                         sizeof(nvml_native_event_entry_t) * num_events );   
00677         memset( nvml_native_table, 0x0, sizeof(nvml_native_event_entry_t) * num_events );
00678         entry = &nvml_native_table[0];
00679 
00680         for (i=0; i < device_count; i++ ) {
00681                 memset( names[i], 0x0, 64 );
00682                 isUnique = 1;
00683                 ret = (*nvmlDeviceGetNamePtr)( devices[i], name, 64 );
00684 
00685                 for (j=0; j < i; j++ ) 
00686                 {
00687                         if ( 0 == strncmp( name, names[j], 64 ) )
00688                                 isUnique = 0;
00689                 }
00690 
00691                 if ( isUnique ) {
00692                         nameLen = strlen(name);
00693                         strncpy(sanitized_name, name, PAPI_MAX_STR_LEN );
00694                         for (j=0; j < nameLen; j++)
00695                                 if ( ' ' == sanitized_name[j] )
00696                                         sanitized_name[j] = '_';
00697 
00698 
00699 
00700                         if ( HAS_FEATURE( features[i], FEATURE_CLOCK_INFO ) ) {
00701                                 sprintf( entry->name, "%s:graphics_clock", sanitized_name );
00702                                 strncpy(entry->description,"Graphics clock domain (MHz).", PAPI_MAX_STR_LEN );
00703                                 entry->options.clock = NVML_CLOCK_GRAPHICS;
00704                                 entry->type = FEATURE_CLOCK_INFO;
00705                                 entry++;
00706 
00707                                 sprintf( entry->name, "%s:sm_clock", sanitized_name);
00708                                 strncpy(entry->description,"SM clock domain (MHz).", PAPI_MAX_STR_LEN);
00709                                 entry->options.clock = NVML_CLOCK_SM;
00710                                 entry->type = FEATURE_CLOCK_INFO;
00711                                 entry++;
00712 
00713                                 sprintf( entry->name, "%s:memory_clock", sanitized_name);
00714                                 strncpy(entry->description,"Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
00715                                 entry->options.clock = NVML_CLOCK_MEM;
00716                                 entry->type = FEATURE_CLOCK_INFO;
00717                                 entry++;
00718                         }   
00719 
00720                         if ( HAS_FEATURE( features[i], FEATURE_ECC_LOCAL_ERRORS ) ) { 
00721                                 sprintf(entry->name, "%s:l1_single_ecc_errors", sanitized_name);
00722                                 strncpy(entry->description,"L1 cache single bit ECC", PAPI_MAX_STR_LEN);
00723                                 entry->options.ecc_opts = (struct local_ecc){
00724                                         .bits = NVML_SINGLE_BIT_ECC,
00725                                                 .which_one = LOCAL_ECC_L1,
00726                                 };
00727                                 entry->type = FEATURE_ECC_LOCAL_ERRORS;
00728                                 entry++;
00729 
00730                                 sprintf(entry->name, "%s:l2_single_ecc_errors", sanitized_name);
00731                                 strncpy(entry->description,"L2 cache single bit ECC", PAPI_MAX_STR_LEN);
00732                                 entry->options.ecc_opts = (struct local_ecc){
00733                                         .bits = NVML_SINGLE_BIT_ECC,
00734                                                 .which_one = LOCAL_ECC_L2,
00735                                 };
00736                                 entry->type = FEATURE_ECC_LOCAL_ERRORS;
00737                                 entry++;
00738 
00739                                 sprintf(entry->name, "%s:memory_single_ecc_errors", sanitized_name);
00740                                 strncpy(entry->description,"Device memory single bit ECC", PAPI_MAX_STR_LEN);
00741                                 entry->options.ecc_opts = (struct local_ecc){
00742                                         .bits = NVML_SINGLE_BIT_ECC,
00743                                                 .which_one = LOCAL_ECC_MEM,
00744                                 };
00745                                 entry->type = FEATURE_ECC_LOCAL_ERRORS;
00746                                 entry++;
00747 
00748                                 sprintf(entry->name, "%s:regfile_single_ecc_errors", sanitized_name);
00749                                 strncpy(entry->description,"Register file single bit ECC", PAPI_MAX_STR_LEN);
00750                                 entry->options.ecc_opts = (struct local_ecc){
00751                                         .bits = NVML_SINGLE_BIT_ECC,
00752                                                 .which_one = LOCAL_ECC_REGFILE,
00753                                 };
00754                                 entry->type = FEATURE_ECC_LOCAL_ERRORS;
00755                                 entry++;
00756 
00757                                 sprintf(entry->name, "%s:1l_double_ecc_errors", sanitized_name);
00758                                 strncpy(entry->description,"L1 cache double bit ECC", PAPI_MAX_STR_LEN);
00759                                 entry->options.ecc_opts = (struct local_ecc){
00760                                         .bits = NVML_DOUBLE_BIT_ECC,
00761                                                 .which_one = LOCAL_ECC_L1,
00762                                 };
00763                                 entry->type = FEATURE_ECC_LOCAL_ERRORS;
00764                                 entry++;
00765 
00766                                 sprintf(entry->name, "%s:l2_double_ecc_errors", sanitized_name);
00767                                 strncpy(entry->description,"L2 cache double bit ECC", PAPI_MAX_STR_LEN);
00768                                 entry->options.ecc_opts = (struct local_ecc){
00769                                         .bits = NVML_DOUBLE_BIT_ECC,
00770                                                 .which_one = LOCAL_ECC_L2,
00771                                 };
00772                                 entry->type = FEATURE_ECC_LOCAL_ERRORS;
00773                                 entry++;
00774 
00775                                 sprintf(entry->name, "%s:memory_double_ecc_errors", sanitized_name);
00776                                 strncpy(entry->description,"Device memory double bit ECC", PAPI_MAX_STR_LEN);
00777                                 entry->options.ecc_opts = (struct local_ecc){
00778                                         .bits = NVML_DOUBLE_BIT_ECC,
00779                                                 .which_one = LOCAL_ECC_MEM,
00780                                 };
00781                                 entry->type = FEATURE_ECC_LOCAL_ERRORS;
00782                                 entry++;
00783 
00784                                 sprintf(entry->name, "%s:regfile_double_ecc_errors", sanitized_name);
00785                                 strncpy(entry->description,"Register file double bit ECC", PAPI_MAX_STR_LEN);
00786                                 entry->options.ecc_opts = (struct local_ecc){
00787                                         .bits = NVML_DOUBLE_BIT_ECC,
00788                                                 .which_one = LOCAL_ECC_REGFILE,
00789                                 };
00790                                 entry->type = FEATURE_ECC_LOCAL_ERRORS;
00791                                 entry++;
00792                         }
00793 
00794                         if ( HAS_FEATURE( features[i], FEATURE_FAN_SPEED ) ) {
00795                                 sprintf( entry->name, "%s:fan_speed", sanitized_name);
00796                                 strncpy(entry->description,"The fan speed expressed as a percent of the maximum, i.e. full speed is 100%", PAPI_MAX_STR_LEN);
00797                                 entry->type = FEATURE_FAN_SPEED;
00798                                 entry++;
00799                         }
00800 
00801                         if ( HAS_FEATURE( features[i], FEATURE_MAX_CLOCK ) ) {
00802                                 sprintf( entry->name, "%s:graphics_max_clock", sanitized_name);
00803                                 strncpy(entry->description,"Maximal Graphics clock domain (MHz).", PAPI_MAX_STR_LEN);
00804                                 entry->options.clock = NVML_CLOCK_GRAPHICS;
00805                                 entry->type = FEATURE_MAX_CLOCK;
00806                                 entry++;
00807 
00808                                 sprintf( entry->name, "%s:sm_max_clock", sanitized_name);
00809                                 strncpy(entry->description,"Maximal SM clock domain (MHz).", PAPI_MAX_STR_LEN);
00810                                 entry->options.clock = NVML_CLOCK_SM;
00811                                 entry->type = FEATURE_MAX_CLOCK;
00812                                 entry++;
00813 
00814                                 sprintf( entry->name, "%s:memory_max_clock", sanitized_name);
00815                                 strncpy(entry->description,"Maximal Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
00816                                 entry->options.clock = NVML_CLOCK_MEM;
00817                                 entry->type = FEATURE_MAX_CLOCK;
00818                                 entry++;
00819                         }
00820 
00821                         if ( HAS_FEATURE( features[i], FEATURE_MEMORY_INFO ) ) {
00822                                 sprintf( entry->name, "%s:total_memory", sanitized_name);
00823                                 strncpy(entry->description,"Total installed FB memory (in bytes).", PAPI_MAX_STR_LEN);
00824                                 entry->options.which_one = MEMINFO_TOTAL_MEMORY;
00825                                 entry->type = FEATURE_MEMORY_INFO;
00826                                 entry++;
00827 
00828                                 sprintf( entry->name, "%s:unallocated_memory", sanitized_name);
00829                                 strncpy(entry->description,"Uncallocated FB memory (in bytes).", PAPI_MAX_STR_LEN);
00830                                 entry->options.which_one = MEMINFO_UNALLOCED;
00831                                 entry->type = FEATURE_MEMORY_INFO;
00832                                 entry++;
00833 
00834                                 sprintf( entry->name, "%s:allocated_memory", sanitized_name);
00835                                 strncpy(entry->description, "Allocated FB memory (in bytes). Note that the driver/GPU always sets aside a small amount of memory for bookkeeping.", PAPI_MAX_STR_LEN);
00836                                 entry->options.which_one = MEMINFO_ALLOCED;
00837                                 entry->type = FEATURE_MEMORY_INFO;
00838                                 entry++;
00839                         }
00840 
00841                         if ( HAS_FEATURE( features[i], FEATURE_PERF_STATES ) ) {
00842                                 sprintf( entry->name, "%s:pstate", sanitized_name);
00843                                 strncpy(entry->description,"The performance state of the device.", PAPI_MAX_STR_LEN);
00844                                 entry->type = FEATURE_PERF_STATES;
00845                                 entry++;
00846                         }
00847 
00848                         if ( HAS_FEATURE( features[i], FEATURE_POWER ) ) {
00849                                 sprintf( entry->name, "%s:power", sanitized_name);
00850                                 strncpy(entry->description,"Power usage reading for the device, in miliwatts. This is the power draw for the entire board, including GPU, memory, etc.\n The reading is accurate to within a range of +/-5 watts.", PAPI_MAX_STR_LEN);
00851                                 entry->type = FEATURE_POWER;
00852                                 entry++;
00853                         }
00854 
00855                         if ( HAS_FEATURE( features[i], FEATURE_TEMP ) ) {
00856                                 sprintf( entry->name, "%s:temperature", sanitized_name);
00857                                 strncpy(entry->description,"Current temperature readings for the device, in degrees C.", PAPI_MAX_STR_LEN);
00858                                 entry->type = FEATURE_TEMP;
00859                                 entry++;
00860                         }
00861 
00862                         if ( HAS_FEATURE( features[i], FEATURE_ECC_TOTAL_ERRORS ) ) {
00863                                 sprintf( entry->name, "%s:total_ecc_errors", sanitized_name);
00864                                 strncpy(entry->description,"Total single bit errors.", PAPI_MAX_STR_LEN);
00865                                 entry->options.ecc_opts = (struct local_ecc){ 
00866                                         .bits = NVML_SINGLE_BIT_ECC, 
00867                                 };
00868                                 entry->type = FEATURE_ECC_TOTAL_ERRORS;
00869                                 entry++;
00870 
00871                                 sprintf( entry->name, "%s:total_ecc_errors", sanitized_name);
00872                                 strncpy(entry->description,"Total double bit errors.", PAPI_MAX_STR_LEN);
00873                                 entry->options.ecc_opts = (struct local_ecc){ 
00874                                         .bits = NVML_DOUBLE_BIT_ECC, 
00875                                 };
00876                                 entry->type = FEATURE_ECC_TOTAL_ERRORS;
00877                                 entry++;
00878                         }
00879 
00880                         if ( HAS_FEATURE( features[i], FEATURE_UTILIZATION ) ) {
00881                                 sprintf( entry->name, "%s:gpu_utilization", sanitized_name);
00882                                 strncpy(entry->description,"Percent of time over the past second during which one or more kernels was executing on the GPU.", PAPI_MAX_STR_LEN);
00883                                 entry->options.which_one = GPU_UTILIZATION;
00884                                 entry->type = FEATURE_UTILIZATION;
00885                                 entry++;
00886 
00887                                 sprintf( entry->name, "%s:memory_utilization", sanitized_name);
00888                                 strncpy(entry->description,"Percent of time over the past second during which global (device) memory was being read or written.", PAPI_MAX_STR_LEN);
00889                                 entry->options.which_one = MEMORY_UTILIZATION;
00890                                 entry->type = FEATURE_UTILIZATION;
00891                                 entry++;
00892                         }
00893                         strncpy( names[i], name, 64); 
00894                 }
00895         }
00896 }
00897 
00902         int
00903 _papi_nvml_init_component( int cidx )
00904 {
00905         SUBDBG ("Entry: cidx: %d\n", cidx);
00906         nvmlReturn_t ret;
00907         cudaError_t cuerr;
00908         int papi_errorcode;
00909 
00910         int cuda_count = 0;
00911         unsigned int nvml_count = 0;
00912 
00913         /* link in the cuda and nvml libraries and resolve the symbols we need to use */
00914         if (linkCudaLibraries() != PAPI_OK) {
00915             SUBDBG ("Dynamic link of CUDA libraries failed, component will be disabled.\n");
00916             SUBDBG ("See disable reason in papi_component_avail output for more details.\n");
00917             return (PAPI_ENOSUPP);
00918         }
00919 
00920         ret = (*nvmlInitPtr)();
00921         if ( NVML_SUCCESS != ret ) {
00922                 strcpy(_nvml_vector.cmp_info.disabled_reason, "The NVIDIA managament library failed to initialize.");
00923                 return PAPI_ENOSUPP;
00924         }
00925 
00926         cuerr = (*cuInitPtr)( 0 );
00927         if ( CUDA_SUCCESS != cuerr ) {
00928                 strcpy(_nvml_vector.cmp_info.disabled_reason, "The CUDA library failed to initialize.");
00929                 return PAPI_ENOSUPP;
00930         }
00931 
00932         /* Figure out the number of CUDA devices in the system */
00933         ret = (*nvmlDeviceGetCountPtr)( &nvml_count );
00934         if ( NVML_SUCCESS != ret ) {
00935                 strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a count of devices from the NVIDIA managament library.");
00936                 return PAPI_ENOSUPP;
00937         }
00938 
00939         cuerr = (*cudaGetDeviceCountPtr)( &cuda_count );
00940         if ( CUDA_SUCCESS != cuerr ) {
00941                 strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a device count from CUDA.");
00942                 return PAPI_ENOSUPP;
00943         }
00944 
00945         /* We can probably recover from this, when we're clever */
00946         if ( (cuda_count > 0) && (nvml_count != (unsigned int)cuda_count ) ) {
00947                 strcpy(_nvml_vector.cmp_info.disabled_reason, "Cuda and the NVIDIA managament library have different device counts.");
00948                 return PAPI_ENOSUPP;
00949         }
00950 
00951         device_count = cuda_count;
00952 
00953         /* A per device representation of what events are present */
00954         features = (int*)papi_malloc(sizeof(int) * device_count );
00955 
00956         /* Handles to each device */
00957         devices = (nvmlDevice_t*)papi_malloc(sizeof(nvmlDevice_t) * device_count);
00958 
00959         /* Figure out what events are supported on each card. */
00960         if ( (papi_errorcode = detectDevices( ) ) != PAPI_OK ) {
00961             papi_free(features);
00962             papi_free(devices);
00963             sprintf(_nvml_vector.cmp_info.disabled_reason, "An error occured in device feature detection, please check your NVIDIA Management Library and CUDA install." );
00964             return PAPI_ENOSUPP;
00965         }
00966 
00967         /* The assumption is that if everything went swimmingly in detectDevices, 
00968             all nvml calls here should be fine. */
00969         createNativeEvents( );
00970 
00971         /* Export the total number of events available */
00972         _nvml_vector.cmp_info.num_native_events = num_events;
00973 
00974         /* Export the component id */
00975         _nvml_vector.cmp_info.CmpIdx = cidx;
00976 
00977         /* Export the number of 'counters' */
00978         _nvml_vector.cmp_info.num_cntrs = num_events;
00979         _nvml_vector.cmp_info.num_mpx_cntrs = num_events;
00980 
00981         return PAPI_OK;
00982 }
00983 
00984 
00985 /*
00986  * Link the necessary CUDA libraries to use the cuda component.  If any of them can not be found, then
00987  * the CUDA component will just be disabled.  This is done at runtime so that a version of PAPI built
00988  * with the CUDA component can be installed and used on systems which have the CUDA libraries installed
00989  * and on systems where these libraries are not installed.
00990  */
00991 static int
00992 linkCudaLibraries ()
00993 {
00994     /* Attempt to guess if we were statically linked to libc, if so bail */
00995     if ( _dl_non_dynamic_init != NULL ) {
00996         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML component does not support statically linking of libc.", PAPI_MAX_STR_LEN);
00997         return PAPI_ENOSUPP;
00998     }
00999 
01000     /* Need to link in the cuda libraries, if not found disable the component */
01001     dl1 = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL);
01002     if (!dl1)
01003     {
01004         strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA library libcuda.so not found.",PAPI_MAX_STR_LEN);
01005         return ( PAPI_ENOSUPP );
01006     }
01007     cuInitPtr = dlsym(dl1, "cuInit");
01008     if (dlerror() != NULL)
01009     {
01010         strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA function cuInit not found.",PAPI_MAX_STR_LEN);
01011         return ( PAPI_ENOSUPP );
01012     }
01013 
01014     dl2 = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL);
01015     if (!dl2)
01016     {
01017         strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA runtime library libcudart.so not found.",PAPI_MAX_STR_LEN);
01018         return ( PAPI_ENOSUPP );
01019     }
01020     cudaGetDevicePtr = dlsym(dl2, "cudaGetDevice");
01021     if (dlerror() != NULL)
01022     {
01023         strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDevice not found.",PAPI_MAX_STR_LEN);
01024         return ( PAPI_ENOSUPP );
01025     }
01026     cudaGetDeviceCountPtr = dlsym(dl2, "cudaGetDeviceCount");
01027     if (dlerror() != NULL)
01028     {
01029         strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDeviceCount not found.",PAPI_MAX_STR_LEN);
01030         return ( PAPI_ENOSUPP );
01031     }
01032     cudaDeviceGetPCIBusIdPtr = dlsym(dl2, "cudaDeviceGetPCIBusId");
01033     if (dlerror() != NULL)
01034     {
01035         strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaDeviceGetPCIBusId not found.",PAPI_MAX_STR_LEN);
01036         return ( PAPI_ENOSUPP );
01037     }
01038 
01039     dl3 = dlopen("libnvidia-ml.so", RTLD_NOW | RTLD_GLOBAL);
01040     if (!dl3)
01041     {
01042         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML runtime library libnvidia-ml.so not found.",PAPI_MAX_STR_LEN);
01043         return ( PAPI_ENOSUPP );
01044     }
01045     nvmlDeviceGetClockInfoPtr = dlsym(dl3, "nvmlDeviceGetClockInfo");
01046     if (dlerror() != NULL)
01047     {
01048         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetClockInfo not found.",PAPI_MAX_STR_LEN);
01049         return ( PAPI_ENOSUPP );
01050     }
01051     nvmlErrorStringPtr = dlsym(dl3, "nvmlErrorString");
01052     if (dlerror() != NULL)
01053     {
01054         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlErrorString not found.",PAPI_MAX_STR_LEN);
01055         return ( PAPI_ENOSUPP );
01056     }
01057     nvmlDeviceGetDetailedEccErrorsPtr = dlsym(dl3, "nvmlDeviceGetDetailedEccErrors");
01058     if (dlerror() != NULL)
01059     {
01060         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetDetailedEccErrors not found.",PAPI_MAX_STR_LEN);
01061         return ( PAPI_ENOSUPP );
01062     }
01063     nvmlDeviceGetFanSpeedPtr = dlsym(dl3, "nvmlDeviceGetFanSpeed");
01064     if (dlerror() != NULL)
01065     {
01066         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetFanSpeed not found.",PAPI_MAX_STR_LEN);
01067         return ( PAPI_ENOSUPP );
01068     }
01069     nvmlDeviceGetMemoryInfoPtr = dlsym(dl3, "nvmlDeviceGetMemoryInfo");
01070     if (dlerror() != NULL)
01071     {
01072         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetMemoryInfo not found.",PAPI_MAX_STR_LEN);
01073         return ( PAPI_ENOSUPP );
01074     }
01075     nvmlDeviceGetPerformanceStatePtr = dlsym(dl3, "nvmlDeviceGetPerformanceState");
01076     if (dlerror() != NULL)
01077     {
01078         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPerformanceState not found.",PAPI_MAX_STR_LEN);
01079         return ( PAPI_ENOSUPP );
01080     }
01081     nvmlDeviceGetPowerUsagePtr = dlsym(dl3, "nvmlDeviceGetPowerUsage");
01082     if (dlerror() != NULL)
01083     {
01084         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPowerUsage not found.",PAPI_MAX_STR_LEN);
01085         return ( PAPI_ENOSUPP );
01086     }
01087     nvmlDeviceGetTemperaturePtr = dlsym(dl3, "nvmlDeviceGetTemperature");
01088     if (dlerror() != NULL)
01089     {
01090         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTemperature not found.",PAPI_MAX_STR_LEN);
01091         return ( PAPI_ENOSUPP );
01092     }
01093     nvmlDeviceGetTotalEccErrorsPtr = dlsym(dl3, "nvmlDeviceGetTotalEccErrors");
01094     if (dlerror() != NULL)
01095     {
01096         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTotalEccErrors not found.",PAPI_MAX_STR_LEN);
01097         return ( PAPI_ENOSUPP );
01098     }
01099     nvmlDeviceGetUtilizationRatesPtr = dlsym(dl3, "nvmlDeviceGetUtilizationRates");
01100     if (dlerror() != NULL)
01101     {
01102         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetUtilizationRates not found.",PAPI_MAX_STR_LEN);
01103         return ( PAPI_ENOSUPP );
01104     }
01105     nvmlDeviceGetHandleByIndexPtr = dlsym(dl3, "nvmlDeviceGetHandleByIndex");
01106     if (dlerror() != NULL)
01107     {
01108         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetHandleByIndex not found.",PAPI_MAX_STR_LEN);
01109         return ( PAPI_ENOSUPP );
01110     }
01111     nvmlDeviceGetPciInfoPtr = dlsym(dl3, "nvmlDeviceGetPciInfo");
01112     if (dlerror() != NULL)
01113     {
01114         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPciInfo not found.",PAPI_MAX_STR_LEN);
01115         return ( PAPI_ENOSUPP );
01116     }
01117     nvmlDeviceGetNamePtr = dlsym(dl3, "nvmlDeviceGetName");
01118     if (dlerror() != NULL)
01119     {
01120         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetName not found.",PAPI_MAX_STR_LEN);
01121         return ( PAPI_ENOSUPP );
01122     }
01123     nvmlDeviceGetInforomVersionPtr = dlsym(dl3, "nvmlDeviceGetInforomVersion");
01124     if (dlerror() != NULL)
01125     {
01126         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetInforomVersion not found.",PAPI_MAX_STR_LEN);
01127         return ( PAPI_ENOSUPP );
01128     }
01129     nvmlDeviceGetEccModePtr = dlsym(dl3, "nvmlDeviceGetEccMode");
01130     if (dlerror() != NULL)
01131     {
01132         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetEccMode not found.",PAPI_MAX_STR_LEN);
01133         return ( PAPI_ENOSUPP );
01134     }
01135     nvmlInitPtr = dlsym(dl3, "nvmlInit");
01136     if (dlerror() != NULL)
01137     {
01138         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlInit not found.",PAPI_MAX_STR_LEN);
01139         return ( PAPI_ENOSUPP );
01140     }
01141     nvmlDeviceGetCountPtr = dlsym(dl3, "nvmlDeviceGetCount");
01142     if (dlerror() != NULL)
01143     {
01144         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetCount not found.",PAPI_MAX_STR_LEN);
01145         return ( PAPI_ENOSUPP );
01146     }
01147     nvmlShutdownPtr = dlsym(dl3, "nvmlShutdown");
01148     if (dlerror() != NULL)
01149     {
01150         strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlShutdown not found.",PAPI_MAX_STR_LEN);
01151         return ( PAPI_ENOSUPP );
01152     }
01153 
01154     return ( PAPI_OK );
01155 }
01156 
01157 
01163         int
01164 _papi_nvml_init_control_state( hwd_control_state_t * ctl )
01165 {
01166         SUBDBG( "nvml_init_control_state... %p\n", ctl );
01167         nvml_control_state_t *nvml_ctl = ( nvml_control_state_t * ) ctl;
01168         memset( nvml_ctl, 0, sizeof ( nvml_control_state_t ) );
01169 
01170         return PAPI_OK;
01171 }
01172 
01173 
01175         int
01176 _papi_nvml_update_control_state( hwd_control_state_t *ctl, 
01177                 NativeInfo_t *native,
01178                 int count, 
01179                 hwd_context_t *ctx )
01180 {
01181         SUBDBG( "Enter: ctl: %p, ctx: %p\n", ctl, ctx );
01182         int i, index;
01183 
01184         nvml_control_state_t *nvml_ctl = ( nvml_control_state_t * ) ctl;   
01185         (void) ctx;
01186 
01187 
01188         /* if no events, return */
01189         if (count==0) return PAPI_OK;
01190 
01191         for( i = 0; i < count; i++ ) {
01192                 index = native[i].ni_event;
01193                 nvml_ctl->which_counter[i]=index;
01194                 /* We have no constraints on event position, so any event */
01195                 /* can be in any slot.                                    */
01196                 native[i].ni_position = i;
01197         }
01198         nvml_ctl->num_events=count;
01199         return PAPI_OK;
01200 }
01202         int
01203 _papi_nvml_start( hwd_context_t *ctx, hwd_control_state_t *ctl )
01204 {
01205         SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
01206 
01207         (void) ctx;
01208         (void) ctl;
01209 
01210         /* anything that would need to be set at counter start time */
01211 
01212         /* reset */
01213         /* start the counting */
01214 
01215         return PAPI_OK;
01216 }
01217 
01218 
01220         int
01221 _papi_nvml_stop( hwd_context_t *ctx, hwd_control_state_t *ctl )
01222 {
01223         SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
01224 
01225         int i;
01226         (void) ctx;
01227         (void) ctl;
01228         int ret;
01229 
01230         nvml_control_state_t* nvml_ctl = ( nvml_control_state_t*) ctl;
01231 
01232         for (i=0;i<nvml_ctl->num_events;i++) {
01233                 if ( PAPI_OK != 
01234                                 ( ret = nvml_hardware_read( &nvml_ctl->counter[i], 
01235                                                             nvml_ctl->which_counter[i]) ))
01236                         return ret;
01237 
01238         }
01239 
01240         return PAPI_OK;
01241 }
01242 
01243 
01245         int
01246 _papi_nvml_read( hwd_context_t *ctx, hwd_control_state_t *ctl,
01247                 long long **events, int flags )
01248 {
01249         SUBDBG( "Enter: ctx: %p, flags: %d\n", ctx, flags );
01250 
01251         (void) ctx;
01252         (void) flags;
01253         int i;
01254         int ret;
01255         nvml_control_state_t* nvml_ctl = ( nvml_control_state_t*) ctl;   
01256 
01257 
01258         for (i=0;i<nvml_ctl->num_events;i++) {
01259                 if ( PAPI_OK != 
01260                                 ( ret = nvml_hardware_read( &nvml_ctl->counter[i], 
01261                                                             nvml_ctl->which_counter[i]) ))
01262                         return ret;
01263 
01264         }
01265         /* return pointer to the values we read */
01266         *events = nvml_ctl->counter;    
01267         return PAPI_OK;
01268 }
01269 
01271 /*    otherwise, the updated state is written to ESI->hw_start      */
01272         int
01273 _papi_nvml_write( hwd_context_t *ctx, hwd_control_state_t *ctl,
01274                 long long *events )
01275 {
01276         SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
01277 
01278         (void) ctx;
01279         (void) ctl;
01280         (void) events;
01281 
01282 
01283         /* You can change ECC mode and compute exclusivity modes on the cards */
01284         /* But I don't see this as a function of a PAPI component at this time */
01285         /* All implementation issues aside. */
01286         return PAPI_OK;
01287 }
01288 
01289 
01291 /*  If the eventset is not currently running, then the saved value in the   */
01292 /*  EventSet is set to zero without calling this routine.                   */
01293         int
01294 _papi_nvml_reset( hwd_context_t * ctx, hwd_control_state_t * ctl )
01295 {
01296         SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl );
01297         
01298         (void) ctx;
01299         (void) ctl;
01300 
01301         /* Reset the hardware */
01302         nvml_hardware_reset(  );
01303 
01304         return PAPI_OK;
01305 }
01306 
01308         int
01309 _papi_nvml_shutdown_component()
01310 {
01311         SUBDBG( "Enter:\n" );
01312 
01313     if (nvml_native_table != NULL)
01314         papi_free(nvml_native_table);
01315     if (devices != NULL)
01316         papi_free(devices);
01317     if (features != NULL)
01318         papi_free(features);
01319 
01320         (*nvmlShutdownPtr)();
01321 
01322         device_count = 0;
01323         num_events = 0;
01324 
01325         // close the dynamic libraries needed by this component (opened in the init component call)
01326         dlclose(dl1);
01327         dlclose(dl2);
01328         dlclose(dl3);
01329 
01330         return PAPI_OK;
01331 }
01332 
01334         int
01335 _papi_nvml_shutdown_thread( hwd_context_t *ctx )
01336 {
01337         SUBDBG( "Enter: ctx: %p\n", ctx );
01338 
01339         (void) ctx;
01340 
01341         /* Last chance to clean up thread */
01342 
01343         return PAPI_OK;
01344 }
01345 
01346 
01347 
01351         int
01352 _papi_nvml_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option )
01353 {
01354         SUBDBG( "Enter: ctx: %p, code: %d\n", ctx, code );
01355 
01356         (void) ctx;
01357         (void) code;
01358         (void) option;
01359 
01360 
01361         /* FIXME.  This should maybe set up more state, such as which counters are active and */
01362         /*         counter mappings. */
01363 
01364         return PAPI_OK;
01365 }
01366 
01376         int
01377 _papi_nvml_set_domain( hwd_control_state_t * cntrl, int domain )
01378 {
01379         SUBDBG( "Enter: cntrl: %p, domain: %d\n", cntrl, domain );
01380 
01381         (void) cntrl;
01382 
01383         int found = 0;
01384 
01385         if ( PAPI_DOM_USER & domain ) {
01386                 SUBDBG( " PAPI_DOM_USER \n" );
01387                 found = 1;
01388         }
01389         if ( PAPI_DOM_KERNEL & domain ) {
01390                 SUBDBG( " PAPI_DOM_KERNEL \n" );
01391                 found = 1;
01392         }
01393         if ( PAPI_DOM_OTHER & domain ) {
01394                 SUBDBG( " PAPI_DOM_OTHER \n" );
01395                 found = 1;
01396         }
01397         if ( PAPI_DOM_ALL & domain ) {
01398                 SUBDBG( " PAPI_DOM_ALL \n" );
01399                 found = 1;
01400         }
01401         if ( !found )
01402                 return ( PAPI_EINVAL );
01403 
01404         return PAPI_OK;
01405 }
01406 
01407 
01408 /**************************************************************/
01409 /* Naming functions, used to translate event numbers to names */
01410 /**************************************************************/
01411 
01412 
01419         int
01420 _papi_nvml_ntv_enum_events( unsigned int *EventCode, int modifier )
01421 {
01422         int index;
01423 
01424         switch ( modifier ) {
01425 
01426                 /* return EventCode of first event */
01427                 case PAPI_ENUM_FIRST:
01428                         /* return the first event that we support */
01429 
01430                         *EventCode = 0;
01431                         return PAPI_OK;
01432 
01433                         /* return EventCode of next available event */
01434                 case PAPI_ENUM_EVENTS:
01435                         index = *EventCode;
01436 
01437                         /* Make sure we are in range */
01438                         if ( index < num_events - 1 ) {
01439 
01440                                 /* This assumes a non-sparse mapping of the events */
01441                                 *EventCode = *EventCode + 1;
01442                                 return PAPI_OK;
01443                         } else {
01444                                 return PAPI_ENOEVNT;
01445                         }
01446                         break;
01447 
01448                 default:
01449                         return PAPI_EINVAL;
01450         }
01451 
01452         return PAPI_EINVAL;
01453 }
01454 
01460         int
01461 _papi_nvml_ntv_code_to_name( unsigned int EventCode, char *name, int len )
01462 {
01463         SUBDBG("Entry: EventCode: %#x, name: %s, len: %d\n", EventCode, name, len);
01464         int index;
01465 
01466         index = EventCode;
01467 
01468         /* Make sure we are in range */
01469         if (index >= num_events) return PAPI_ENOEVNT;
01470 
01471         strncpy( name, nvml_native_table[index].name, len );
01472 
01473         return PAPI_OK;
01474 }
01475 
01481         int
01482 _papi_nvml_ntv_code_to_descr( unsigned int EventCode, char *descr, int len )
01483 {
01484         int index;
01485         index = EventCode;
01486 
01487         if (index >= num_events) return PAPI_ENOEVNT;
01488 
01489         strncpy( descr, nvml_native_table[index].description, len );
01490 
01491         return PAPI_OK;
01492 }
01493 
01495 papi_vector_t _nvml_vector = {
01496         .cmp_info = {
01497                 /* default component information */
01498                 /* (unspecified values are initialized to 0) */
01499 
01500                 .name = "nvml",
01501                 .short_name="nvml",
01502                 .version = "1.0",
01503                 .description = "NVML provides the API for monitoring NVIDIA hardware (power usage, temperature, fan speed, etc)",
01504                 .support_version = "n/a",
01505                 .kernel_version = "n/a",
01506 
01507                 .num_preset_events = 0,
01508                 .num_native_events = 0, /* set by init_component */
01509                 .default_domain = PAPI_DOM_USER,
01510                 .available_domains = PAPI_DOM_USER,
01511                 .default_granularity = PAPI_GRN_THR,
01512                 .available_granularities = PAPI_GRN_THR,
01513                 .hardware_intr_sig = PAPI_INT_SIGNAL,
01514 
01515 
01516                 /* component specific cmp_info initializations */
01517                 .hardware_intr = 0,
01518                 .precise_intr = 0,
01519                 .posix1b_timers = 0,
01520                 .kernel_profile = 0,
01521                 .kernel_multiplex = 0,
01522                 .fast_counter_read = 0,
01523                 .fast_real_timer = 0,
01524                 .fast_virtual_timer = 0,
01525                 .attach = 0,
01526                 .attach_must_ptrace = 0,
01527                 .cntr_umasks = 0,
01528                 .cpu = 0,
01529                 .inherit = 0,
01530         },
01531 
01532         /* sizes of framework-opaque component-private structures */
01533         .size = {
01534              .context = sizeof ( nvml_context_t ),
01535              .control_state = sizeof ( nvml_control_state_t ),
01536              .reg_value = sizeof ( nvml_register_t ),
01537                      // .reg_alloc = sizeof ( nvml_reg_alloc_t ),
01538         },
01539 
01540         /* function pointers */
01541 
01542         /* Used for general PAPI interactions */
01543         .start =                _papi_nvml_start,
01544         .stop =                 _papi_nvml_stop,
01545         .read =                 _papi_nvml_read,
01546         .reset =                _papi_nvml_reset,   
01547         .write =                _papi_nvml_write,
01548         .init_component =       _papi_nvml_init_component,  
01549         .init_thread =          _papi_nvml_init_thread,
01550         .init_control_state =   _papi_nvml_init_control_state,
01551         .update_control_state = _papi_nvml_update_control_state,
01552         .ctl =                  _papi_nvml_ctl, 
01553         .shutdown_thread =      _papi_nvml_shutdown_thread,
01554         .shutdown_component =   _papi_nvml_shutdown_component,
01555         .set_domain =           _papi_nvml_set_domain,
01556         .cleanup_eventset =     NULL,
01557         /* called in add_native_events() */
01558         .allocate_registers =   NULL,
01559 
01560         /* Used for overflow/profiling */
01561         .dispatch_timer =       NULL,
01562         .get_overflow_address = NULL,
01563         .stop_profiling =       NULL,
01564         .set_overflow =         NULL,
01565         .set_profile =          NULL,
01566 
01567         /* Name Mapping Functions */
01568         .ntv_enum_events =   _papi_nvml_ntv_enum_events,
01569         .ntv_name_to_code  = NULL,
01570         .ntv_code_to_name =  _papi_nvml_ntv_code_to_name,
01571         .ntv_code_to_descr = _papi_nvml_ntv_code_to_descr,
01572 
01573 };
01574 
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines