|
PAPI
5.3.0.0
|
00001 /****************************/ 00002 /* THIS IS OPEN SOURCE CODE */ 00003 /****************************/ 00004 00020 #include <dlfcn.h> 00021 00022 #include <stdio.h> 00023 #include <string.h> 00024 #include <stdlib.h> 00025 #include <inttypes.h> 00026 #include <string.h> 00027 /* Headers required by PAPI */ 00028 #include "papi.h" 00029 #include "papi_internal.h" 00030 #include "papi_vector.h" 00031 #include "papi_memory.h" 00032 00033 #include "linux-nvml.h" 00034 00035 #include "nvml.h" 00036 #include "cuda.h" 00037 #include "cuda_runtime_api.h" 00038 00039 void (*_dl_non_dynamic_init)(void) __attribute__((weak)); 00040 00041 /***** CHANGE PROTOTYPES TO DECLARE CUDA AND NVML LIBRARY SYMBOLS AS WEAK ***** 00042 * This is done so that a version of PAPI built with the nvml component can * 00043 * be installed on a system which does not have the cuda libraries installed. * 00044 * * 00045 * If this is done without these prototypes, then all papi services on the * 00046 * system without the cuda libraries installed will fail. The PAPI libraries * 00047 * contain references to the cuda libraries which are not installed. The * 00048 * load of PAPI commands fails because the cuda library references can not be * 00049 * resolved. * 00050 * * 00051 * This also defines pointers to the cuda library functions that we call. * 00052 * These function pointers will be resolved with dlopen/dlsym calls at * 00053 * component initialization time. The component then calls the cuda library * 00054 * functions through these function pointers. * 00055 ********************************************************************************/ 00056 #undef CUDAAPI 00057 #define CUDAAPI __attribute__((weak)) 00058 CUresult CUDAAPI cuInit(unsigned int); 00059 00060 CUresult (*cuInitPtr)(unsigned int); 00061 00062 #undef CUDARTAPI 00063 #define CUDARTAPI __attribute__((weak)) 00064 cudaError_t CUDARTAPI cudaGetDevice(int *); 00065 cudaError_t CUDARTAPI cudaGetDeviceCount(int *); 00066 cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *, int, int); 00067 00068 cudaError_t (*cudaGetDevicePtr)(int *); 00069 cudaError_t (*cudaGetDeviceCountPtr)(int *); 00070 cudaError_t (*cudaDeviceGetPCIBusIdPtr)(char *, int, int); 00071 00072 #undef DECLDIR 00073 #define DECLDIR __attribute__((weak)) 00074 nvmlReturn_t DECLDIR nvmlDeviceGetClockInfo (nvmlDevice_t, nvmlClockType_t, unsigned int *); 00075 const char* DECLDIR nvmlErrorString (nvmlReturn_t); 00076 nvmlReturn_t DECLDIR nvmlDeviceGetDetailedEccErrors (nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, nvmlEccErrorCounts_t *); 00077 nvmlReturn_t DECLDIR nvmlDeviceGetFanSpeed (nvmlDevice_t, unsigned int *); 00078 nvmlReturn_t DECLDIR nvmlDeviceGetMemoryInfo (nvmlDevice_t, nvmlMemory_t *); 00079 nvmlReturn_t DECLDIR nvmlDeviceGetPerformanceState (nvmlDevice_t, nvmlPstates_t *); 00080 nvmlReturn_t DECLDIR nvmlDeviceGetPowerUsage (nvmlDevice_t, unsigned int *); 00081 nvmlReturn_t DECLDIR nvmlDeviceGetTemperature (nvmlDevice_t, nvmlTemperatureSensors_t, unsigned int *); 00082 nvmlReturn_t DECLDIR nvmlDeviceGetTotalEccErrors (nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, unsigned long long *); 00083 nvmlReturn_t DECLDIR nvmlDeviceGetUtilizationRates (nvmlDevice_t, nvmlUtilization_t *); 00084 nvmlReturn_t DECLDIR nvmlDeviceGetHandleByIndex (unsigned int, nvmlDevice_t *); 00085 nvmlReturn_t DECLDIR nvmlDeviceGetPciInfo (nvmlDevice_t, nvmlPciInfo_t *); 00086 nvmlReturn_t DECLDIR nvmlDeviceGetName (nvmlDevice_t, char *, unsigned int); 00087 nvmlReturn_t DECLDIR nvmlDeviceGetInforomVersion (nvmlDevice_t, nvmlInforomObject_t, char *, unsigned int); 00088 nvmlReturn_t DECLDIR nvmlDeviceGetEccMode (nvmlDevice_t, nvmlEnableState_t *, nvmlEnableState_t *); 00089 nvmlReturn_t DECLDIR nvmlInit (void); 00090 nvmlReturn_t DECLDIR nvmlDeviceGetCount (unsigned int *); 00091 nvmlReturn_t DECLDIR nvmlShutdown (void); 00092 00093 nvmlReturn_t (*nvmlDeviceGetClockInfoPtr) (nvmlDevice_t, nvmlClockType_t, unsigned int *); 00094 char* (*nvmlErrorStringPtr) (nvmlReturn_t); 00095 nvmlReturn_t (*nvmlDeviceGetDetailedEccErrorsPtr) (nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, nvmlEccErrorCounts_t *); 00096 nvmlReturn_t (*nvmlDeviceGetFanSpeedPtr) (nvmlDevice_t, unsigned int *); 00097 nvmlReturn_t (*nvmlDeviceGetMemoryInfoPtr) (nvmlDevice_t, nvmlMemory_t *); 00098 nvmlReturn_t (*nvmlDeviceGetPerformanceStatePtr) (nvmlDevice_t, nvmlPstates_t *); 00099 nvmlReturn_t (*nvmlDeviceGetPowerUsagePtr) (nvmlDevice_t, unsigned int *); 00100 nvmlReturn_t (*nvmlDeviceGetTemperaturePtr) (nvmlDevice_t, nvmlTemperatureSensors_t, unsigned int *); 00101 nvmlReturn_t (*nvmlDeviceGetTotalEccErrorsPtr) (nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, unsigned long long *); 00102 nvmlReturn_t (*nvmlDeviceGetUtilizationRatesPtr) (nvmlDevice_t, nvmlUtilization_t *); 00103 nvmlReturn_t (*nvmlDeviceGetHandleByIndexPtr) (unsigned int, nvmlDevice_t *); 00104 nvmlReturn_t (*nvmlDeviceGetPciInfoPtr) (nvmlDevice_t, nvmlPciInfo_t *); 00105 nvmlReturn_t (*nvmlDeviceGetNamePtr) (nvmlDevice_t, char *, unsigned int); 00106 nvmlReturn_t (*nvmlDeviceGetInforomVersionPtr) (nvmlDevice_t, nvmlInforomObject_t, char *, unsigned int); 00107 nvmlReturn_t (*nvmlDeviceGetEccModePtr) (nvmlDevice_t, nvmlEnableState_t *, nvmlEnableState_t *); 00108 nvmlReturn_t (*nvmlInitPtr) (void); 00109 nvmlReturn_t (*nvmlDeviceGetCountPtr) (unsigned int *); 00110 nvmlReturn_t (*nvmlShutdownPtr) (void); 00111 00112 00113 // file handles used to access cuda libraries with dlopen 00114 static void* dl1 = NULL; 00115 static void* dl2 = NULL; 00116 static void* dl3 = NULL; 00117 00118 static int linkCudaLibraries (); 00119 00120 00121 /* Declare our vector in advance */ 00122 papi_vector_t _nvml_vector; 00123 00124 /* upto 25 events per card how many cards per system should we allow for?! */ 00125 #define NVML_MAX_COUNTERS 100 00126 00130 typedef struct nvml_control_state 00131 { 00132 int num_events; 00133 int which_counter[NVML_MAX_COUNTERS]; 00134 long long counter[NVML_MAX_COUNTERS]; 00135 } nvml_control_state_t; 00136 00138 typedef struct nvml_context 00139 { 00140 nvml_control_state_t state; 00141 } nvml_context_t; 00142 00144 static nvml_native_event_entry_t *nvml_native_table=NULL; 00145 00147 static int device_count = 0; 00148 00150 static int num_events = 0; 00151 00152 static nvmlDevice_t* devices=NULL; 00153 static int* features=NULL; 00154 00155 unsigned long long 00156 getClockSpeed( nvmlDevice_t dev, nvmlClockType_t which_one ) 00157 { 00158 unsigned int ret = 0; 00159 nvmlReturn_t bad; 00160 bad = (*nvmlDeviceGetClockInfoPtr)( dev, which_one, &ret ); 00161 00162 if ( NVML_SUCCESS != bad ) { 00163 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); 00164 } 00165 00166 return (unsigned long long)ret; 00167 } 00168 00169 unsigned long long 00170 getEccLocalErrors( nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one) 00171 { 00172 nvmlEccErrorCounts_t counts; 00173 00174 nvmlReturn_t bad; 00175 bad = (*nvmlDeviceGetDetailedEccErrorsPtr)( dev, bits, NVML_VOLATILE_ECC , &counts); 00176 00177 if ( NVML_SUCCESS != bad ) { 00178 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); 00179 } 00180 00181 00182 switch ( which_one ) { 00183 case LOCAL_ECC_REGFILE: 00184 return counts.registerFile; 00185 case LOCAL_ECC_L1: 00186 return counts.l1Cache; 00187 case LOCAL_ECC_L2: 00188 return counts.l2Cache; 00189 case LOCAL_ECC_MEM: 00190 return counts.deviceMemory; 00191 default: 00192 ; 00193 } 00194 return (unsigned long long)-1; 00195 } 00196 00197 unsigned long long 00198 getFanSpeed( nvmlDevice_t dev ) 00199 { 00200 unsigned int ret = 0; 00201 nvmlReturn_t bad; 00202 bad = (*nvmlDeviceGetFanSpeedPtr)( dev, &ret ); 00203 00204 if ( NVML_SUCCESS != bad ) { 00205 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); 00206 } 00207 00208 00209 return (unsigned long long)ret; 00210 } 00211 00212 unsigned long long 00213 getMaxClockSpeed( nvmlDevice_t dev, nvmlClockType_t which_one) 00214 { 00215 unsigned int ret = 0; 00216 nvmlReturn_t bad; 00217 bad = (*nvmlDeviceGetClockInfoPtr)( dev, which_one, &ret ); 00218 00219 if ( NVML_SUCCESS != bad ) { 00220 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); 00221 } 00222 00223 00224 return (unsigned long long) ret; 00225 } 00226 00227 unsigned long long 00228 getMemoryInfo( nvmlDevice_t dev, int which_one ) 00229 { 00230 nvmlMemory_t meminfo; 00231 nvmlReturn_t bad; 00232 bad = (*nvmlDeviceGetMemoryInfoPtr)( dev, &meminfo ); 00233 00234 if ( NVML_SUCCESS != bad ) { 00235 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); 00236 } 00237 00238 switch (which_one) { 00239 case MEMINFO_TOTAL_MEMORY: 00240 return meminfo.total; 00241 case MEMINFO_UNALLOCED: 00242 return meminfo.free; 00243 case MEMINFO_ALLOCED: 00244 return meminfo.used; 00245 default: 00246 ; 00247 } 00248 return (unsigned long long)-1; 00249 } 00250 00251 unsigned long long 00252 getPState( nvmlDevice_t dev ) 00253 { 00254 unsigned int ret = 0; 00255 nvmlPstates_t state = NVML_PSTATE_15; 00256 nvmlReturn_t bad; 00257 bad = (*nvmlDeviceGetPerformanceStatePtr)( dev, &state ); 00258 00259 if ( NVML_SUCCESS != bad ) { 00260 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); 00261 } 00262 00263 00264 switch ( state ) { 00265 case NVML_PSTATE_15: 00266 ret++; 00267 case NVML_PSTATE_14: 00268 ret++; 00269 case NVML_PSTATE_13: 00270 ret++; 00271 case NVML_PSTATE_12: 00272 ret++; 00273 case NVML_PSTATE_11: 00274 ret++; 00275 case NVML_PSTATE_10: 00276 ret++; 00277 case NVML_PSTATE_9: 00278 ret++; 00279 case NVML_PSTATE_8: 00280 ret++; 00281 case NVML_PSTATE_7: 00282 ret++; 00283 case NVML_PSTATE_6: 00284 ret++; 00285 case NVML_PSTATE_5: 00286 ret++; 00287 case NVML_PSTATE_4: 00288 ret++; 00289 case NVML_PSTATE_3: 00290 ret++; 00291 case NVML_PSTATE_2: 00292 ret++; 00293 case NVML_PSTATE_1: 00294 ret++; 00295 case NVML_PSTATE_0: 00296 break; 00297 case NVML_PSTATE_UNKNOWN: 00298 default: 00299 /* This should never happen? 00300 * The API docs just state Unknown performance state... */ 00301 return (unsigned long long) -1; 00302 } 00303 00304 return (unsigned long long)ret; 00305 } 00306 00307 unsigned long long 00308 getPowerUsage( nvmlDevice_t dev ) 00309 { 00310 unsigned int power; 00311 nvmlReturn_t bad; 00312 bad = (*nvmlDeviceGetPowerUsagePtr)( dev, &power ); 00313 00314 if ( NVML_SUCCESS != bad ) { 00315 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); 00316 } 00317 00318 00319 return (unsigned long long) power; 00320 } 00321 00322 unsigned long long 00323 getTemperature( nvmlDevice_t dev ) 00324 { 00325 unsigned int ret = 0; 00326 nvmlReturn_t bad; 00327 bad = (*nvmlDeviceGetTemperaturePtr)( dev, NVML_TEMPERATURE_GPU, &ret ); 00328 00329 if ( NVML_SUCCESS != bad ) { 00330 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); 00331 } 00332 00333 00334 return (unsigned long long)ret; 00335 } 00336 00337 unsigned long long 00338 getTotalEccErrors( nvmlDevice_t dev, nvmlEccBitType_t bits) 00339 { 00340 unsigned long long counts = 0; 00341 nvmlReturn_t bad; 00342 bad = (*nvmlDeviceGetTotalEccErrorsPtr)( dev, bits, NVML_VOLATILE_ECC , &counts); 00343 00344 if ( NVML_SUCCESS != bad ) { 00345 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); 00346 } 00347 00348 00349 return counts; 00350 } 00351 00352 /* 0 => gpu util 00353 1 => memory util 00354 */ 00355 unsigned long long 00356 getUtilization( nvmlDevice_t dev, int which_one ) 00357 { 00358 nvmlUtilization_t util; 00359 nvmlReturn_t bad; 00360 bad = (*nvmlDeviceGetUtilizationRatesPtr)( dev, &util ); 00361 00362 if ( NVML_SUCCESS != bad ) { 00363 SUBDBG( "something went wrong %s\n", (*nvmlErrorStringPtr)(bad)); 00364 } 00365 00366 00367 switch (which_one) { 00368 case GPU_UTILIZATION: 00369 return (unsigned long long) util.gpu; 00370 case MEMORY_UTILIZATION: 00371 return (unsigned long long) util.memory; 00372 default: 00373 ; 00374 } 00375 00376 return (unsigned long long) -1; 00377 } 00378 00379 static void 00380 nvml_hardware_reset( ) 00381 { 00382 /* nvmlDeviceSet* and nvmlDeviceClear* calls require root/admin access, so while 00383 * possible to implement a reset on the ECC counters, we pass */ 00384 /* 00385 int i; 00386 for ( i=0; i < device_count; i++ ) 00387 nvmlDeviceClearEccErrorCounts( device[i], NVML_VOLATILE_ECC ); 00388 */ 00389 } 00390 00392 /* You might replace this with code that accesses */ 00393 /* hardware or reads values from the operatings system. */ 00394 static int 00395 nvml_hardware_read( long long *value, int which_one) 00396 //, nvml_context_t *ctx) 00397 { 00398 nvml_native_event_entry_t *entry; 00399 nvmlDevice_t handle; 00400 int cudaIdx = -1; 00401 00402 entry = &nvml_native_table[which_one]; 00403 *value = (long long) -1; 00404 /* replace entry->resources with the current cuda_device->nvml device */ 00405 (*cudaGetDevicePtr)( &cudaIdx ); 00406 00407 if ( cudaIdx < 0 || cudaIdx > device_count ) 00408 return PAPI_EINVAL; 00409 00410 /* Make sure the device we are running on has the requested event */ 00411 if ( !HAS_FEATURE( features[cudaIdx] , entry->type) ) 00412 return PAPI_EINVAL; 00413 00414 handle = devices[cudaIdx]; 00415 00416 switch (entry->type) { 00417 case FEATURE_CLOCK_INFO: 00418 *value = getClockSpeed( handle, 00419 (nvmlClockType_t)entry->options.clock ); 00420 break; 00421 case FEATURE_ECC_LOCAL_ERRORS: 00422 *value = getEccLocalErrors( handle, 00423 (nvmlEccBitType_t)entry->options.ecc_opts.bits, 00424 (int)entry->options.ecc_opts.which_one); 00425 break; 00426 case FEATURE_FAN_SPEED: 00427 *value = getFanSpeed( handle ); 00428 break; 00429 case FEATURE_MAX_CLOCK: 00430 *value = getMaxClockSpeed( handle, 00431 (nvmlClockType_t)entry->options.clock ); 00432 break; 00433 case FEATURE_MEMORY_INFO: 00434 *value = getMemoryInfo( handle, 00435 (int)entry->options.which_one ); 00436 break; 00437 case FEATURE_PERF_STATES: 00438 *value = getPState( handle ); 00439 break; 00440 case FEATURE_POWER: 00441 *value = getPowerUsage( handle ); 00442 break; 00443 case FEATURE_TEMP: 00444 *value = getTemperature( handle ); 00445 break; 00446 case FEATURE_ECC_TOTAL_ERRORS: 00447 *value = getTotalEccErrors( handle, 00448 (nvmlEccBitType_t)entry->options.ecc_opts.bits ); 00449 break; 00450 case FEATURE_UTILIZATION: 00451 *value = getUtilization( handle, 00452 (int)entry->options.which_one ); 00453 break; 00454 default: 00455 return PAPI_EINVAL; 00456 } 00457 00458 return PAPI_OK; 00459 00460 00461 } 00462 00463 /********************************************************************/ 00464 /* Below are the functions required by the PAPI component interface */ 00465 /********************************************************************/ 00466 00468 int 00469 _papi_nvml_init_thread( hwd_context_t * ctx ) 00470 { 00471 (void) ctx; 00472 00473 SUBDBG( "Enter: ctx: %p\n", ctx ); 00474 00475 return PAPI_OK; 00476 } 00477 00478 static int 00479 detectDevices( ) 00480 { 00481 nvmlReturn_t ret; 00482 nvmlEnableState_t mode = NVML_FEATURE_DISABLED; 00483 nvmlDevice_t handle; 00484 nvmlPciInfo_t info; 00485 00486 cudaError_t cuerr; 00487 00488 char busId[16]; 00489 char name[64]; 00490 char inforomECC[16]; 00491 char inforomPower[16]; 00492 char names[device_count][64]; 00493 char nvml_busIds[device_count][16]; 00494 00495 float ecc_version = 0.0, power_version = 0.0; 00496 00497 int i = 0, 00498 j = 0; 00499 int isTesla = 0; 00500 int isFermi = 0; 00501 int isUnique = 1; 00502 00503 unsigned int temp = 0; 00504 00505 00506 /* list of nvml pci_busids */ 00507 for (i=0; i < device_count; i++) { 00508 ret = (*nvmlDeviceGetHandleByIndexPtr)( i, &handle ); 00509 if ( NVML_SUCCESS != ret ) { 00510 SUBDBG("nvmlDeviceGetHandleByIndex(%d) failed\n", i); 00511 return PAPI_ESYS; 00512 } 00513 00514 ret = (*nvmlDeviceGetPciInfoPtr)( handle, &info ); 00515 if ( NVML_SUCCESS != ret ) { 00516 SUBDBG("nvmlDeviceGetPciInfo() failed %s\n", (*nvmlErrorStringPtr)(ret) ); 00517 return PAPI_ESYS; 00518 } 00519 strncpy(nvml_busIds[i], info.busId, 16); 00520 } 00521 00522 /* We want to key our list of nvmlDevice_ts by each device's cuda index */ 00523 for (i=0; i < device_count; i++) { 00524 cuerr = (*cudaDeviceGetPCIBusIdPtr)( busId, 16, i ); 00525 if ( CUDA_SUCCESS != cuerr ) { 00526 SUBDBG("cudaDeviceGetPCIBusId failed.\n"); 00527 return PAPI_ESYS; 00528 } 00529 for (j=0; j < device_count; j++ ) { 00530 if ( !strncmp( busId, nvml_busIds[j], 16) ) { 00531 ret = (*nvmlDeviceGetHandleByIndexPtr)(j, &devices[i] ); 00532 if ( NVML_SUCCESS != ret ) { 00533 SUBDBG("nvmlDeviceGetHandleByIndex(%d, &devices[%d]) failed.\n", j, i); 00534 return PAPI_ESYS; 00535 } 00536 break; 00537 } 00538 } 00539 } 00540 00541 memset(names, 0x0, device_count*64); 00542 /* So for each card, check whats querable */ 00543 for (i=0; i < device_count; i++ ) { 00544 isTesla=0; 00545 isFermi=1; 00546 isUnique = 1; 00547 features[i] = 0; 00548 00549 ret = (*nvmlDeviceGetNamePtr)( devices[i], name, 64 ); 00550 if ( NVML_SUCCESS != ret) { 00551 SUBDBG("nvmlDeviceGetName failed \n"); 00552 return PAPI_ESYS; 00553 } 00554 00555 for (j=0; j < i; j++ ) 00556 if ( 0 == strncmp( name, names[j], 64 ) ) { 00557 /* if we have a match, and IF everything is sane, 00558 * devices with the same name eg Tesla C2075 share features */ 00559 isUnique = 0; 00560 features[i] = features[j]; 00561 00562 } 00563 00564 if ( isUnique ) { 00565 ret = (*nvmlDeviceGetInforomVersionPtr)( devices[i], NVML_INFOROM_ECC, inforomECC, 16); 00566 if ( NVML_SUCCESS != ret ) { 00567 SUBDBG("nvmlGetInforomVersion carps %s\n", (*nvmlErrorStringPtr)(ret ) ); 00568 isFermi = 0; 00569 } 00570 ret = (*nvmlDeviceGetInforomVersionPtr)( devices[i], NVML_INFOROM_POWER, inforomPower, 16); 00571 if ( NVML_SUCCESS != ret ) { 00572 /* This implies the card is older then Fermi */ 00573 SUBDBG("nvmlGetInforomVersion carps %s\n", (*nvmlErrorStringPtr)(ret ) ); 00574 SUBDBG("Based upon the return to nvmlGetInforomVersion, we conclude this card is older then Fermi.\n"); 00575 isFermi = 0; 00576 } 00577 00578 ecc_version = strtof(inforomECC, NULL ); 00579 power_version = strtof( inforomPower, NULL); 00580 00581 ret = (*nvmlDeviceGetNamePtr)( devices[i], name, 64 ); 00582 isTesla = ( NULL == strstr(name, "Tesla") ) ? 0:1; 00583 00584 /* For Tesla and Quadro products from Fermi and Kepler families. */ 00585 if ( isFermi ) { 00586 features[i] |= FEATURE_CLOCK_INFO; 00587 num_events += 3; 00588 } 00589 00590 /* For Tesla and Quadro products from Fermi and Kepler families. 00591 requires NVML_INFOROM_ECC 2.0 or higher for location-based counts 00592 requires NVML_INFOROM_ECC 1.0 or higher for all other ECC counts 00593 requires ECC mode to be enabled. */ 00594 ret = (*nvmlDeviceGetEccModePtr)( devices[i], &mode, NULL ); 00595 if ( NVML_SUCCESS == ret ) { 00596 if ( NVML_FEATURE_ENABLED == mode) { 00597 if ( ecc_version >= 2.0 ) { 00598 features[i] |= FEATURE_ECC_LOCAL_ERRORS; 00599 num_events += 8; /* {single bit, two bit errors} x { reg, l1, l2, memory } */ 00600 } 00601 if ( ecc_version >= 1.0 ) { 00602 features[i] |= FEATURE_ECC_TOTAL_ERRORS; 00603 num_events += 2; /* single bit errors, double bit errors */ 00604 } 00605 } 00606 } else { 00607 SUBDBG("nvmlDeviceGetEccMode does not appear to be supported. (nvml\ 00608 return code %d)\n", ret); 00609 } 00610 00611 /* For all discrete products with dedicated fans */ 00612 features[i] |= FEATURE_FAN_SPEED; 00613 num_events++; 00614 00615 /* For Tesla and Quadro products from Fermi and Kepler families. */ 00616 if ( isFermi ) { 00617 features[i] |= FEATURE_MAX_CLOCK; 00618 num_events += 3; 00619 } 00620 00621 /* For all products */ 00622 features[i] |= FEATURE_MEMORY_INFO; 00623 num_events += 3; /* total, free, used */ 00624 00625 /* For Tesla and Quadro products from the Fermi and Kepler families. */ 00626 if ( isFermi ) { 00627 features[i] |= FEATURE_PERF_STATES; 00628 num_events++; 00629 } 00630 00631 /* For "GF11x" Tesla and Quadro products from the Fermi family 00632 requires NVML_INFOROM_POWER 3.0 or higher 00633 For Tesla and Quadro products from the Kepler family 00634 does not require NVML_INFOROM_POWER */ 00635 /* Just try reading power, if it works, enable it*/ 00636 ret = (*nvmlDeviceGetPowerUsagePtr)( devices[i], &temp); 00637 if ( NVML_SUCCESS == ret ) { 00638 features[i] |= FEATURE_POWER; 00639 num_events++; 00640 } else { 00641 SUBDBG("nvmlDeviceGetPowerUsage does not appear to be supported on\ 00642 this card. (nvml return code %d)\n", ret ); 00643 } 00644 00645 /* For all discrete and S-class products. */ 00646 features[i] |= FEATURE_TEMP; 00647 num_events++; 00648 00649 /* For Tesla and Quadro products from the Fermi and Kepler families */ 00650 if (isFermi) { 00651 features[i] |= FEATURE_UTILIZATION; 00652 num_events += 2; 00653 } 00654 00655 strncpy( names[i], name, 64); 00656 00657 } 00658 } 00659 return PAPI_OK; 00660 } 00661 00662 static void 00663 createNativeEvents( ) 00664 { 00665 char name[64]; 00666 char sanitized_name[PAPI_MAX_STR_LEN]; 00667 char names[device_count][64]; 00668 00669 int i, nameLen = 0, j; 00670 int isUnique = 1; 00671 00672 nvml_native_event_entry_t* entry; 00673 nvmlReturn_t ret; 00674 00675 nvml_native_table = (nvml_native_event_entry_t*) papi_malloc( 00676 sizeof(nvml_native_event_entry_t) * num_events ); 00677 memset( nvml_native_table, 0x0, sizeof(nvml_native_event_entry_t) * num_events ); 00678 entry = &nvml_native_table[0]; 00679 00680 for (i=0; i < device_count; i++ ) { 00681 memset( names[i], 0x0, 64 ); 00682 isUnique = 1; 00683 ret = (*nvmlDeviceGetNamePtr)( devices[i], name, 64 ); 00684 00685 for (j=0; j < i; j++ ) 00686 { 00687 if ( 0 == strncmp( name, names[j], 64 ) ) 00688 isUnique = 0; 00689 } 00690 00691 if ( isUnique ) { 00692 nameLen = strlen(name); 00693 strncpy(sanitized_name, name, PAPI_MAX_STR_LEN ); 00694 for (j=0; j < nameLen; j++) 00695 if ( ' ' == sanitized_name[j] ) 00696 sanitized_name[j] = '_'; 00697 00698 00699 00700 if ( HAS_FEATURE( features[i], FEATURE_CLOCK_INFO ) ) { 00701 sprintf( entry->name, "%s:graphics_clock", sanitized_name ); 00702 strncpy(entry->description,"Graphics clock domain (MHz).", PAPI_MAX_STR_LEN ); 00703 entry->options.clock = NVML_CLOCK_GRAPHICS; 00704 entry->type = FEATURE_CLOCK_INFO; 00705 entry++; 00706 00707 sprintf( entry->name, "%s:sm_clock", sanitized_name); 00708 strncpy(entry->description,"SM clock domain (MHz).", PAPI_MAX_STR_LEN); 00709 entry->options.clock = NVML_CLOCK_SM; 00710 entry->type = FEATURE_CLOCK_INFO; 00711 entry++; 00712 00713 sprintf( entry->name, "%s:memory_clock", sanitized_name); 00714 strncpy(entry->description,"Memory clock domain (MHz).", PAPI_MAX_STR_LEN); 00715 entry->options.clock = NVML_CLOCK_MEM; 00716 entry->type = FEATURE_CLOCK_INFO; 00717 entry++; 00718 } 00719 00720 if ( HAS_FEATURE( features[i], FEATURE_ECC_LOCAL_ERRORS ) ) { 00721 sprintf(entry->name, "%s:l1_single_ecc_errors", sanitized_name); 00722 strncpy(entry->description,"L1 cache single bit ECC", PAPI_MAX_STR_LEN); 00723 entry->options.ecc_opts = (struct local_ecc){ 00724 .bits = NVML_SINGLE_BIT_ECC, 00725 .which_one = LOCAL_ECC_L1, 00726 }; 00727 entry->type = FEATURE_ECC_LOCAL_ERRORS; 00728 entry++; 00729 00730 sprintf(entry->name, "%s:l2_single_ecc_errors", sanitized_name); 00731 strncpy(entry->description,"L2 cache single bit ECC", PAPI_MAX_STR_LEN); 00732 entry->options.ecc_opts = (struct local_ecc){ 00733 .bits = NVML_SINGLE_BIT_ECC, 00734 .which_one = LOCAL_ECC_L2, 00735 }; 00736 entry->type = FEATURE_ECC_LOCAL_ERRORS; 00737 entry++; 00738 00739 sprintf(entry->name, "%s:memory_single_ecc_errors", sanitized_name); 00740 strncpy(entry->description,"Device memory single bit ECC", PAPI_MAX_STR_LEN); 00741 entry->options.ecc_opts = (struct local_ecc){ 00742 .bits = NVML_SINGLE_BIT_ECC, 00743 .which_one = LOCAL_ECC_MEM, 00744 }; 00745 entry->type = FEATURE_ECC_LOCAL_ERRORS; 00746 entry++; 00747 00748 sprintf(entry->name, "%s:regfile_single_ecc_errors", sanitized_name); 00749 strncpy(entry->description,"Register file single bit ECC", PAPI_MAX_STR_LEN); 00750 entry->options.ecc_opts = (struct local_ecc){ 00751 .bits = NVML_SINGLE_BIT_ECC, 00752 .which_one = LOCAL_ECC_REGFILE, 00753 }; 00754 entry->type = FEATURE_ECC_LOCAL_ERRORS; 00755 entry++; 00756 00757 sprintf(entry->name, "%s:1l_double_ecc_errors", sanitized_name); 00758 strncpy(entry->description,"L1 cache double bit ECC", PAPI_MAX_STR_LEN); 00759 entry->options.ecc_opts = (struct local_ecc){ 00760 .bits = NVML_DOUBLE_BIT_ECC, 00761 .which_one = LOCAL_ECC_L1, 00762 }; 00763 entry->type = FEATURE_ECC_LOCAL_ERRORS; 00764 entry++; 00765 00766 sprintf(entry->name, "%s:l2_double_ecc_errors", sanitized_name); 00767 strncpy(entry->description,"L2 cache double bit ECC", PAPI_MAX_STR_LEN); 00768 entry->options.ecc_opts = (struct local_ecc){ 00769 .bits = NVML_DOUBLE_BIT_ECC, 00770 .which_one = LOCAL_ECC_L2, 00771 }; 00772 entry->type = FEATURE_ECC_LOCAL_ERRORS; 00773 entry++; 00774 00775 sprintf(entry->name, "%s:memory_double_ecc_errors", sanitized_name); 00776 strncpy(entry->description,"Device memory double bit ECC", PAPI_MAX_STR_LEN); 00777 entry->options.ecc_opts = (struct local_ecc){ 00778 .bits = NVML_DOUBLE_BIT_ECC, 00779 .which_one = LOCAL_ECC_MEM, 00780 }; 00781 entry->type = FEATURE_ECC_LOCAL_ERRORS; 00782 entry++; 00783 00784 sprintf(entry->name, "%s:regfile_double_ecc_errors", sanitized_name); 00785 strncpy(entry->description,"Register file double bit ECC", PAPI_MAX_STR_LEN); 00786 entry->options.ecc_opts = (struct local_ecc){ 00787 .bits = NVML_DOUBLE_BIT_ECC, 00788 .which_one = LOCAL_ECC_REGFILE, 00789 }; 00790 entry->type = FEATURE_ECC_LOCAL_ERRORS; 00791 entry++; 00792 } 00793 00794 if ( HAS_FEATURE( features[i], FEATURE_FAN_SPEED ) ) { 00795 sprintf( entry->name, "%s:fan_speed", sanitized_name); 00796 strncpy(entry->description,"The fan speed expressed as a percent of the maximum, i.e. full speed is 100%", PAPI_MAX_STR_LEN); 00797 entry->type = FEATURE_FAN_SPEED; 00798 entry++; 00799 } 00800 00801 if ( HAS_FEATURE( features[i], FEATURE_MAX_CLOCK ) ) { 00802 sprintf( entry->name, "%s:graphics_max_clock", sanitized_name); 00803 strncpy(entry->description,"Maximal Graphics clock domain (MHz).", PAPI_MAX_STR_LEN); 00804 entry->options.clock = NVML_CLOCK_GRAPHICS; 00805 entry->type = FEATURE_MAX_CLOCK; 00806 entry++; 00807 00808 sprintf( entry->name, "%s:sm_max_clock", sanitized_name); 00809 strncpy(entry->description,"Maximal SM clock domain (MHz).", PAPI_MAX_STR_LEN); 00810 entry->options.clock = NVML_CLOCK_SM; 00811 entry->type = FEATURE_MAX_CLOCK; 00812 entry++; 00813 00814 sprintf( entry->name, "%s:memory_max_clock", sanitized_name); 00815 strncpy(entry->description,"Maximal Memory clock domain (MHz).", PAPI_MAX_STR_LEN); 00816 entry->options.clock = NVML_CLOCK_MEM; 00817 entry->type = FEATURE_MAX_CLOCK; 00818 entry++; 00819 } 00820 00821 if ( HAS_FEATURE( features[i], FEATURE_MEMORY_INFO ) ) { 00822 sprintf( entry->name, "%s:total_memory", sanitized_name); 00823 strncpy(entry->description,"Total installed FB memory (in bytes).", PAPI_MAX_STR_LEN); 00824 entry->options.which_one = MEMINFO_TOTAL_MEMORY; 00825 entry->type = FEATURE_MEMORY_INFO; 00826 entry++; 00827 00828 sprintf( entry->name, "%s:unallocated_memory", sanitized_name); 00829 strncpy(entry->description,"Uncallocated FB memory (in bytes).", PAPI_MAX_STR_LEN); 00830 entry->options.which_one = MEMINFO_UNALLOCED; 00831 entry->type = FEATURE_MEMORY_INFO; 00832 entry++; 00833 00834 sprintf( entry->name, "%s:allocated_memory", sanitized_name); 00835 strncpy(entry->description, "Allocated FB memory (in bytes). Note that the driver/GPU always sets aside a small amount of memory for bookkeeping.", PAPI_MAX_STR_LEN); 00836 entry->options.which_one = MEMINFO_ALLOCED; 00837 entry->type = FEATURE_MEMORY_INFO; 00838 entry++; 00839 } 00840 00841 if ( HAS_FEATURE( features[i], FEATURE_PERF_STATES ) ) { 00842 sprintf( entry->name, "%s:pstate", sanitized_name); 00843 strncpy(entry->description,"The performance state of the device.", PAPI_MAX_STR_LEN); 00844 entry->type = FEATURE_PERF_STATES; 00845 entry++; 00846 } 00847 00848 if ( HAS_FEATURE( features[i], FEATURE_POWER ) ) { 00849 sprintf( entry->name, "%s:power", sanitized_name); 00850 strncpy(entry->description,"Power usage reading for the device, in miliwatts. This is the power draw for the entire board, including GPU, memory, etc.\n The reading is accurate to within a range of +/-5 watts.", PAPI_MAX_STR_LEN); 00851 entry->type = FEATURE_POWER; 00852 entry++; 00853 } 00854 00855 if ( HAS_FEATURE( features[i], FEATURE_TEMP ) ) { 00856 sprintf( entry->name, "%s:temperature", sanitized_name); 00857 strncpy(entry->description,"Current temperature readings for the device, in degrees C.", PAPI_MAX_STR_LEN); 00858 entry->type = FEATURE_TEMP; 00859 entry++; 00860 } 00861 00862 if ( HAS_FEATURE( features[i], FEATURE_ECC_TOTAL_ERRORS ) ) { 00863 sprintf( entry->name, "%s:total_ecc_errors", sanitized_name); 00864 strncpy(entry->description,"Total single bit errors.", PAPI_MAX_STR_LEN); 00865 entry->options.ecc_opts = (struct local_ecc){ 00866 .bits = NVML_SINGLE_BIT_ECC, 00867 }; 00868 entry->type = FEATURE_ECC_TOTAL_ERRORS; 00869 entry++; 00870 00871 sprintf( entry->name, "%s:total_ecc_errors", sanitized_name); 00872 strncpy(entry->description,"Total double bit errors.", PAPI_MAX_STR_LEN); 00873 entry->options.ecc_opts = (struct local_ecc){ 00874 .bits = NVML_DOUBLE_BIT_ECC, 00875 }; 00876 entry->type = FEATURE_ECC_TOTAL_ERRORS; 00877 entry++; 00878 } 00879 00880 if ( HAS_FEATURE( features[i], FEATURE_UTILIZATION ) ) { 00881 sprintf( entry->name, "%s:gpu_utilization", sanitized_name); 00882 strncpy(entry->description,"Percent of time over the past second during which one or more kernels was executing on the GPU.", PAPI_MAX_STR_LEN); 00883 entry->options.which_one = GPU_UTILIZATION; 00884 entry->type = FEATURE_UTILIZATION; 00885 entry++; 00886 00887 sprintf( entry->name, "%s:memory_utilization", sanitized_name); 00888 strncpy(entry->description,"Percent of time over the past second during which global (device) memory was being read or written.", PAPI_MAX_STR_LEN); 00889 entry->options.which_one = MEMORY_UTILIZATION; 00890 entry->type = FEATURE_UTILIZATION; 00891 entry++; 00892 } 00893 strncpy( names[i], name, 64); 00894 } 00895 } 00896 } 00897 00902 int 00903 _papi_nvml_init_component( int cidx ) 00904 { 00905 SUBDBG ("Entry: cidx: %d\n", cidx); 00906 nvmlReturn_t ret; 00907 cudaError_t cuerr; 00908 int papi_errorcode; 00909 00910 int cuda_count = 0; 00911 unsigned int nvml_count = 0; 00912 00913 /* link in the cuda and nvml libraries and resolve the symbols we need to use */ 00914 if (linkCudaLibraries() != PAPI_OK) { 00915 SUBDBG ("Dynamic link of CUDA libraries failed, component will be disabled.\n"); 00916 SUBDBG ("See disable reason in papi_component_avail output for more details.\n"); 00917 return (PAPI_ENOSUPP); 00918 } 00919 00920 ret = (*nvmlInitPtr)(); 00921 if ( NVML_SUCCESS != ret ) { 00922 strcpy(_nvml_vector.cmp_info.disabled_reason, "The NVIDIA managament library failed to initialize."); 00923 return PAPI_ENOSUPP; 00924 } 00925 00926 cuerr = (*cuInitPtr)( 0 ); 00927 if ( CUDA_SUCCESS != cuerr ) { 00928 strcpy(_nvml_vector.cmp_info.disabled_reason, "The CUDA library failed to initialize."); 00929 return PAPI_ENOSUPP; 00930 } 00931 00932 /* Figure out the number of CUDA devices in the system */ 00933 ret = (*nvmlDeviceGetCountPtr)( &nvml_count ); 00934 if ( NVML_SUCCESS != ret ) { 00935 strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a count of devices from the NVIDIA managament library."); 00936 return PAPI_ENOSUPP; 00937 } 00938 00939 cuerr = (*cudaGetDeviceCountPtr)( &cuda_count ); 00940 if ( CUDA_SUCCESS != cuerr ) { 00941 strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a device count from CUDA."); 00942 return PAPI_ENOSUPP; 00943 } 00944 00945 /* We can probably recover from this, when we're clever */ 00946 if ( (cuda_count > 0) && (nvml_count != (unsigned int)cuda_count ) ) { 00947 strcpy(_nvml_vector.cmp_info.disabled_reason, "Cuda and the NVIDIA managament library have different device counts."); 00948 return PAPI_ENOSUPP; 00949 } 00950 00951 device_count = cuda_count; 00952 00953 /* A per device representation of what events are present */ 00954 features = (int*)papi_malloc(sizeof(int) * device_count ); 00955 00956 /* Handles to each device */ 00957 devices = (nvmlDevice_t*)papi_malloc(sizeof(nvmlDevice_t) * device_count); 00958 00959 /* Figure out what events are supported on each card. */ 00960 if ( (papi_errorcode = detectDevices( ) ) != PAPI_OK ) { 00961 papi_free(features); 00962 papi_free(devices); 00963 sprintf(_nvml_vector.cmp_info.disabled_reason, "An error occured in device feature detection, please check your NVIDIA Management Library and CUDA install." ); 00964 return PAPI_ENOSUPP; 00965 } 00966 00967 /* The assumption is that if everything went swimmingly in detectDevices, 00968 all nvml calls here should be fine. */ 00969 createNativeEvents( ); 00970 00971 /* Export the total number of events available */ 00972 _nvml_vector.cmp_info.num_native_events = num_events; 00973 00974 /* Export the component id */ 00975 _nvml_vector.cmp_info.CmpIdx = cidx; 00976 00977 /* Export the number of 'counters' */ 00978 _nvml_vector.cmp_info.num_cntrs = num_events; 00979 _nvml_vector.cmp_info.num_mpx_cntrs = num_events; 00980 00981 return PAPI_OK; 00982 } 00983 00984 00985 /* 00986 * Link the necessary CUDA libraries to use the cuda component. If any of them can not be found, then 00987 * the CUDA component will just be disabled. This is done at runtime so that a version of PAPI built 00988 * with the CUDA component can be installed and used on systems which have the CUDA libraries installed 00989 * and on systems where these libraries are not installed. 00990 */ 00991 static int 00992 linkCudaLibraries () 00993 { 00994 /* Attempt to guess if we were statically linked to libc, if so bail */ 00995 if ( _dl_non_dynamic_init != NULL ) { 00996 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML component does not support statically linking of libc.", PAPI_MAX_STR_LEN); 00997 return PAPI_ENOSUPP; 00998 } 00999 01000 /* Need to link in the cuda libraries, if not found disable the component */ 01001 dl1 = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL); 01002 if (!dl1) 01003 { 01004 strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA library libcuda.so not found.",PAPI_MAX_STR_LEN); 01005 return ( PAPI_ENOSUPP ); 01006 } 01007 cuInitPtr = dlsym(dl1, "cuInit"); 01008 if (dlerror() != NULL) 01009 { 01010 strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA function cuInit not found.",PAPI_MAX_STR_LEN); 01011 return ( PAPI_ENOSUPP ); 01012 } 01013 01014 dl2 = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL); 01015 if (!dl2) 01016 { 01017 strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA runtime library libcudart.so not found.",PAPI_MAX_STR_LEN); 01018 return ( PAPI_ENOSUPP ); 01019 } 01020 cudaGetDevicePtr = dlsym(dl2, "cudaGetDevice"); 01021 if (dlerror() != NULL) 01022 { 01023 strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDevice not found.",PAPI_MAX_STR_LEN); 01024 return ( PAPI_ENOSUPP ); 01025 } 01026 cudaGetDeviceCountPtr = dlsym(dl2, "cudaGetDeviceCount"); 01027 if (dlerror() != NULL) 01028 { 01029 strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDeviceCount not found.",PAPI_MAX_STR_LEN); 01030 return ( PAPI_ENOSUPP ); 01031 } 01032 cudaDeviceGetPCIBusIdPtr = dlsym(dl2, "cudaDeviceGetPCIBusId"); 01033 if (dlerror() != NULL) 01034 { 01035 strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaDeviceGetPCIBusId not found.",PAPI_MAX_STR_LEN); 01036 return ( PAPI_ENOSUPP ); 01037 } 01038 01039 dl3 = dlopen("libnvidia-ml.so", RTLD_NOW | RTLD_GLOBAL); 01040 if (!dl3) 01041 { 01042 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML runtime library libnvidia-ml.so not found.",PAPI_MAX_STR_LEN); 01043 return ( PAPI_ENOSUPP ); 01044 } 01045 nvmlDeviceGetClockInfoPtr = dlsym(dl3, "nvmlDeviceGetClockInfo"); 01046 if (dlerror() != NULL) 01047 { 01048 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetClockInfo not found.",PAPI_MAX_STR_LEN); 01049 return ( PAPI_ENOSUPP ); 01050 } 01051 nvmlErrorStringPtr = dlsym(dl3, "nvmlErrorString"); 01052 if (dlerror() != NULL) 01053 { 01054 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlErrorString not found.",PAPI_MAX_STR_LEN); 01055 return ( PAPI_ENOSUPP ); 01056 } 01057 nvmlDeviceGetDetailedEccErrorsPtr = dlsym(dl3, "nvmlDeviceGetDetailedEccErrors"); 01058 if (dlerror() != NULL) 01059 { 01060 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetDetailedEccErrors not found.",PAPI_MAX_STR_LEN); 01061 return ( PAPI_ENOSUPP ); 01062 } 01063 nvmlDeviceGetFanSpeedPtr = dlsym(dl3, "nvmlDeviceGetFanSpeed"); 01064 if (dlerror() != NULL) 01065 { 01066 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetFanSpeed not found.",PAPI_MAX_STR_LEN); 01067 return ( PAPI_ENOSUPP ); 01068 } 01069 nvmlDeviceGetMemoryInfoPtr = dlsym(dl3, "nvmlDeviceGetMemoryInfo"); 01070 if (dlerror() != NULL) 01071 { 01072 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetMemoryInfo not found.",PAPI_MAX_STR_LEN); 01073 return ( PAPI_ENOSUPP ); 01074 } 01075 nvmlDeviceGetPerformanceStatePtr = dlsym(dl3, "nvmlDeviceGetPerformanceState"); 01076 if (dlerror() != NULL) 01077 { 01078 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPerformanceState not found.",PAPI_MAX_STR_LEN); 01079 return ( PAPI_ENOSUPP ); 01080 } 01081 nvmlDeviceGetPowerUsagePtr = dlsym(dl3, "nvmlDeviceGetPowerUsage"); 01082 if (dlerror() != NULL) 01083 { 01084 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPowerUsage not found.",PAPI_MAX_STR_LEN); 01085 return ( PAPI_ENOSUPP ); 01086 } 01087 nvmlDeviceGetTemperaturePtr = dlsym(dl3, "nvmlDeviceGetTemperature"); 01088 if (dlerror() != NULL) 01089 { 01090 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTemperature not found.",PAPI_MAX_STR_LEN); 01091 return ( PAPI_ENOSUPP ); 01092 } 01093 nvmlDeviceGetTotalEccErrorsPtr = dlsym(dl3, "nvmlDeviceGetTotalEccErrors"); 01094 if (dlerror() != NULL) 01095 { 01096 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTotalEccErrors not found.",PAPI_MAX_STR_LEN); 01097 return ( PAPI_ENOSUPP ); 01098 } 01099 nvmlDeviceGetUtilizationRatesPtr = dlsym(dl3, "nvmlDeviceGetUtilizationRates"); 01100 if (dlerror() != NULL) 01101 { 01102 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetUtilizationRates not found.",PAPI_MAX_STR_LEN); 01103 return ( PAPI_ENOSUPP ); 01104 } 01105 nvmlDeviceGetHandleByIndexPtr = dlsym(dl3, "nvmlDeviceGetHandleByIndex"); 01106 if (dlerror() != NULL) 01107 { 01108 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetHandleByIndex not found.",PAPI_MAX_STR_LEN); 01109 return ( PAPI_ENOSUPP ); 01110 } 01111 nvmlDeviceGetPciInfoPtr = dlsym(dl3, "nvmlDeviceGetPciInfo"); 01112 if (dlerror() != NULL) 01113 { 01114 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPciInfo not found.",PAPI_MAX_STR_LEN); 01115 return ( PAPI_ENOSUPP ); 01116 } 01117 nvmlDeviceGetNamePtr = dlsym(dl3, "nvmlDeviceGetName"); 01118 if (dlerror() != NULL) 01119 { 01120 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetName not found.",PAPI_MAX_STR_LEN); 01121 return ( PAPI_ENOSUPP ); 01122 } 01123 nvmlDeviceGetInforomVersionPtr = dlsym(dl3, "nvmlDeviceGetInforomVersion"); 01124 if (dlerror() != NULL) 01125 { 01126 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetInforomVersion not found.",PAPI_MAX_STR_LEN); 01127 return ( PAPI_ENOSUPP ); 01128 } 01129 nvmlDeviceGetEccModePtr = dlsym(dl3, "nvmlDeviceGetEccMode"); 01130 if (dlerror() != NULL) 01131 { 01132 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetEccMode not found.",PAPI_MAX_STR_LEN); 01133 return ( PAPI_ENOSUPP ); 01134 } 01135 nvmlInitPtr = dlsym(dl3, "nvmlInit"); 01136 if (dlerror() != NULL) 01137 { 01138 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlInit not found.",PAPI_MAX_STR_LEN); 01139 return ( PAPI_ENOSUPP ); 01140 } 01141 nvmlDeviceGetCountPtr = dlsym(dl3, "nvmlDeviceGetCount"); 01142 if (dlerror() != NULL) 01143 { 01144 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetCount not found.",PAPI_MAX_STR_LEN); 01145 return ( PAPI_ENOSUPP ); 01146 } 01147 nvmlShutdownPtr = dlsym(dl3, "nvmlShutdown"); 01148 if (dlerror() != NULL) 01149 { 01150 strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlShutdown not found.",PAPI_MAX_STR_LEN); 01151 return ( PAPI_ENOSUPP ); 01152 } 01153 01154 return ( PAPI_OK ); 01155 } 01156 01157 01163 int 01164 _papi_nvml_init_control_state( hwd_control_state_t * ctl ) 01165 { 01166 SUBDBG( "nvml_init_control_state... %p\n", ctl ); 01167 nvml_control_state_t *nvml_ctl = ( nvml_control_state_t * ) ctl; 01168 memset( nvml_ctl, 0, sizeof ( nvml_control_state_t ) ); 01169 01170 return PAPI_OK; 01171 } 01172 01173 01175 int 01176 _papi_nvml_update_control_state( hwd_control_state_t *ctl, 01177 NativeInfo_t *native, 01178 int count, 01179 hwd_context_t *ctx ) 01180 { 01181 SUBDBG( "Enter: ctl: %p, ctx: %p\n", ctl, ctx ); 01182 int i, index; 01183 01184 nvml_control_state_t *nvml_ctl = ( nvml_control_state_t * ) ctl; 01185 (void) ctx; 01186 01187 01188 /* if no events, return */ 01189 if (count==0) return PAPI_OK; 01190 01191 for( i = 0; i < count; i++ ) { 01192 index = native[i].ni_event; 01193 nvml_ctl->which_counter[i]=index; 01194 /* We have no constraints on event position, so any event */ 01195 /* can be in any slot. */ 01196 native[i].ni_position = i; 01197 } 01198 nvml_ctl->num_events=count; 01199 return PAPI_OK; 01200 } 01202 int 01203 _papi_nvml_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) 01204 { 01205 SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl ); 01206 01207 (void) ctx; 01208 (void) ctl; 01209 01210 /* anything that would need to be set at counter start time */ 01211 01212 /* reset */ 01213 /* start the counting */ 01214 01215 return PAPI_OK; 01216 } 01217 01218 01220 int 01221 _papi_nvml_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) 01222 { 01223 SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl ); 01224 01225 int i; 01226 (void) ctx; 01227 (void) ctl; 01228 int ret; 01229 01230 nvml_control_state_t* nvml_ctl = ( nvml_control_state_t*) ctl; 01231 01232 for (i=0;i<nvml_ctl->num_events;i++) { 01233 if ( PAPI_OK != 01234 ( ret = nvml_hardware_read( &nvml_ctl->counter[i], 01235 nvml_ctl->which_counter[i]) )) 01236 return ret; 01237 01238 } 01239 01240 return PAPI_OK; 01241 } 01242 01243 01245 int 01246 _papi_nvml_read( hwd_context_t *ctx, hwd_control_state_t *ctl, 01247 long long **events, int flags ) 01248 { 01249 SUBDBG( "Enter: ctx: %p, flags: %d\n", ctx, flags ); 01250 01251 (void) ctx; 01252 (void) flags; 01253 int i; 01254 int ret; 01255 nvml_control_state_t* nvml_ctl = ( nvml_control_state_t*) ctl; 01256 01257 01258 for (i=0;i<nvml_ctl->num_events;i++) { 01259 if ( PAPI_OK != 01260 ( ret = nvml_hardware_read( &nvml_ctl->counter[i], 01261 nvml_ctl->which_counter[i]) )) 01262 return ret; 01263 01264 } 01265 /* return pointer to the values we read */ 01266 *events = nvml_ctl->counter; 01267 return PAPI_OK; 01268 } 01269 01271 /* otherwise, the updated state is written to ESI->hw_start */ 01272 int 01273 _papi_nvml_write( hwd_context_t *ctx, hwd_control_state_t *ctl, 01274 long long *events ) 01275 { 01276 SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl ); 01277 01278 (void) ctx; 01279 (void) ctl; 01280 (void) events; 01281 01282 01283 /* You can change ECC mode and compute exclusivity modes on the cards */ 01284 /* But I don't see this as a function of a PAPI component at this time */ 01285 /* All implementation issues aside. */ 01286 return PAPI_OK; 01287 } 01288 01289 01291 /* If the eventset is not currently running, then the saved value in the */ 01292 /* EventSet is set to zero without calling this routine. */ 01293 int 01294 _papi_nvml_reset( hwd_context_t * ctx, hwd_control_state_t * ctl ) 01295 { 01296 SUBDBG( "Enter: ctx: %p, ctl: %p\n", ctx, ctl ); 01297 01298 (void) ctx; 01299 (void) ctl; 01300 01301 /* Reset the hardware */ 01302 nvml_hardware_reset( ); 01303 01304 return PAPI_OK; 01305 } 01306 01308 int 01309 _papi_nvml_shutdown_component() 01310 { 01311 SUBDBG( "Enter:\n" ); 01312 01313 if (nvml_native_table != NULL) 01314 papi_free(nvml_native_table); 01315 if (devices != NULL) 01316 papi_free(devices); 01317 if (features != NULL) 01318 papi_free(features); 01319 01320 (*nvmlShutdownPtr)(); 01321 01322 device_count = 0; 01323 num_events = 0; 01324 01325 // close the dynamic libraries needed by this component (opened in the init component call) 01326 dlclose(dl1); 01327 dlclose(dl2); 01328 dlclose(dl3); 01329 01330 return PAPI_OK; 01331 } 01332 01334 int 01335 _papi_nvml_shutdown_thread( hwd_context_t *ctx ) 01336 { 01337 SUBDBG( "Enter: ctx: %p\n", ctx ); 01338 01339 (void) ctx; 01340 01341 /* Last chance to clean up thread */ 01342 01343 return PAPI_OK; 01344 } 01345 01346 01347 01351 int 01352 _papi_nvml_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) 01353 { 01354 SUBDBG( "Enter: ctx: %p, code: %d\n", ctx, code ); 01355 01356 (void) ctx; 01357 (void) code; 01358 (void) option; 01359 01360 01361 /* FIXME. This should maybe set up more state, such as which counters are active and */ 01362 /* counter mappings. */ 01363 01364 return PAPI_OK; 01365 } 01366 01376 int 01377 _papi_nvml_set_domain( hwd_control_state_t * cntrl, int domain ) 01378 { 01379 SUBDBG( "Enter: cntrl: %p, domain: %d\n", cntrl, domain ); 01380 01381 (void) cntrl; 01382 01383 int found = 0; 01384 01385 if ( PAPI_DOM_USER & domain ) { 01386 SUBDBG( " PAPI_DOM_USER \n" ); 01387 found = 1; 01388 } 01389 if ( PAPI_DOM_KERNEL & domain ) { 01390 SUBDBG( " PAPI_DOM_KERNEL \n" ); 01391 found = 1; 01392 } 01393 if ( PAPI_DOM_OTHER & domain ) { 01394 SUBDBG( " PAPI_DOM_OTHER \n" ); 01395 found = 1; 01396 } 01397 if ( PAPI_DOM_ALL & domain ) { 01398 SUBDBG( " PAPI_DOM_ALL \n" ); 01399 found = 1; 01400 } 01401 if ( !found ) 01402 return ( PAPI_EINVAL ); 01403 01404 return PAPI_OK; 01405 } 01406 01407 01408 /**************************************************************/ 01409 /* Naming functions, used to translate event numbers to names */ 01410 /**************************************************************/ 01411 01412 01419 int 01420 _papi_nvml_ntv_enum_events( unsigned int *EventCode, int modifier ) 01421 { 01422 int index; 01423 01424 switch ( modifier ) { 01425 01426 /* return EventCode of first event */ 01427 case PAPI_ENUM_FIRST: 01428 /* return the first event that we support */ 01429 01430 *EventCode = 0; 01431 return PAPI_OK; 01432 01433 /* return EventCode of next available event */ 01434 case PAPI_ENUM_EVENTS: 01435 index = *EventCode; 01436 01437 /* Make sure we are in range */ 01438 if ( index < num_events - 1 ) { 01439 01440 /* This assumes a non-sparse mapping of the events */ 01441 *EventCode = *EventCode + 1; 01442 return PAPI_OK; 01443 } else { 01444 return PAPI_ENOEVNT; 01445 } 01446 break; 01447 01448 default: 01449 return PAPI_EINVAL; 01450 } 01451 01452 return PAPI_EINVAL; 01453 } 01454 01460 int 01461 _papi_nvml_ntv_code_to_name( unsigned int EventCode, char *name, int len ) 01462 { 01463 SUBDBG("Entry: EventCode: %#x, name: %s, len: %d\n", EventCode, name, len); 01464 int index; 01465 01466 index = EventCode; 01467 01468 /* Make sure we are in range */ 01469 if (index >= num_events) return PAPI_ENOEVNT; 01470 01471 strncpy( name, nvml_native_table[index].name, len ); 01472 01473 return PAPI_OK; 01474 } 01475 01481 int 01482 _papi_nvml_ntv_code_to_descr( unsigned int EventCode, char *descr, int len ) 01483 { 01484 int index; 01485 index = EventCode; 01486 01487 if (index >= num_events) return PAPI_ENOEVNT; 01488 01489 strncpy( descr, nvml_native_table[index].description, len ); 01490 01491 return PAPI_OK; 01492 } 01493 01495 papi_vector_t _nvml_vector = { 01496 .cmp_info = { 01497 /* default component information */ 01498 /* (unspecified values are initialized to 0) */ 01499 01500 .name = "nvml", 01501 .short_name="nvml", 01502 .version = "1.0", 01503 .description = "NVML provides the API for monitoring NVIDIA hardware (power usage, temperature, fan speed, etc)", 01504 .support_version = "n/a", 01505 .kernel_version = "n/a", 01506 01507 .num_preset_events = 0, 01508 .num_native_events = 0, /* set by init_component */ 01509 .default_domain = PAPI_DOM_USER, 01510 .available_domains = PAPI_DOM_USER, 01511 .default_granularity = PAPI_GRN_THR, 01512 .available_granularities = PAPI_GRN_THR, 01513 .hardware_intr_sig = PAPI_INT_SIGNAL, 01514 01515 01516 /* component specific cmp_info initializations */ 01517 .hardware_intr = 0, 01518 .precise_intr = 0, 01519 .posix1b_timers = 0, 01520 .kernel_profile = 0, 01521 .kernel_multiplex = 0, 01522 .fast_counter_read = 0, 01523 .fast_real_timer = 0, 01524 .fast_virtual_timer = 0, 01525 .attach = 0, 01526 .attach_must_ptrace = 0, 01527 .cntr_umasks = 0, 01528 .cpu = 0, 01529 .inherit = 0, 01530 }, 01531 01532 /* sizes of framework-opaque component-private structures */ 01533 .size = { 01534 .context = sizeof ( nvml_context_t ), 01535 .control_state = sizeof ( nvml_control_state_t ), 01536 .reg_value = sizeof ( nvml_register_t ), 01537 // .reg_alloc = sizeof ( nvml_reg_alloc_t ), 01538 }, 01539 01540 /* function pointers */ 01541 01542 /* Used for general PAPI interactions */ 01543 .start = _papi_nvml_start, 01544 .stop = _papi_nvml_stop, 01545 .read = _papi_nvml_read, 01546 .reset = _papi_nvml_reset, 01547 .write = _papi_nvml_write, 01548 .init_component = _papi_nvml_init_component, 01549 .init_thread = _papi_nvml_init_thread, 01550 .init_control_state = _papi_nvml_init_control_state, 01551 .update_control_state = _papi_nvml_update_control_state, 01552 .ctl = _papi_nvml_ctl, 01553 .shutdown_thread = _papi_nvml_shutdown_thread, 01554 .shutdown_component = _papi_nvml_shutdown_component, 01555 .set_domain = _papi_nvml_set_domain, 01556 .cleanup_eventset = NULL, 01557 /* called in add_native_events() */ 01558 .allocate_registers = NULL, 01559 01560 /* Used for overflow/profiling */ 01561 .dispatch_timer = NULL, 01562 .get_overflow_address = NULL, 01563 .stop_profiling = NULL, 01564 .set_overflow = NULL, 01565 .set_profile = NULL, 01566 01567 /* Name Mapping Functions */ 01568 .ntv_enum_events = _papi_nvml_ntv_enum_events, 01569 .ntv_name_to_code = NULL, 01570 .ntv_code_to_name = _papi_nvml_ntv_code_to_name, 01571 .ntv_code_to_descr = _papi_nvml_ntv_code_to_descr, 01572 01573 }; 01574