|
PAPI
5.0.1.0
|
00001 /****************************/ 00002 /* THIS IS OPEN SOURCE CODE */ 00003 /****************************/ 00004 00022 #include <stdio.h> 00023 #include <string.h> 00024 #include <stdlib.h> 00025 #include <inttypes.h> 00026 #include <string.h> 00027 #include <nvml.h> 00028 /* Headers required by PAPI */ 00029 #include "papi.h" 00030 #include "papi_internal.h" 00031 #include "papi_vector.h" 00032 #include "papi_memory.h" 00033 00034 #include "linux-nvml.h" 00035 00036 #include "nvml.h" 00037 #include "cuda.h" 00038 #include "cuda_runtime_api.h" 00039 00040 00041 /* Declare our vector in advance */ 00042 papi_vector_t _nvml_vector; 00043 00044 /* upto 25 events per card how many cards per system should we allow for?! */ 00045 #define NVML_MAX_COUNTERS 100 00046 00050 typedef struct nvml_control_state 00051 { 00052 int num_events; 00053 int which_counter[NVML_MAX_COUNTERS]; 00054 long long counter[NVML_MAX_COUNTERS]; 00055 } nvml_control_state_t; 00056 00058 typedef struct nvml_context 00059 { 00060 nvml_control_state_t state; 00061 } nvml_context_t; 00062 00064 static nvml_native_event_entry_t *nvml_native_table; 00065 00067 static int device_count = 0; 00068 00070 static int num_events = 0; 00071 00072 static nvmlDevice_t* devices; 00073 static int* features; 00074 00075 unsigned long long 00076 getClockSpeed( nvmlDevice_t dev, nvmlClockType_t which_one ) 00077 { 00078 unsigned int ret = 0; 00079 nvmlReturn_t bad; 00080 bad = nvmlDeviceGetClockInfo( dev, which_one, &ret ); 00081 00082 if ( NVML_SUCCESS != bad ) { 00083 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad)); 00084 } 00085 00086 return (unsigned long long)ret; 00087 } 00088 00089 unsigned long long 00090 getEccLocalErrors( nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one) 00091 { 00092 nvmlEccErrorCounts_t counts; 00093 00094 nvmlReturn_t bad; 00095 bad = nvmlDeviceGetDetailedEccErrors( dev, bits, NVML_VOLATILE_ECC , &counts); 00096 00097 if ( NVML_SUCCESS != bad ) { 00098 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad)); 00099 } 00100 00101 00102 switch ( which_one ) { 00103 case LOCAL_ECC_REGFILE: 00104 return counts.registerFile; 00105 case LOCAL_ECC_L1: 00106 return counts.l1Cache; 00107 case LOCAL_ECC_L2: 00108 return counts.l2Cache; 00109 case LOCAL_ECC_MEM: 00110 return counts.deviceMemory; 00111 default: 00112 ; 00113 } 00114 return (unsigned long long)-1; 00115 } 00116 00117 unsigned long long 00118 getFanSpeed( nvmlDevice_t dev ) 00119 { 00120 unsigned int ret = 0; 00121 nvmlReturn_t bad; 00122 bad = nvmlDeviceGetFanSpeed( dev, &ret ); 00123 00124 if ( NVML_SUCCESS != bad ) { 00125 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad)); 00126 } 00127 00128 00129 return (unsigned long long)ret; 00130 } 00131 00132 unsigned long long 00133 getMaxClockSpeed( nvmlDevice_t dev, nvmlClockType_t which_one) 00134 { 00135 unsigned int ret = 0; 00136 nvmlReturn_t bad; 00137 bad = nvmlDeviceGetClockInfo( dev, which_one, &ret ); 00138 00139 if ( NVML_SUCCESS != bad ) { 00140 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad)); 00141 } 00142 00143 00144 return (unsigned long long) ret; 00145 } 00146 00147 unsigned long long 00148 getMemoryInfo( nvmlDevice_t dev, int which_one ) 00149 { 00150 nvmlMemory_t meminfo; 00151 nvmlReturn_t bad; 00152 bad = nvmlDeviceGetMemoryInfo( dev, &meminfo ); 00153 00154 if ( NVML_SUCCESS != bad ) { 00155 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad)); 00156 } 00157 00158 switch (which_one) { 00159 case MEMINFO_TOTAL_MEMORY: 00160 return meminfo.total; 00161 case MEMINFO_UNALLOCED: 00162 return meminfo.free; 00163 case MEMINFO_ALLOCED: 00164 return meminfo.used; 00165 default: 00166 ; 00167 } 00168 return (unsigned long long)-1; 00169 } 00170 00171 unsigned long long 00172 getPState( nvmlDevice_t dev ) 00173 { 00174 unsigned int ret = 0; 00175 nvmlPstates_t state = NVML_PSTATE_15; 00176 nvmlReturn_t bad; 00177 bad = nvmlDeviceGetPerformanceState( dev, &state ); 00178 00179 if ( NVML_SUCCESS != bad ) { 00180 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad)); 00181 } 00182 00183 00184 switch ( state ) { 00185 case NVML_PSTATE_15: 00186 ret++; 00187 case NVML_PSTATE_14: 00188 ret++; 00189 case NVML_PSTATE_13: 00190 ret++; 00191 case NVML_PSTATE_12: 00192 ret++; 00193 case NVML_PSTATE_11: 00194 ret++; 00195 case NVML_PSTATE_10: 00196 ret++; 00197 case NVML_PSTATE_9: 00198 ret++; 00199 case NVML_PSTATE_8: 00200 ret++; 00201 case NVML_PSTATE_7: 00202 ret++; 00203 case NVML_PSTATE_6: 00204 ret++; 00205 case NVML_PSTATE_5: 00206 ret++; 00207 case NVML_PSTATE_4: 00208 ret++; 00209 case NVML_PSTATE_3: 00210 ret++; 00211 case NVML_PSTATE_2: 00212 ret++; 00213 case NVML_PSTATE_1: 00214 ret++; 00215 case NVML_PSTATE_0: 00216 break; 00217 case NVML_PSTATE_UNKNOWN: 00218 default: 00219 /* This should never happen? 00220 * The API docs just state Unknown performance state... */ 00221 return (unsigned long long) -1; 00222 } 00223 00224 return (unsigned long long)ret; 00225 } 00226 00227 unsigned long long 00228 getPowerUsage( nvmlDevice_t dev ) 00229 { 00230 unsigned int power; 00231 nvmlReturn_t bad; 00232 bad = nvmlDeviceGetPowerUsage( dev, &power ); 00233 00234 if ( NVML_SUCCESS != bad ) { 00235 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad)); 00236 } 00237 00238 00239 return (unsigned long long) power; 00240 } 00241 00242 unsigned long long 00243 getTemperature( nvmlDevice_t dev ) 00244 { 00245 unsigned int ret = 0; 00246 nvmlReturn_t bad; 00247 bad = nvmlDeviceGetTemperature( dev, NVML_TEMPERATURE_GPU, &ret ); 00248 00249 if ( NVML_SUCCESS != bad ) { 00250 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad)); 00251 } 00252 00253 00254 return (unsigned long long)ret; 00255 } 00256 00257 unsigned long long 00258 getTotalEccErrors( nvmlDevice_t dev, nvmlEccBitType_t bits) 00259 { 00260 unsigned long long counts = 0; 00261 nvmlReturn_t bad; 00262 bad = nvmlDeviceGetTotalEccErrors( dev, bits, NVML_VOLATILE_ECC , &counts); 00263 00264 if ( NVML_SUCCESS != bad ) { 00265 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad)); 00266 } 00267 00268 00269 return counts; 00270 } 00271 00272 /* 0 => gpu util 00273 1 => memory util 00274 */ 00275 unsigned long long 00276 getUtilization( nvmlDevice_t dev, int which_one ) 00277 { 00278 nvmlUtilization_t util; 00279 nvmlReturn_t bad; 00280 bad = nvmlDeviceGetUtilizationRates( dev, &util ); 00281 00282 if ( NVML_SUCCESS != bad ) { 00283 SUBDBG( "something went wrong %s\n", nvmlErrorString(bad)); 00284 } 00285 00286 00287 switch (which_one) { 00288 case GPU_UTILIZATION: 00289 return (unsigned long long) util.gpu; 00290 case MEMORY_UTILIZATION: 00291 return (unsigned long long) util.memory; 00292 default: 00293 ; 00294 } 00295 00296 return (unsigned long long) -1; 00297 } 00298 00299 static void 00300 nvml_hardware_reset( ) 00301 { 00302 /* nvmlDeviceSet* and nvmlDeviceClear* calls require root/admin access, so while 00303 * possible to implement a reset on the ECC counters, we pass */ 00304 /* 00305 int i; 00306 for ( i=0; i < device_count; i++ ) 00307 nvmlDeviceClearEccErrorCounts( device[i], NVML_VOLATILE_ECC ); 00308 */ 00309 } 00310 00312 /* You might replace this with code that accesses */ 00313 /* hardware or reads values from the operatings system. */ 00314 static int 00315 nvml_hardware_read( long long *value, int which_one) 00316 //, nvml_context_t *ctx) 00317 { 00318 nvml_native_event_entry_t *entry; 00319 nvmlDevice_t handle; 00320 int cudaIdx = -1; 00321 00322 entry = &nvml_native_table[which_one]; 00323 *value = (long long) -1; 00324 /* replace entry->resources with the current cuda_device->nvml device */ 00325 cudaGetDevice( &cudaIdx ); 00326 00327 if ( cudaIdx < 0 || cudaIdx > device_count ) 00328 return PAPI_EINVAL; 00329 00330 /* Make sure the device we are running on has the requested event */ 00331 if ( !HAS_FEATURE( features[cudaIdx] , entry->type) ) 00332 return PAPI_EINVAL; 00333 00334 handle = devices[cudaIdx]; 00335 00336 switch (entry->type) { 00337 case FEATURE_CLOCK_INFO: 00338 *value = getClockSpeed( handle, 00339 (nvmlClockType_t)entry->options.clock ); 00340 break; 00341 case FEATURE_ECC_LOCAL_ERRORS: 00342 *value = getEccLocalErrors( handle, 00343 (nvmlEccBitType_t)entry->options.ecc_opts.bits, 00344 (int)entry->options.ecc_opts.which_one); 00345 break; 00346 case FEATURE_FAN_SPEED: 00347 *value = getFanSpeed( handle ); 00348 break; 00349 case FEATURE_MAX_CLOCK: 00350 *value = getMaxClockSpeed( handle, 00351 (nvmlClockType_t)entry->options.clock ); 00352 break; 00353 case FEATURE_MEMORY_INFO: 00354 *value = getMemoryInfo( handle, 00355 (int)entry->options.which_one ); 00356 break; 00357 case FEATURE_PERF_STATES: 00358 *value = getPState( handle ); 00359 break; 00360 case FEATURE_POWER: 00361 *value = getPowerUsage( handle ); 00362 break; 00363 case FEATURE_TEMP: 00364 *value = getTemperature( handle ); 00365 break; 00366 case FEATURE_ECC_TOTAL_ERRORS: 00367 *value = getTotalEccErrors( handle, 00368 (nvmlEccBitType_t)entry->options.ecc_opts.bits ); 00369 break; 00370 case FEATURE_UTILIZATION: 00371 *value = getUtilization( handle, 00372 (int)entry->options.which_one ); 00373 break; 00374 default: 00375 return PAPI_EINVAL; 00376 } 00377 00378 return PAPI_OK; 00379 00380 00381 } 00382 00383 /********************************************************************/ 00384 /* Below are the functions required by the PAPI component interface */ 00385 /********************************************************************/ 00386 00388 int 00389 _papi_nvml_init_thread( hwd_context_t * ctx ) 00390 { 00391 (void) ctx; 00392 00393 SUBDBG( "_papi_nvml_init %p...", ctx ); 00394 00395 return PAPI_OK; 00396 } 00397 00398 static int 00399 detectDevices( ) 00400 { 00401 nvmlReturn_t ret; 00402 nvmlEnableState_t mode = NVML_FEATURE_DISABLED; 00403 nvmlDevice_t handle; 00404 nvmlPciInfo_t info; 00405 00406 cudaError_t cuerr; 00407 00408 char busId[16]; 00409 char name[64]; 00410 char inforomECC[16]; 00411 char inforomPower[16]; 00412 char names[device_count][64]; 00413 char nvml_busIds[device_count][16]; 00414 00415 float ecc_version = 0.0, power_version = 0.0; 00416 00417 int i = 0, 00418 j = 0; 00419 int isTesla = 0; 00420 int isFermi = 0; 00421 int isUnique = 1; 00422 00423 unsigned int temp = 0; 00424 00425 00426 /* list of nvml pci_busids */ 00427 for (i=0; i < device_count; i++) { 00428 ret = nvmlDeviceGetHandleByIndex( i, &handle ); 00429 if ( NVML_SUCCESS != ret ) { 00430 SUBDBG("nvmlDeviceGetHandleByIndex(%d) failed\n", i); 00431 return PAPI_ESYS; 00432 } 00433 00434 ret = nvmlDeviceGetPciInfo( handle, &info ); 00435 if ( NVML_SUCCESS != ret ) { 00436 SUBDBG("nvmlDeviceGetPciInfo() failed %s\n", nvmlErrorString(ret) ); 00437 return PAPI_ESYS; 00438 } 00439 00440 strncpy(nvml_busIds[i], info.busId, 16); 00441 00442 } 00443 00444 /* We want to key our list of nvmlDevice_ts by each device's cuda index */ 00445 for (i=0; i < device_count; i++) { 00446 cuerr = cudaDeviceGetPCIBusId( busId, 16, i ); 00447 if ( CUDA_SUCCESS != cuerr ) { 00448 SUBDBG("cudaDeviceGetPCIBusId failed.\n"); 00449 return PAPI_ESYS; 00450 } 00451 for (j=0; j < device_count; j++ ) { 00452 if ( !strncmp( busId, nvml_busIds[j], 16) ) { 00453 ret = nvmlDeviceGetHandleByIndex(j, &devices[i] ); 00454 if ( NVML_SUCCESS != ret ) { 00455 SUBDBG("nvmlDeviceGetHandleByIndex(%d, &devices[%d]) failed.\n", j, i); 00456 } 00457 return PAPI_ESYS; 00458 break; 00459 } 00460 } 00461 } 00462 00463 memset(names, 0x0, device_count*64); 00464 /* So for each card, check whats querable */ 00465 for (i=0; i < device_count; i++ ) { 00466 isTesla=0; 00467 isFermi=1; 00468 isUnique = 1; 00469 features[i] = 0; 00470 00471 ret = nvmlDeviceGetName( devices[i], name, 64 ); 00472 if ( NVML_SUCCESS != ret) { 00473 SUBDBG("nvmlDeviceGetName failed \n"); 00474 return PAPI_ESYS; 00475 } 00476 00477 for (j=0; j < i; j++ ) 00478 if ( 0 == strncmp( name, names[j], 64 ) ) { 00479 /* if we have a match, and IF everything is sane, 00480 * devices with the same name eg Tesla C2075 share features */ 00481 isUnique = 0; 00482 features[i] = features[j]; 00483 00484 } 00485 00486 if ( isUnique ) { 00487 ret = nvmlDeviceGetInforomVersion( devices[i], NVML_INFOROM_ECC, inforomECC, 16); 00488 if ( NVML_SUCCESS != ret ) { 00489 SUBDBG("nvmlGetInforomVersion carps %s\n", nvmlErrorString(ret ) ); 00490 isFermi = 0; 00491 } 00492 ret = nvmlDeviceGetInforomVersion( devices[i], NVML_INFOROM_POWER, inforomPower, 16); 00493 if ( NVML_SUCCESS != ret ) { 00494 /* This implies the card is older then Fermi */ 00495 SUBDBG("nvmlGetInforomVersion carps %s\n", nvmlErrorString(ret ) ); 00496 SUBDBG("Based upon the return to nvmlGetInforomVersion, we conclude this card is older then Fermi.\n"); 00497 isFermi = 0; 00498 } 00499 00500 ecc_version = strtof(inforomECC, NULL ); 00501 power_version = strtof( inforomPower, NULL); 00502 00503 ret = nvmlDeviceGetName( devices[i], name, 64 ); 00504 isTesla = ( NULL == strstr(name, "Tesla") ) ? 0:1; 00505 00506 /* For Tesla and Quadro products from Fermi and Kepler families. */ 00507 if ( isFermi ) { 00508 features[i] |= FEATURE_CLOCK_INFO; 00509 num_events += 3; 00510 } 00511 00512 /* For Tesla and Quadro products from Fermi and Kepler families. 00513 requires NVML_INFOROM_ECC 2.0 or higher for location-based counts 00514 requires NVML_INFOROM_ECC 1.0 or higher for all other ECC counts 00515 requires ECC mode to be enabled. */ 00516 if ( isFermi ) { 00517 ret = nvmlDeviceGetEccMode( devices[i], &mode, NULL ); 00518 if ( NVML_FEATURE_ENABLED == mode) { 00519 if ( ecc_version >= 2.0 ) { 00520 features[i] |= FEATURE_ECC_LOCAL_ERRORS; 00521 num_events += 8; /* {single bit, two bit errors} x { reg, l1, l2, memory } */ 00522 } 00523 if ( ecc_version >= 1.0 ) { 00524 features[i] |= FEATURE_ECC_TOTAL_ERRORS; 00525 num_events += 2; /* single bit errors, double bit errors */ 00526 } 00527 } 00528 } 00529 00530 /* For all discrete products with dedicated fans */ 00531 features[i] |= FEATURE_FAN_SPEED; 00532 num_events++; 00533 00534 /* For Tesla and Quadro products from Fermi and Kepler families. */ 00535 if ( isFermi ) { 00536 features[i] |= FEATURE_MAX_CLOCK; 00537 num_events += 3; 00538 } 00539 00540 /* For all products */ 00541 features[i] |= FEATURE_MEMORY_INFO; 00542 num_events += 3; /* total, free, used */ 00543 00544 /* For Tesla and Quadro products from the Fermi and Kepler families. */ 00545 if ( isFermi ) { 00546 features[i] |= FEATURE_PERF_STATES; 00547 num_events++; 00548 } 00549 00550 /* For "GF11x" Tesla and Quadro products from the Fermi family 00551 requires NVML_INFOROM_POWER 3.0 or higher 00552 For Tesla and Quadro products from the Kepler family 00553 does not require NVML_INFOROM_POWER */ 00554 if ( isFermi ) { 00555 ret = nvmlDeviceGetPowerUsage( devices[i], &temp); 00556 if ( NVML_SUCCESS == ret ) { 00557 features[i] |= FEATURE_POWER; 00558 num_events++; 00559 } 00560 } 00561 00562 /* For all discrete and S-class products. */ 00563 features[i] |= FEATURE_TEMP; 00564 num_events++; 00565 00566 /* For Tesla and Quadro products from the Fermi and Kepler families */ 00567 if (isFermi) { 00568 features[i] |= FEATURE_UTILIZATION; 00569 num_events += 2; 00570 } 00571 00572 strncpy( names[i], name, 64); 00573 00574 } 00575 } 00576 return PAPI_OK; 00577 00578 } 00579 00580 static void 00581 createNativeEvents( ) 00582 { 00583 char name[64]; 00584 char sanitized_name[PAPI_MAX_STR_LEN]; 00585 char names[device_count][64]; 00586 00587 int i, nameLen = 0, j; 00588 int isUnique = 1; 00589 00590 nvml_native_event_entry_t* entry; 00591 nvmlReturn_t ret; 00592 00593 nvml_native_table = (nvml_native_event_entry_t*) papi_malloc( 00594 sizeof(nvml_native_event_entry_t) * num_events ); 00595 memset( nvml_native_table, 0x0, sizeof(nvml_native_event_entry_t) * num_events ); 00596 entry = &nvml_native_table[0]; 00597 00598 for (i=0; i < device_count; i++ ) { 00599 memset( names[i], 0x0, 64 ); 00600 isUnique = 1; 00601 ret = nvmlDeviceGetName( devices[i], name, 64 ); 00602 00603 for (j=0; j < i; j++ ) 00604 { 00605 if ( 0 == strncmp( name, names[j], 64 ) ) 00606 isUnique = 0; 00607 } 00608 00609 if ( isUnique ) { 00610 nameLen = strlen(name); 00611 strncpy(sanitized_name, name, PAPI_MAX_STR_LEN ); 00612 for (j=0; j < nameLen; j++) 00613 if ( ' ' == sanitized_name[j] ) 00614 sanitized_name[j] = '_'; 00615 00616 00617 00618 if ( HAS_FEATURE( features[i], FEATURE_CLOCK_INFO ) ) { 00619 sprintf( entry->name, "%s:graphics_clock", sanitized_name ); 00620 strncpy(entry->description,"Graphics clock domain (MHz).", PAPI_MAX_STR_LEN ); 00621 entry->options.clock = NVML_CLOCK_GRAPHICS; 00622 entry->type = FEATURE_CLOCK_INFO; 00623 entry++; 00624 00625 sprintf( entry->name, "%s:sm_clock", sanitized_name); 00626 strncpy(entry->description,"SM clock domain (MHz).", PAPI_MAX_STR_LEN); 00627 entry->options.clock = NVML_CLOCK_SM; 00628 entry->type = FEATURE_CLOCK_INFO; 00629 entry++; 00630 00631 sprintf( entry->name, "%s:memory_clock", sanitized_name); 00632 strncpy(entry->description,"Memory clock domain (MHz).", PAPI_MAX_STR_LEN); 00633 entry->options.clock = NVML_CLOCK_MEM; 00634 entry->type = FEATURE_CLOCK_INFO; 00635 entry++; 00636 } 00637 00638 if ( HAS_FEATURE( features[i], FEATURE_ECC_LOCAL_ERRORS ) ) { 00639 sprintf(entry->name, "%s:l1_single_ecc_errors", sanitized_name); 00640 strncpy(entry->description,"L1 cache single bit ECC", PAPI_MAX_STR_LEN); 00641 entry->options.ecc_opts = (struct local_ecc){ 00642 .bits = NVML_SINGLE_BIT_ECC, 00643 .which_one = LOCAL_ECC_L1, 00644 }; 00645 entry->type = FEATURE_ECC_LOCAL_ERRORS; 00646 entry++; 00647 00648 sprintf(entry->name, "%s:l2_single_ecc_errors", sanitized_name); 00649 strncpy(entry->description,"L2 cache single bit ECC", PAPI_MAX_STR_LEN); 00650 entry->options.ecc_opts = (struct local_ecc){ 00651 .bits = NVML_SINGLE_BIT_ECC, 00652 .which_one = LOCAL_ECC_L2, 00653 }; 00654 entry->type = FEATURE_ECC_LOCAL_ERRORS; 00655 entry++; 00656 00657 sprintf(entry->name, "%s:memory_single_ecc_errors", sanitized_name); 00658 strncpy(entry->description,"Device memory single bit ECC", PAPI_MAX_STR_LEN); 00659 entry->options.ecc_opts = (struct local_ecc){ 00660 .bits = NVML_SINGLE_BIT_ECC, 00661 .which_one = LOCAL_ECC_MEM, 00662 }; 00663 entry->type = FEATURE_ECC_LOCAL_ERRORS; 00664 entry++; 00665 00666 sprintf(entry->name, "%s:regfile_single_ecc_errors", sanitized_name); 00667 strncpy(entry->description,"Register file single bit ECC", PAPI_MAX_STR_LEN); 00668 entry->options.ecc_opts = (struct local_ecc){ 00669 .bits = NVML_SINGLE_BIT_ECC, 00670 .which_one = LOCAL_ECC_REGFILE, 00671 }; 00672 entry->type = FEATURE_ECC_LOCAL_ERRORS; 00673 entry++; 00674 00675 sprintf(entry->name, "%s:1l_double_ecc_errors", sanitized_name); 00676 strncpy(entry->description,"L1 cache double bit ECC", PAPI_MAX_STR_LEN); 00677 entry->options.ecc_opts = (struct local_ecc){ 00678 .bits = NVML_DOUBLE_BIT_ECC, 00679 .which_one = LOCAL_ECC_L1, 00680 }; 00681 entry->type = FEATURE_ECC_LOCAL_ERRORS; 00682 entry++; 00683 00684 sprintf(entry->name, "%s:l2_double_ecc_errors", sanitized_name); 00685 strncpy(entry->description,"L2 cache double bit ECC", PAPI_MAX_STR_LEN); 00686 entry->options.ecc_opts = (struct local_ecc){ 00687 .bits = NVML_DOUBLE_BIT_ECC, 00688 .which_one = LOCAL_ECC_L2, 00689 }; 00690 entry->type = FEATURE_ECC_LOCAL_ERRORS; 00691 entry++; 00692 00693 sprintf(entry->name, "%s:memory_double_ecc_errors", sanitized_name); 00694 strncpy(entry->description,"Device memory double bit ECC", PAPI_MAX_STR_LEN); 00695 entry->options.ecc_opts = (struct local_ecc){ 00696 .bits = NVML_DOUBLE_BIT_ECC, 00697 .which_one = LOCAL_ECC_MEM, 00698 }; 00699 entry->type = FEATURE_ECC_LOCAL_ERRORS; 00700 entry++; 00701 00702 sprintf(entry->name, "%s:regfile_double_ecc_errors", sanitized_name); 00703 strncpy(entry->description,"Register file double bit ECC", PAPI_MAX_STR_LEN); 00704 entry->options.ecc_opts = (struct local_ecc){ 00705 .bits = NVML_DOUBLE_BIT_ECC, 00706 .which_one = LOCAL_ECC_REGFILE, 00707 }; 00708 entry->type = FEATURE_ECC_LOCAL_ERRORS; 00709 entry++; 00710 } 00711 00712 if ( HAS_FEATURE( features[i], FEATURE_FAN_SPEED ) ) { 00713 sprintf( entry->name, "%s:fan_speed", sanitized_name); 00714 strncpy(entry->description,"The fan speed expressed as a percent of the maximum, i.e. full speed is 100%", PAPI_MAX_STR_LEN); 00715 entry->type = FEATURE_FAN_SPEED; 00716 entry++; 00717 } 00718 00719 if ( HAS_FEATURE( features[i], FEATURE_MAX_CLOCK ) ) { 00720 sprintf( entry->name, "%s:graphics_max_clock", sanitized_name); 00721 strncpy(entry->description,"Maximal Graphics clock domain (MHz).", PAPI_MAX_STR_LEN); 00722 entry->options.clock = NVML_CLOCK_GRAPHICS; 00723 entry->type = FEATURE_MAX_CLOCK; 00724 entry++; 00725 00726 sprintf( entry->name, "%s:sm_max_clock", sanitized_name); 00727 strncpy(entry->description,"Maximal SM clock domain (MHz).", PAPI_MAX_STR_LEN); 00728 entry->options.clock = NVML_CLOCK_SM; 00729 entry->type = FEATURE_MAX_CLOCK; 00730 entry++; 00731 00732 sprintf( entry->name, "%s:memory_max_clock", sanitized_name); 00733 strncpy(entry->description,"Maximal Memory clock domain (MHz).", PAPI_MAX_STR_LEN); 00734 entry->options.clock = NVML_CLOCK_MEM; 00735 entry->type = FEATURE_MAX_CLOCK; 00736 entry++; 00737 } 00738 00739 if ( HAS_FEATURE( features[i], FEATURE_MEMORY_INFO ) ) { 00740 sprintf( entry->name, "%s:total_memory", sanitized_name); 00741 strncpy(entry->description,"Total installed FB memory (in bytes).", PAPI_MAX_STR_LEN); 00742 entry->options.which_one = MEMINFO_TOTAL_MEMORY; 00743 entry->type = FEATURE_MEMORY_INFO; 00744 entry++; 00745 00746 sprintf( entry->name, "%s:unallocated_memory", sanitized_name); 00747 strncpy(entry->description,"Uncallocated FB memory (in bytes).", PAPI_MAX_STR_LEN); 00748 entry->options.which_one = MEMINFO_UNALLOCED; 00749 entry->type = FEATURE_MEMORY_INFO; 00750 entry++; 00751 00752 sprintf( entry->name, "%s:allocated_memory", sanitized_name); 00753 strncpy(entry->description, "Allocated FB memory (in bytes). Note that the driver/GPU always sets aside a small amount of memory for bookkeeping.", PAPI_MAX_STR_LEN); 00754 entry->options.which_one = MEMINFO_ALLOCED; 00755 entry->type = FEATURE_MEMORY_INFO; 00756 entry++; 00757 } 00758 00759 if ( HAS_FEATURE( features[i], FEATURE_PERF_STATES ) ) { 00760 sprintf( entry->name, "%s:pstate", sanitized_name); 00761 strncpy(entry->description,"The performance state of the device.", PAPI_MAX_STR_LEN); 00762 entry->type = FEATURE_PERF_STATES; 00763 entry++; 00764 } 00765 00766 if ( HAS_FEATURE( features[i], FEATURE_POWER ) ) { 00767 sprintf( entry->name, "%s:power", sanitized_name); 00768 strncpy(entry->description,"Power usage reading for the device, in miliwatts. This is the power draw for the entire board, including GPU, memory, etc.\n The reading is accurate to within a range of +/-5 watts.", PAPI_MAX_STR_LEN); 00769 entry->type = FEATURE_POWER; 00770 entry++; 00771 } 00772 00773 if ( HAS_FEATURE( features[i], FEATURE_TEMP ) ) { 00774 sprintf( entry->name, "%s:temperature", sanitized_name); 00775 strncpy(entry->description,"Current temperature readings for the device, in degrees C.", PAPI_MAX_STR_LEN); 00776 entry->type = FEATURE_TEMP; 00777 entry++; 00778 } 00779 00780 if ( HAS_FEATURE( features[i], FEATURE_ECC_TOTAL_ERRORS ) ) { 00781 sprintf( entry->name, "%s:total_ecc_errors", sanitized_name); 00782 strncpy(entry->description,"Total single bit errors.", PAPI_MAX_STR_LEN); 00783 entry->options.ecc_opts = (struct local_ecc){ 00784 .bits = NVML_SINGLE_BIT_ECC, 00785 }; 00786 entry->type = FEATURE_ECC_TOTAL_ERRORS; 00787 entry++; 00788 00789 sprintf( entry->name, "%s:total_ecc_errors", sanitized_name); 00790 strncpy(entry->description,"Total double bit errors.", PAPI_MAX_STR_LEN); 00791 entry->options.ecc_opts = (struct local_ecc){ 00792 .bits = NVML_DOUBLE_BIT_ECC, 00793 }; 00794 entry->type = FEATURE_ECC_TOTAL_ERRORS; 00795 entry++; 00796 } 00797 00798 if ( HAS_FEATURE( features[i], FEATURE_UTILIZATION ) ) { 00799 sprintf( entry->name, "%s:gpu_utilization", sanitized_name); 00800 strncpy(entry->description,"Percent of time over the past second during which one or more kernels was executing on the GPU.", PAPI_MAX_STR_LEN); 00801 entry->options.which_one = GPU_UTILIZATION; 00802 entry->type = FEATURE_UTILIZATION; 00803 entry++; 00804 00805 sprintf( entry->name, "%s:memory_utilization", sanitized_name); 00806 strncpy(entry->description,"Percent of time over the past second during which global (device) memory was being read or written.", PAPI_MAX_STR_LEN); 00807 entry->options.which_one = MEMORY_UTILIZATION; 00808 entry->type = FEATURE_UTILIZATION; 00809 entry++; 00810 } 00811 strncpy( names[i], name, 64); 00812 } 00813 } 00814 } 00815 00820 int 00821 _papi_nvml_init_component( int cidx ) 00822 { 00823 nvmlReturn_t ret; 00824 cudaError_t cuerr; 00825 00826 int cuda_count = 0; 00827 unsigned int nvml_count = 0; 00828 00829 ret = nvmlInit(); 00830 if ( NVML_SUCCESS != ret ) { 00831 strcpy(_nvml_vector.cmp_info.disabled_reason, "The NVIDIA managament library failed to initialize."); 00832 goto disable; 00833 } 00834 00835 cuerr = cuInit( 0 ); 00836 if ( CUDA_SUCCESS != cuerr ) { 00837 strcpy(_nvml_vector.cmp_info.disabled_reason, "The CUDA library failed to initialize."); 00838 goto disable; 00839 } 00840 00841 /* Figure out the number of CUDA devices in the system */ 00842 ret = nvmlDeviceGetCount( &nvml_count ); 00843 if ( NVML_SUCCESS != ret ) { 00844 strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a count of devices from the NVIDIA managament library."); 00845 goto disable; 00846 } 00847 00848 cuerr = cudaGetDeviceCount( &cuda_count ); 00849 if ( CUDA_SUCCESS != cuerr ) { 00850 strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a device count from CUDA."); 00851 goto disable; 00852 } 00853 00854 /* We can probably recover from this, when we're clever */ 00855 if ( nvml_count != cuda_count ) { 00856 strcpy(_nvml_vector.cmp_info.disabled_reason, "Cuda and the NVIDIA managament library have different device counts."); 00857 goto disable; 00858 } 00859 00860 device_count = cuda_count; 00861 00862 /* A per device representation of what events are present */ 00863 features = (int*)papi_malloc(sizeof(int) * device_count ); 00864 00865 /* Handles to each device */ 00866 devices = (nvmlDevice_t*)papi_malloc(sizeof(nvmlDevice_t) * device_count); 00867 00868 /* Figure out what events are supported on each card. */ 00869 if ( (papi_errorcode = detectDevices( ) ) != PAPI_OK ) { 00870 papi_free(features); 00871 papi_free(devices); 00872 sprintf(_nvml_vector.cmp_info.disabled_reason, "An error occured in device feature detection, please check your NVIDIA Management Library and CUDA install." ); 00873 goto disable; 00874 } 00875 00876 /* The assumption is that if everything went swimmingly in detectDevices, 00877 all nvml calls here should be fine. */ 00878 createNativeEvents( ); 00879 00880 /* Export the total number of events available */ 00881 _nvml_vector.cmp_info.num_native_events = num_events; 00882 00883 /* Export the component id */ 00884 _nvml_vector.cmp_info.CmpIdx = cidx; 00885 00886 /* Export the number of 'counters' */ 00887 _nvml_vector.cmp_info.num_cntrs = num_events; 00888 _nvml_vector.cmp_info.num_mpx_cntrs = num_events; 00889 00890 return PAPI_OK; 00891 00892 disable: 00893 _nvml_vector.cmp_info.num_cntrs = 0; 00894 return PAPI_OK; 00895 } 00896 00897 00903 int 00904 _papi_nvml_init_control_state( hwd_control_state_t * ctl ) 00905 { 00906 SUBDBG( "nvml_init_control_state... %p\n", ctl ); 00907 nvml_control_state_t *nvml_ctl = ( nvml_control_state_t * ) ctl; 00908 memset( nvml_ctl, 0, sizeof ( nvml_control_state_t ) ); 00909 00910 return PAPI_OK; 00911 } 00912 00913 00915 int 00916 _papi_nvml_update_control_state( hwd_control_state_t *ctl, 00917 NativeInfo_t *native, 00918 int count, 00919 hwd_context_t *ctx ) 00920 { 00921 int i, index; 00922 00923 nvml_control_state_t *nvml_ctl = ( nvml_control_state_t * ) ctl; 00924 (void) ctx; 00925 00926 SUBDBG( "_papi_nvml_update_control_state %p %p...", ctl, ctx ); 00927 00928 /* if no events, return */ 00929 if (count==0) return PAPI_OK; 00930 00931 for( i = 0; i < count; i++ ) { 00932 index = native[i].ni_event; 00933 nvml_ctl->which_counter[i]=index; 00934 /* We have no constraints on event position, so any event */ 00935 /* can be in any slot. */ 00936 native[i].ni_position = i; 00937 } 00938 nvml_ctl->num_events=count; 00939 return PAPI_OK; 00940 } 00942 int 00943 _papi_nvml_start( hwd_context_t *ctx, hwd_control_state_t *ctl ) 00944 { 00945 00946 (void) ctx; 00947 (void) ctl; 00948 00949 SUBDBG( "nvml_start %p %p...", ctx, ctl ); 00950 /* anything that would need to be set at counter start time */ 00951 00952 /* reset */ 00953 /* start the counting */ 00954 00955 return PAPI_OK; 00956 } 00957 00958 00960 int 00961 _papi_nvml_stop( hwd_context_t *ctx, hwd_control_state_t *ctl ) 00962 { 00963 int i; 00964 (void) ctx; 00965 (void) ctl; 00966 int ret; 00967 SUBDBG( "nvml_stop %p %p...", ctx, ctl ); 00968 00969 nvml_control_state_t* nvml_ctl = ( nvml_control_state_t*) ctl; 00970 00971 for (i=0;i<nvml_ctl->num_events;i++) { 00972 if ( PAPI_OK != 00973 ( ret = nvml_hardware_read( &nvml_ctl->counter[i], 00974 nvml_ctl->which_counter[i]) )) 00975 return ret; 00976 00977 } 00978 00979 return PAPI_OK; 00980 } 00981 00982 00984 int 00985 _papi_nvml_read( hwd_context_t *ctx, hwd_control_state_t *ctl, 00986 long long **events, int flags ) 00987 { 00988 00989 (void) ctx; 00990 (void) flags; 00991 int i; 00992 int ret; 00993 nvml_control_state_t* nvml_ctl = ( nvml_control_state_t*) ctl; 00994 00995 SUBDBG( "nvml_read... %p %d", ctx, flags ); 00996 00997 for (i=0;i<nvml_ctl->num_events;i++) { 00998 if ( PAPI_OK != 00999 ( ret = nvml_hardware_read( &nvml_ctl->counter[i], 01000 nvml_ctl->which_counter[i]) )) 01001 return ret; 01002 01003 } 01004 /* return pointer to the values we read */ 01005 *events = nvml_ctl->counter; 01006 return PAPI_OK; 01007 } 01008 01010 /* otherwise, the updated state is written to ESI->hw_start */ 01011 int 01012 _papi_nvml_write( hwd_context_t *ctx, hwd_control_state_t *ctl, 01013 long long *events ) 01014 { 01015 01016 (void) ctx; 01017 (void) ctl; 01018 (void) events; 01019 01020 SUBDBG( "nvml_write... %p %p", ctx, ctl ); 01021 01022 /* You can change ECC mode and compute exclusivity modes on the cards */ 01023 /* But I don't see this as a function of a PAPI component at this time */ 01024 /* All implementation issues aside. */ 01025 return PAPI_OK; 01026 } 01027 01028 01030 /* If the eventset is not currently running, then the saved value in the */ 01031 /* EventSet is set to zero without calling this routine. */ 01032 int 01033 _papi_nvml_reset( hwd_context_t * ctx, hwd_control_state_t * ctl ) 01034 { 01035 (void) ctx; 01036 (void) ctl; 01037 01038 SUBDBG( "nvml_reset ctx=%p ctrl=%p...", ctx, ctl ); 01039 01040 /* Reset the hardware */ 01041 nvml_hardware_reset( ); 01042 01043 return PAPI_OK; 01044 } 01045 01047 int 01048 _papi_nvml_shutdown_component() 01049 { 01050 01051 SUBDBG( "nvml_shutdown_component..." ); 01052 01053 papi_free(nvml_native_table); 01054 papi_free(devices); 01055 01056 nvmlShutdown(); 01057 01058 device_count = 0; 01059 num_events = 0; 01060 01061 return PAPI_OK; 01062 } 01063 01065 int 01066 _papi_nvml_shutdown_thread( hwd_context_t *ctx ) 01067 { 01068 01069 (void) ctx; 01070 01071 SUBDBG( "nvml_shutdown_thread... %p", ctx ); 01072 01073 /* Last chance to clean up thread */ 01074 01075 return PAPI_OK; 01076 } 01077 01078 01079 01083 int 01084 _papi_nvml_ctl( hwd_context_t * ctx, int code, _papi_int_option_t * option ) 01085 { 01086 01087 (void) ctx; 01088 (void) code; 01089 (void) option; 01090 01091 SUBDBG( "nvml_ctl..." ); 01092 01093 /* FIXME. This should maybe set up more state, such as which counters are active and */ 01094 /* counter mappings. */ 01095 01096 return PAPI_OK; 01097 } 01098 01108 int 01109 _papi_nvml_set_domain( hwd_control_state_t * cntrl, int domain ) 01110 { 01111 (void) cntrl; 01112 01113 int found = 0; 01114 SUBDBG( "nvml_set_domain..." ); 01115 01116 if ( PAPI_DOM_USER & domain ) { 01117 SUBDBG( " PAPI_DOM_USER " ); 01118 found = 1; 01119 } 01120 if ( PAPI_DOM_KERNEL & domain ) { 01121 SUBDBG( " PAPI_DOM_KERNEL " ); 01122 found = 1; 01123 } 01124 if ( PAPI_DOM_OTHER & domain ) { 01125 SUBDBG( " PAPI_DOM_OTHER " ); 01126 found = 1; 01127 } 01128 if ( PAPI_DOM_ALL & domain ) { 01129 SUBDBG( " PAPI_DOM_ALL " ); 01130 found = 1; 01131 } 01132 if ( !found ) 01133 return ( PAPI_EINVAL ); 01134 01135 return PAPI_OK; 01136 } 01137 01138 01139 /**************************************************************/ 01140 /* Naming functions, used to translate event numbers to names */ 01141 /**************************************************************/ 01142 01143 01150 int 01151 _papi_nvml_ntv_enum_events( unsigned int *EventCode, int modifier ) 01152 { 01153 int index; 01154 01155 switch ( modifier ) { 01156 01157 /* return EventCode of first event */ 01158 case PAPI_ENUM_FIRST: 01159 /* return the first event that we support */ 01160 01161 *EventCode = 0; 01162 return PAPI_OK; 01163 01164 /* return EventCode of next available event */ 01165 case PAPI_ENUM_EVENTS: 01166 index = *EventCode; 01167 01168 /* Make sure we are in range */ 01169 if ( index < num_events - 1 ) { 01170 01171 /* This assumes a non-sparse mapping of the events */ 01172 *EventCode = *EventCode + 1; 01173 return PAPI_OK; 01174 } else { 01175 return PAPI_ENOEVNT; 01176 } 01177 break; 01178 01179 default: 01180 return PAPI_EINVAL; 01181 } 01182 01183 return PAPI_EINVAL; 01184 } 01185 01191 int 01192 _papi_nvml_ntv_code_to_name( unsigned int EventCode, char *name, int len ) 01193 { 01194 int index; 01195 01196 index = EventCode; 01197 01198 /* Make sure we are in range */ 01199 if (index >= num_events) return PAPI_ENOEVNT; 01200 01201 strncpy( name, nvml_native_table[index].name, len ); 01202 01203 return PAPI_OK; 01204 } 01205 01211 int 01212 _papi_nvml_ntv_code_to_descr( unsigned int EventCode, char *descr, int len ) 01213 { 01214 int index; 01215 index = EventCode; 01216 01217 if (index >= num_events) return PAPI_ENOEVNT; 01218 01219 strncpy( descr, nvml_native_table[index].description, len ); 01220 01221 return PAPI_OK; 01222 } 01223 01225 papi_vector_t _nvml_vector = { 01226 .cmp_info = { 01227 /* default component information */ 01228 /* (unspecified values are initialized to 0) */ 01229 01230 .name = "nvml", 01231 .short_name="nvml", 01232 .version = "1.0", 01233 .support_version = "n/a", 01234 .kernel_version = "n/a", 01235 01236 .num_preset_events = 0, 01237 .num_native_events = 0, /* set by init_component */ 01238 .default_domain = PAPI_DOM_USER, 01239 .available_domains = PAPI_DOM_USER, 01240 .default_granularity = PAPI_GRN_THR, 01241 .available_granularities = PAPI_GRN_THR, 01242 .hardware_intr_sig = PAPI_INT_SIGNAL, 01243 01244 01245 /* component specific cmp_info initializations */ 01246 .hardware_intr = 0, 01247 .precise_intr = 0, 01248 .posix1b_timers = 0, 01249 .kernel_profile = 0, 01250 .kernel_multiplex = 0, 01251 .fast_counter_read = 0, 01252 .fast_real_timer = 0, 01253 .fast_virtual_timer = 0, 01254 .attach = 0, 01255 .attach_must_ptrace = 0, 01256 .cntr_umasks = 0, 01257 .cpu = 0, 01258 .inherit = 0, 01259 }, 01260 01261 /* sizes of framework-opaque component-private structures */ 01262 .size = { 01263 .context = sizeof ( nvml_context_t ), 01264 .control_state = sizeof ( nvml_control_state_t ), 01265 .reg_value = sizeof ( nvml_register_t ), 01266 // .reg_alloc = sizeof ( nvml_reg_alloc_t ), 01267 }, 01268 01269 /* function pointers */ 01270 01271 /* Used for general PAPI interactions */ 01272 .start = _papi_nvml_start, 01273 .stop = _papi_nvml_stop, 01274 .read = _papi_nvml_read, 01275 .reset = _papi_nvml_reset, 01276 .write = _papi_nvml_write, 01277 .init_component = _papi_nvml_init_component, 01278 .init_thread = _papi_nvml_init_thread, 01279 .init_control_state = _papi_nvml_init_control_state, 01280 .update_control_state = _papi_nvml_update_control_state, 01281 .ctl = _papi_nvml_ctl, 01282 .shutdown_thread = _papi_nvml_shutdown_thread, 01283 .shutdown_component = _papi_nvml_shutdown_component, 01284 .set_domain = _papi_nvml_set_domain, 01285 .cleanup_eventset = NULL, 01286 /* called in add_native_events() */ 01287 .allocate_registers = NULL, 01288 01289 /* Used for overflow/profiling */ 01290 .dispatch_timer = NULL, 01291 .get_overflow_address = NULL, 01292 .stop_profiling = NULL, 01293 .set_overflow = NULL, 01294 .set_profile = NULL, 01295 01296 /* Name Mapping Functions */ 01297 .ntv_enum_events = _papi_nvml_ntv_enum_events, 01298 .ntv_name_to_code = NULL, 01299 .ntv_code_to_name = _papi_nvml_ntv_code_to_name, 01300 .ntv_code_to_descr = _papi_nvml_ntv_code_to_descr, 01301 01302 }; 01303