PAPI  5.7.0.0
linux-nvml.c
Go to the documentation of this file.
1 /****************************
2 THIS IS OPEN SOURCE CODE
3 
4 Part of the PAPI software library. Copyright (c) 2005 - 2017,
5 Innovative Computing Laboratory, Dept of Electrical Engineering &
6 Computer Science University of Tennessee, Knoxville, TN.
7 
8 The open source software license conforms to the 2-clause BSD License
9 template.
10 
11 ****************************/
12 
29 #include <dlfcn.h>
30 
31 #include <stdio.h>
32 #include <string.h>
33 #include <stdlib.h>
34 #include <inttypes.h>
35 #include <string.h>
36 /* Headers required by PAPI */
37 #include "papi.h"
38 #include "papi_internal.h"
39 #include "papi_vector.h"
40 #include "papi_memory.h"
41 
42 #include "linux-nvml.h"
43 
44 #include "nvml.h"
45 #include "cuda.h"
46 #include "cuda_runtime_api.h"
47 
48 void (*_dl_non_dynamic_init)(void) __attribute__((weak));
49 
50 /***** CHANGE PROTOTYPES TO DECLARE CUDA AND NVML LIBRARY SYMBOLS AS WEAK *****
51  * This is done so that a version of PAPI built with the nvml component can *
52  * be installed on a system which does not have the cuda libraries installed. *
53  * *
54  * If this is done without these prototypes, then all papi services on the *
55  * system without the cuda libraries installed will fail. The PAPI libraries *
56  * contain references to the cuda libraries which are not installed. The *
57  * load of PAPI commands fails because the cuda library references can not be *
58  * resolved. *
59  * *
60  * This also defines pointers to the cuda library functions that we call. *
61  * These function pointers will be resolved with dlopen/dlsym calls at *
62  * component initialization time. The component then calls the cuda library *
63  * functions through these function pointers. *
64  ********************************************************************************/
65 #undef CUDAAPI
66 #define CUDAAPI __attribute__((weak))
67 CUresult CUDAAPI cuInit(unsigned int);
68 
69 CUresult(*cuInitPtr)(unsigned int);
70 
71 #undef CUDARTAPI
72 #define CUDARTAPI __attribute__((weak))
73 cudaError_t CUDARTAPI cudaGetDevice(int *);
74 cudaError_t CUDARTAPI cudaGetDeviceCount(int *);
75 cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *, int, int);
76 
77 cudaError_t (*cudaGetDevicePtr)(int *);
78 cudaError_t (*cudaGetDeviceCountPtr)(int *);
79 cudaError_t (*cudaDeviceGetPCIBusIdPtr)(char *, int, int);
80 
81 #undef DECLDIR
82 #define DECLDIR __attribute__((weak))
83 nvmlReturn_t DECLDIR nvmlDeviceGetClockInfo(nvmlDevice_t, nvmlClockType_t, unsigned int *);
84 const char* DECLDIR nvmlErrorString(nvmlReturn_t);
85 nvmlReturn_t DECLDIR nvmlDeviceGetDetailedEccErrors(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, nvmlEccErrorCounts_t *);
86 nvmlReturn_t DECLDIR nvmlDeviceGetFanSpeed(nvmlDevice_t, unsigned int *);
87 nvmlReturn_t DECLDIR nvmlDeviceGetMemoryInfo(nvmlDevice_t, nvmlMemory_t *);
88 nvmlReturn_t DECLDIR nvmlDeviceGetPerformanceState(nvmlDevice_t, nvmlPstates_t *);
89 nvmlReturn_t DECLDIR nvmlDeviceGetPowerUsage(nvmlDevice_t, unsigned int *);
90 nvmlReturn_t DECLDIR nvmlDeviceGetTemperature(nvmlDevice_t, nvmlTemperatureSensors_t, unsigned int *);
91 nvmlReturn_t DECLDIR nvmlDeviceGetTotalEccErrors(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, unsigned long long *);
92 nvmlReturn_t DECLDIR nvmlDeviceGetUtilizationRates(nvmlDevice_t, nvmlUtilization_t *);
93 nvmlReturn_t DECLDIR nvmlDeviceGetHandleByIndex(unsigned int, nvmlDevice_t *);
94 nvmlReturn_t DECLDIR nvmlDeviceGetPciInfo(nvmlDevice_t, nvmlPciInfo_t *);
95 nvmlReturn_t DECLDIR nvmlDeviceGetName(nvmlDevice_t, char *, unsigned int);
96 nvmlReturn_t DECLDIR nvmlDeviceGetInforomVersion(nvmlDevice_t, nvmlInforomObject_t, char *, unsigned int);
97 nvmlReturn_t DECLDIR nvmlDeviceGetEccMode(nvmlDevice_t, nvmlEnableState_t *, nvmlEnableState_t *);
98 nvmlReturn_t DECLDIR nvmlInit(void);
99 nvmlReturn_t DECLDIR nvmlDeviceGetCount(unsigned int *);
100 nvmlReturn_t DECLDIR nvmlShutdown(void);
101 nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementLimit(nvmlDevice_t device, unsigned int* limit);
102 nvmlReturn_t DECLDIR nvmlDeviceSetPowerManagementLimit(nvmlDevice_t device, unsigned int limit);
103 nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementLimitConstraints(nvmlDevice_t device, unsigned int* minLimit, unsigned int* maxLimit);
104 
105 nvmlReturn_t (*nvmlDeviceGetClockInfoPtr)(nvmlDevice_t, nvmlClockType_t, unsigned int *);
106 char* (*nvmlErrorStringPtr)(nvmlReturn_t);
107 nvmlReturn_t (*nvmlDeviceGetDetailedEccErrorsPtr)(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, nvmlEccErrorCounts_t *);
108 nvmlReturn_t (*nvmlDeviceGetFanSpeedPtr)(nvmlDevice_t, unsigned int *);
109 nvmlReturn_t (*nvmlDeviceGetMemoryInfoPtr)(nvmlDevice_t, nvmlMemory_t *);
110 nvmlReturn_t (*nvmlDeviceGetPerformanceStatePtr)(nvmlDevice_t, nvmlPstates_t *);
111 nvmlReturn_t (*nvmlDeviceGetPowerUsagePtr)(nvmlDevice_t, unsigned int *);
112 nvmlReturn_t (*nvmlDeviceGetTemperaturePtr)(nvmlDevice_t, nvmlTemperatureSensors_t, unsigned int *);
113 nvmlReturn_t (*nvmlDeviceGetTotalEccErrorsPtr)(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, unsigned long long *);
114 nvmlReturn_t (*nvmlDeviceGetUtilizationRatesPtr)(nvmlDevice_t, nvmlUtilization_t *);
115 nvmlReturn_t (*nvmlDeviceGetHandleByIndexPtr)(unsigned int, nvmlDevice_t *);
116 nvmlReturn_t (*nvmlDeviceGetPciInfoPtr)(nvmlDevice_t, nvmlPciInfo_t *);
117 nvmlReturn_t (*nvmlDeviceGetNamePtr)(nvmlDevice_t, char *, unsigned int);
118 nvmlReturn_t (*nvmlDeviceGetInforomVersionPtr)(nvmlDevice_t, nvmlInforomObject_t, char *, unsigned int);
119 nvmlReturn_t (*nvmlDeviceGetEccModePtr)(nvmlDevice_t, nvmlEnableState_t *, nvmlEnableState_t *);
120 nvmlReturn_t (*nvmlInitPtr)(void);
121 nvmlReturn_t (*nvmlDeviceGetCountPtr)(unsigned int *);
122 nvmlReturn_t (*nvmlShutdownPtr)(void);
123 nvmlReturn_t (*nvmlDeviceGetPowerManagementLimitPtr)(nvmlDevice_t device, unsigned int* limit);
124 nvmlReturn_t (*nvmlDeviceSetPowerManagementLimitPtr)(nvmlDevice_t device, unsigned int limit);
125 nvmlReturn_t (*nvmlDeviceGetPowerManagementLimitConstraintsPtr)(nvmlDevice_t device, unsigned int* minLimit, unsigned int* maxLimit);
126 
127 // file handles used to access cuda libraries with dlopen
128 static void* dl1 = NULL;
129 static void* dl2 = NULL;
130 static void* dl3 = NULL;
131 
132 static int linkCudaLibraries();
133 
134 /* Declare our vector in advance */
136 
137 /* Up to 25 events per card; how many cards per system should we allow for?! */
138 #define NVML_MAX_COUNTERS 100
139 
143 typedef struct nvml_control_state {
144  int num_events;
145  int which_counter[NVML_MAX_COUNTERS];
146  long long counter[NVML_MAX_COUNTERS];
148 
150 typedef struct nvml_context {
153 
156 
158 static int device_count = 0;
159 
161 static int num_events = 0;
162 
163 static nvmlDevice_t* devices = NULL;
164 static int* features = NULL;
165 static unsigned int *power_management_initial_limit = NULL;
166 static unsigned int *power_management_limit_constraint_min = NULL;
167 static unsigned int *power_management_limit_constraint_max = NULL;
168 
169 unsigned long long
170 getClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one)
171 {
172  unsigned int ret = 0;
173  nvmlReturn_t bad;
174  bad = (*nvmlDeviceGetClockInfoPtr)(dev, which_one, &ret);
175 
176  if (NVML_SUCCESS != bad) {
177  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
178  return (unsigned long long) - 1;
179  }
180 
181  return (unsigned long long)ret;
182 }
183 
184 unsigned long long
185 getEccLocalErrors(nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one)
186 {
187  nvmlEccErrorCounts_t counts;
188 
189  nvmlReturn_t bad;
190  bad = (*nvmlDeviceGetDetailedEccErrorsPtr)(dev, bits, NVML_VOLATILE_ECC , &counts);
191 
192  if (NVML_SUCCESS != bad) {
193  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
194  return (unsigned long long) - 1;
195  }
196  switch (which_one) {
197  case LOCAL_ECC_REGFILE:
198  return counts.registerFile;
199  case LOCAL_ECC_L1:
200  return counts.l1Cache;
201  case LOCAL_ECC_L2:
202  return counts.l2Cache;
203  case LOCAL_ECC_MEM:
204  return counts.deviceMemory;
205  default:
206  ;
207  }
208  return (unsigned long long) - 1;
209 }
210 
211 unsigned long long
212 getFanSpeed(nvmlDevice_t dev)
213 {
214  unsigned int ret = 0;
215  nvmlReturn_t bad;
216  bad = (*nvmlDeviceGetFanSpeedPtr)(dev, &ret);
217 
218  if (NVML_SUCCESS != bad) {
219  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
220  return (unsigned long long) - 1;
221  }
222  return (unsigned long long)ret;
223 }
224 
225 unsigned long long
226 getMaxClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one)
227 {
228  unsigned int ret = 0;
229  nvmlReturn_t bad;
230  bad = (*nvmlDeviceGetClockInfoPtr)(dev, which_one, &ret);
231 
232  if (NVML_SUCCESS != bad) {
233  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
234  return (unsigned long long) - 1;
235  }
236  return (unsigned long long) ret;
237 }
238 
239 unsigned long long
240 getMemoryInfo(nvmlDevice_t dev, int which_one)
241 {
242  nvmlMemory_t meminfo;
243  nvmlReturn_t bad;
244  bad = (*nvmlDeviceGetMemoryInfoPtr)(dev, &meminfo);
245 
246  if (NVML_SUCCESS != bad) {
247  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
248  return (unsigned long long) - 1;
249  }
250 
251  switch (which_one) {
253  return meminfo.total;
254  case MEMINFO_UNALLOCED:
255  return meminfo.free;
256  case MEMINFO_ALLOCED:
257  return meminfo.used;
258  default:
259  ;
260  }
261  return (unsigned long long) - 1;
262 }
263 
264 unsigned long long
265 getPState(nvmlDevice_t dev)
266 {
267  unsigned int ret = 0;
268  nvmlPstates_t state = NVML_PSTATE_15;
269  nvmlReturn_t bad;
270  bad = (*nvmlDeviceGetPerformanceStatePtr)(dev, &state);
271 
272  if (NVML_SUCCESS != bad) {
273  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
274  return (unsigned long long) - 1;
275  }
276  switch (state) {
277  case NVML_PSTATE_15:
278  ret++;
279  // fall through
280  case NVML_PSTATE_14:
281  ret++;
282  // fall through
283  case NVML_PSTATE_13:
284  ret++;
285  // fall through
286  case NVML_PSTATE_12:
287  ret++;
288  // fall through
289  case NVML_PSTATE_11:
290  ret++;
291  // fall through
292  case NVML_PSTATE_10:
293  ret++;
294  // fall through
295  case NVML_PSTATE_9:
296  ret++;
297  // fall through
298  case NVML_PSTATE_8:
299  ret++;
300  // fall through
301  case NVML_PSTATE_7:
302  ret++;
303  // fall through
304  case NVML_PSTATE_6:
305  ret++;
306  // fall through
307  case NVML_PSTATE_5:
308  ret++;
309  // fall through
310  case NVML_PSTATE_4:
311  ret++;
312  // fall through
313  case NVML_PSTATE_3:
314  ret++;
315  // fall through
316  case NVML_PSTATE_2:
317  ret++;
318  // fall through
319  case NVML_PSTATE_1:
320  ret++;
321  // fall through
322  case NVML_PSTATE_0:
323  break;
324  // fall through
325  case NVML_PSTATE_UNKNOWN:
326  default:
327  /* This should never happen?
328  * The API docs just state Unknown performance state... */
329  return (unsigned long long) - 1;
330  }
331  return (unsigned long long)ret;
332 }
333 
334 unsigned long long
335 getPowerUsage(nvmlDevice_t dev)
336 {
337  unsigned int power;
338  nvmlReturn_t bad;
339  bad = (*nvmlDeviceGetPowerUsagePtr)(dev, &power);
340 
341  if (NVML_SUCCESS != bad) {
342  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
343  return (unsigned long long) - 1;
344  }
345  return (unsigned long long) power;
346 }
347 
348 unsigned long long
349 getTemperature(nvmlDevice_t dev)
350 {
351  unsigned int ret = 0;
352  nvmlReturn_t bad;
353  bad = (*nvmlDeviceGetTemperaturePtr)(dev, NVML_TEMPERATURE_GPU, &ret);
354 
355  if (NVML_SUCCESS != bad) {
356  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
357  return (unsigned long long) - 1;
358  }
359  return (unsigned long long)ret;
360 }
361 
362 unsigned long long
363 getTotalEccErrors(nvmlDevice_t dev, nvmlEccBitType_t bits)
364 {
365  unsigned long long counts = 0;
366  nvmlReturn_t bad;
367  bad = (*nvmlDeviceGetTotalEccErrorsPtr)(dev, bits, NVML_VOLATILE_ECC , &counts);
368 
369  if (NVML_SUCCESS != bad) {
370  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
371  return (unsigned long long) - 1;
372  }
373  return counts;
374 }
375 
376 /* 0 => gpu util
377  1 => memory util
378  */
379 unsigned long long
380 getUtilization(nvmlDevice_t dev, int which_one)
381 {
382  nvmlUtilization_t util;
383  nvmlReturn_t bad;
384  bad = (*nvmlDeviceGetUtilizationRatesPtr)(dev, &util);
385 
386  if (NVML_SUCCESS != bad) {
387  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(bad));
388  return (unsigned long long) - 1;
389  }
390 
391  switch (which_one) {
392  case GPU_UTILIZATION:
393  return (unsigned long long) util.gpu;
394  case MEMORY_UTILIZATION:
395  return (unsigned long long) util.memory;
396  default:
397  ;
398  }
399 
400  return (unsigned long long) - 1;
401 }
402 
403 unsigned long long getPowerManagementLimit(nvmlDevice_t dev)
404 {
405  unsigned int limit;
406  nvmlReturn_t rv;
407  rv = (*nvmlDeviceGetPowerManagementLimitPtr)(dev, &limit);
408  if (NVML_SUCCESS != rv) {
409  SUBDBG("something went wrong %s\n", (*nvmlErrorStringPtr)(rv));
410  return (unsigned long long) 0;
411  }
412  return (unsigned long long) limit;
413 }
414 
415 static void
417 {
418  /* nvmlDeviceSet* and nvmlDeviceClear* calls require root/admin access, so while
419  * possible to implement a reset on the ECC counters, we pass */
420  /*
421  for ( i=0; i < device_count; i++ )
422  nvmlDeviceClearEccErrorCounts( device[i], NVML_VOLATILE_ECC );
423  */
424  int i;
425  nvmlReturn_t ret;
426  unsigned int templimit = 0;
427  for (i = 0; i < device_count; i++) {
429  // if power management is available
430  if (power_management_initial_limit[i] != 0) {
431  ret = (*nvmlDeviceGetPowerManagementLimitPtr)(devices[i], &templimit);
432  if ((ret == NVML_SUCCESS) && (templimit != power_management_initial_limit[i])) {
433  SUBDBG("Reset power_management_limit on device %d to initial value of %d \n", i, power_management_initial_limit[i]);
434  // if power is not at its initial value
435  // reset to initial value
436  ret = (*nvmlDeviceSetPowerManagementLimitPtr)(devices[i], power_management_initial_limit[i]);
437  if (ret != NVML_SUCCESS)
438  SUBDBG("Unable to reset the NVML power management limit on device %i to %ull (return code %d) \n", i, power_management_initial_limit[i] , ret);
439  }
440  }
441  }
442  }
443 }
444 
446 /* You might replace this with code that accesses */
447 /* hardware or reads values from the operating system. */
/*
 * Read the current value of one native event, using the device index that
 * cudaGetDevice reports.
 *
 * value     - out: receives the reading; preset to -1 before any query.
 * which_one - index into nvml_native_table selecting the event.
 *
 * Returns PAPI_OK on success. Returns PAPI_EINVAL when the device index is
 * rejected, the device's feature mask lacks the event's type, the event type
 * is not handled by the switch, or the reading equals the -1 error sentinel.
 *
 * NOTE(review): this listing has gaps (listing lines 452, 474, 499, 507, 511
 * and 515 are absent), so the declaration of `entry` and several `case`
 * labels are not visible here.
 */
448 static int
449 nvml_hardware_read(long long *value, int which_one)
450 //, nvml_context_t *ctx)
451 {
453  nvmlDevice_t handle;
454  int cudaIdx = -1;
455 
456  entry = &nvml_native_table[which_one];
457  *value = (long long) - 1;
458  /* replace entry->resources with the current cuda_device->nvml device */
/* NOTE(review): the return code of cudaGetDevice is ignored; presumably a
 * failed call leaves cudaIdx at its initial -1, which the check below rejects. */
459  (*cudaGetDevicePtr)(&cudaIdx);
460 
/* NOTE(review): likely off-by-one. cudaIdx == device_count passes this test,
 * but the device-detection loop in this file only fills features[] and
 * devices[] for indices below device_count; '>=' looks intended. */
461  if (cudaIdx < 0 || cudaIdx > device_count)
462  return PAPI_EINVAL;
463 
464  /* Make sure the device we are running on has the requested event */
465  if (!HAS_FEATURE(features[cudaIdx] , entry->type))
466  return PAPI_EINVAL;
467 
468  handle = devices[cudaIdx];
469 
/* Dispatch on the event type; each getter returns (unsigned long long)-1 on
 * failure (getPowerManagementLimit returns 0 instead). */
470  switch (entry->type) {
471  case FEATURE_CLOCK_INFO:
472  *value = getClockSpeed(handle, (nvmlClockType_t)entry->options.clock);
473  break;
475  *value = getEccLocalErrors(handle,
476  (nvmlEccBitType_t)entry->options.ecc_opts.bits,
477  (int)entry->options.ecc_opts.which_one);
478  break;
479  case FEATURE_FAN_SPEED:
480  *value = getFanSpeed(handle);
481  break;
482  case FEATURE_MAX_CLOCK:
483  *value = getMaxClockSpeed(handle,
484  (nvmlClockType_t)entry->options.clock);
485  break;
486  case FEATURE_MEMORY_INFO:
487  *value = getMemoryInfo(handle,
488  (int)entry->options.which_one);
489  break;
490  case FEATURE_PERF_STATES:
491  *value = getPState(handle);
492  break;
493  case FEATURE_POWER:
494  *value = getPowerUsage(handle);
495  break;
496  case FEATURE_TEMP:
497  *value = getTemperature(handle);
498  break;
500  *value = getTotalEccErrors(handle,
501  (nvmlEccBitType_t)entry->options.ecc_opts.bits);
502  break;
503  case FEATURE_UTILIZATION:
504  *value = getUtilization(handle,
505  (int)entry->options.which_one);
506  break;
508  *value = getPowerManagementLimit(handle);
509  break;
510 
/* The min/max constraints are served from the per-device cached arrays
 * rather than by querying NVML again. */
512  *value = power_management_limit_constraint_min[cudaIdx];
513  break;
514 
516  *value = power_management_limit_constraint_max[cudaIdx];
517  break;
518 
519  default:
520  return PAPI_EINVAL;
521  }
/* Translate the getters' -1 failure sentinel into a PAPI error code. */
522  if (*value == (long long)(unsigned long long) - 1)
523  return PAPI_EINVAL;
524 
525  return PAPI_OK;
526 }
527 
529 /* You might replace this with code that accesses */
530 /* hardware or reads values from the operating system. */
/*
 * Write a new value for one native event on the device index that
 * cudaGetDevice reports. The only write path visible here sets the power
 * management limit.
 *
 * value     - in: requested setting; narrowed to unsigned int before use.
 * which_one - index into nvml_native_table selecting the event.
 *
 * Returns PAPI_OK on success. Returns PAPI_EINVAL when the device index is
 * rejected, the device's feature mask lacks the event's type, the request is
 * outside the cached [min, max] constraints for the device, the NVML set
 * call fails, or the event type is not handled by the switch.
 *
 * NOTE(review): this listing has gaps (listing lines 533 and 552 are absent),
 * so the declaration of `entry` and the `case` label that opens the braced
 * block below are not visible here.
 */
531 static int nvml_hardware_write(long long *value, int which_one)
532 {
534  nvmlDevice_t handle;
535  int cudaIdx = -1;
536  nvmlReturn_t nvret;
537 
538  entry = &nvml_native_table[which_one];
539  /* replace entry->resources with the current cuda_device->nvml device */
/* NOTE(review): the return code of cudaGetDevice is ignored; presumably a
 * failed call leaves cudaIdx at its initial -1, which the check below rejects. */
540  (*cudaGetDevicePtr)(&cudaIdx);
541 
/* NOTE(review): likely off-by-one. cudaIdx == device_count passes this test,
 * but the device-detection loop in this file only fills the per-device arrays
 * for indices below device_count; '>=' looks intended. */
542  if (cudaIdx < 0 || cudaIdx > device_count)
543  return PAPI_EINVAL;
544 
545  /* Make sure the device we are running on has the requested event */
546  if (!HAS_FEATURE(features[cudaIdx] , entry->type))
547  return PAPI_EINVAL;
548 
549  handle = devices[cudaIdx];
550 
551  switch (entry->type) {
/* NOTE(review): the cast below discards the upper bits and sign of *value
 * before the range checks run - confirm callers never pass values outside
 * the unsigned int range. */
553  unsigned int setToPower = (unsigned int) * value;
554  if (setToPower < power_management_limit_constraint_min[cudaIdx]) {
555  SUBDBG("Error: Desired power %u mW < minimum %u mW on device %d\n", setToPower, power_management_limit_constraint_min[cudaIdx], cudaIdx);
556  return PAPI_EINVAL;
557  }
558  if (setToPower > power_management_limit_constraint_max[cudaIdx]) {
559  SUBDBG("Error: Desired power %u mW > maximum %u mW on device %d\n", setToPower, power_management_limit_constraint_max[cudaIdx], cudaIdx);
560  return PAPI_EINVAL;
561  }
/* Request is within bounds: apply it and report any NVML failure. */
562  if ((nvret = (*nvmlDeviceSetPowerManagementLimitPtr)(handle, setToPower)) != NVML_SUCCESS) {
563  SUBDBG("Error: %s\n", (*nvmlErrorStringPtr)(nvret));
564  return PAPI_EINVAL;
565  }
566  }
567  break;
568 
569  default:
570  return PAPI_EINVAL;
571  }
572 
573  return PAPI_OK;
574 }
575 
576 /********************************************************************/
577 /* Below are the functions required by the PAPI component interface */
578 /********************************************************************/
579 
581 int
583 {
584  (void) ctx;
585 
586  SUBDBG("Enter: ctx: %p\n", ctx);
587 
588  return PAPI_OK;
589 }
590 
591 static int
593 {
594  nvmlReturn_t ret;
595  nvmlEnableState_t mode = NVML_FEATURE_DISABLED;
596  nvmlEnableState_t pendingmode = NVML_FEATURE_DISABLED;
597 
598  char name[64];
599  char inforomECC[16];
600  char names[device_count][64];
601 
602  float ecc_version = 0.0;
603 
604  int i = 0;
605 
606  unsigned int temp = 0;
607 
608  memset(names, 0x0, device_count * 64);
609 
610  /* So for each card, check whats querable */
611  for (i = 0; i < device_count; i++) {
612  features[i] = 0;
613 
614  ret = (*nvmlDeviceGetHandleByIndexPtr)(i, &devices[i]);
615  if (NVML_SUCCESS != ret) {
616  SUBDBG("nvmlDeviceGetHandleByIndex(%d, &devices[%d]) failed.\n", i, i);
617  return PAPI_ESYS;
618  }
619 
620  ret = (*nvmlDeviceGetNamePtr)(devices[i], name, sizeof(name) - 1);
621  if (NVML_SUCCESS != ret) {
622  SUBDBG("nvmlDeviceGetName failed \n");
623  strncpy(name, "deviceNameUnknown", 17);
624  }
625 
626  name[sizeof(name) - 1] = '\0'; // to safely use strstr operation below, the variable 'name' must be null terminated
627 
628  ret = (*nvmlDeviceGetInforomVersionPtr)(devices[i], NVML_INFOROM_ECC, inforomECC, 16);
629  if (NVML_SUCCESS != ret) {
630  SUBDBG("nvmlGetInforomVersion fails %s\n", (*nvmlErrorStringPtr)(ret));
631  } else {
632  ecc_version = strtof(inforomECC, NULL);
633  }
634 
635  if (getClockSpeed(devices[i], NVML_CLOCK_GRAPHICS) != (unsigned long long) - 1) {
637  num_events += 3;
638  }
639 
640  /* For Tesla and Quadro products from Fermi and Kepler families.
641  requires NVML_INFOROM_ECC 2.0 or higher for location-based counts
642  requires NVML_INFOROM_ECC 1.0 or higher for all other ECC counts
643  requires ECC mode to be enabled. */
644  ret = (*nvmlDeviceGetEccModePtr)(devices[i], &mode, &pendingmode);
645  if (NVML_SUCCESS == ret) {
646  if (NVML_FEATURE_ENABLED == mode) {
647  if (ecc_version >= 2.0) {
649  num_events += 8; /* {single bit, two bit errors} x { reg, l1, l2, memory } */
650  }
651  if (ecc_version >= 1.0) {
653  num_events += 2; /* single bit errors, double bit errors */
654  }
655  }
656  } else {
657  SUBDBG("nvmlDeviceGetEccMode does not appear to be supported. (nvml return code %d)\n", ret);
658  }
659 
660  /* Check if fan speed is available */
661  if (getFanSpeed(devices[i]) != (unsigned long long) - 1) {
663  num_events++;
664  }
665 
666  /* Check if clock data are available */
667  if (getMaxClockSpeed(devices[i], NVML_CLOCK_GRAPHICS) != (unsigned long long) - 1) {
669  num_events += 3;
670  }
671 
672  /* For all products */
674  num_events += 3; /* total, free, used */
675 
676  /* Check if performance state is available */
677  if (getPState(devices[i]) != (unsigned long long) - 1) {
679  num_events++;
680  }
681 
682  /* For "GF11x" Tesla and Quadro products from the Fermi family
683  requires NVML_INFOROM_POWER 3.0 or higher
684  For Tesla and Quadro products from the Kepler family
685  does not require NVML_INFOROM_POWER */
686  /* Just try reading power, if it works, enable it*/
687  ret = (*nvmlDeviceGetPowerUsagePtr)(devices[i], &temp);
688  if (NVML_SUCCESS == ret) {
690  num_events++;
691  } else {
692  SUBDBG("nvmlDeviceGetPowerUsage does not appear to be supported on this card. (nvml return code %d)\n", ret);
693  }
694 
695  /* Check if temperature data are available */
696  if (getTemperature(devices[i]) != (unsigned long long) - 1) {
697  features[i] |= FEATURE_TEMP;
698  num_events++;
699  }
700 
701  // For power_management_limit
702  {
703  // Just try the call to see if it works
704  unsigned int templimit = 0;
705  ret = (*nvmlDeviceGetPowerManagementLimitPtr)(devices[i], &templimit);
706  if (ret == NVML_SUCCESS && templimit > 0) {
707  power_management_initial_limit[i] = templimit;
709  num_events += 1;
710  } else {
712  SUBDBG("nvmlDeviceGetPowerManagementLimit not appear to be supported on this card. (NVML code %d)\n", ret);
713  }
714  }
715 
716  // For power_management_limit_constraints, minimum and maximum
717  {
718  unsigned int minLimit = 0, maxLimit = 0;
719  ret = (*nvmlDeviceGetPowerManagementLimitConstraintsPtr)(devices[i], &minLimit, &maxLimit);
720  if (ret == NVML_SUCCESS) {
723  num_events += 1;
726  num_events += 1;
727  } else {
730  }
731  SUBDBG("Done nvmlDeviceGetPowerManagementLimitConstraintsPtr\n");
732  }
733 
734  /* Check if utilization data are available */
735  if (getUtilization(devices[i], GPU_UTILIZATION) != (unsigned long long) - 1) {
737  num_events += 2;
738  }
739 
740  int retval = snprintf(names[i], sizeof(name), "%s:device:%d", name, i);
741  if (retval > (int)sizeof(name)) {
742  SUBDBG("Device name is too long %s:device%d", name, i);
743  return (PAPI_EINVAL);
744  }
745  names[i][sizeof(name) - 1] = '\0';
746  }
747  return PAPI_OK;
748 }
749 
750 static void
752 {
753  char name[64];
754  char sanitized_name[PAPI_MAX_STR_LEN];
755  char names[device_count][64];
756 
757  int i, nameLen = 0, j;
758 
760  nvmlReturn_t ret;
761 
765  entry = &nvml_native_table[0];
766 
767  for (i = 0; i < device_count; i++) {
768  memset(names[i], 0x0, 64);
769  ret = (*nvmlDeviceGetNamePtr)(devices[i], name, sizeof(name) - 1);
770  if (NVML_SUCCESS != ret) {
771  SUBDBG("nvmlDeviceGetName failed \n");
772  strncpy(name, "deviceNameUnknown", 17);
773  }
774  name[sizeof(name) - 1] = '\0'; // to safely use strlen operation below, the variable 'name' must be null terminated
775 
776  nameLen = strlen(name);
777  strncpy(sanitized_name, name, PAPI_MAX_STR_LEN);
778 
779  int retval = snprintf(sanitized_name, sizeof(name), "%s:device_%d", name, i);
780  if (retval > (int)sizeof(name)) {
781  SUBDBG("Device name is too long %s:device%d", name, i);
782  return;
783  }
784  sanitized_name[sizeof(name) - 1] = '\0';
785 
786  for (j = 0; j < nameLen; j++)
787  if (' ' == sanitized_name[j])
788  sanitized_name[j] = '_';
789 
791  sprintf(entry->name, "%s:graphics_clock", sanitized_name);
792  strncpy(entry->description, "Graphics clock domain (MHz).", PAPI_MAX_STR_LEN);
793  entry->options.clock = NVML_CLOCK_GRAPHICS;
794  entry->type = FEATURE_CLOCK_INFO;
795  entry++;
796 
797  sprintf(entry->name, "%s:sm_clock", sanitized_name);
798  strncpy(entry->description, "SM clock domain (MHz).", PAPI_MAX_STR_LEN);
799  entry->options.clock = NVML_CLOCK_SM;
800  entry->type = FEATURE_CLOCK_INFO;
801  entry++;
802 
803  sprintf(entry->name, "%s:memory_clock", sanitized_name);
804  strncpy(entry->description, "Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
805  entry->options.clock = NVML_CLOCK_MEM;
806  entry->type = FEATURE_CLOCK_INFO;
807  entry++;
808  }
809 
811  sprintf(entry->name, "%s:l1_single_ecc_errors", sanitized_name);
812  strncpy(entry->description, "L1 cache single bit ECC", PAPI_MAX_STR_LEN);
813  entry->options.ecc_opts = (struct local_ecc) {
814  .bits = NVML_SINGLE_BIT_ECC,
815  .which_one = LOCAL_ECC_L1,
816  };
818  entry++;
819 
820  sprintf(entry->name, "%s:l2_single_ecc_errors", sanitized_name);
821  strncpy(entry->description, "L2 cache single bit ECC", PAPI_MAX_STR_LEN);
822  entry->options.ecc_opts = (struct local_ecc) {
823  .bits = NVML_SINGLE_BIT_ECC,
824  .which_one = LOCAL_ECC_L2,
825  };
827  entry++;
828 
829  sprintf(entry->name, "%s:memory_single_ecc_errors", sanitized_name);
830  strncpy(entry->description, "Device memory single bit ECC", PAPI_MAX_STR_LEN);
831  entry->options.ecc_opts = (struct local_ecc) {
832  .bits = NVML_SINGLE_BIT_ECC,
833  .which_one = LOCAL_ECC_MEM,
834  };
836  entry++;
837 
838  sprintf(entry->name, "%s:regfile_single_ecc_errors", sanitized_name);
839  strncpy(entry->description, "Register file single bit ECC", PAPI_MAX_STR_LEN);
840  entry->options.ecc_opts = (struct local_ecc) {
841  .bits = NVML_SINGLE_BIT_ECC,
842  .which_one = LOCAL_ECC_REGFILE,
843  };
845  entry++;
846 
847  sprintf(entry->name, "%s:1l_double_ecc_errors", sanitized_name);
848  strncpy(entry->description, "L1 cache double bit ECC", PAPI_MAX_STR_LEN);
849  entry->options.ecc_opts = (struct local_ecc) {
850  .bits = NVML_DOUBLE_BIT_ECC,
851  .which_one = LOCAL_ECC_L1,
852  };
854  entry++;
855 
856  sprintf(entry->name, "%s:l2_double_ecc_errors", sanitized_name);
857  strncpy(entry->description, "L2 cache double bit ECC", PAPI_MAX_STR_LEN);
858  entry->options.ecc_opts = (struct local_ecc) {
859  .bits = NVML_DOUBLE_BIT_ECC,
860  .which_one = LOCAL_ECC_L2,
861  };
863  entry++;
864 
865  sprintf(entry->name, "%s:memory_double_ecc_errors", sanitized_name);
866  strncpy(entry->description, "Device memory double bit ECC", PAPI_MAX_STR_LEN);
867  entry->options.ecc_opts = (struct local_ecc) {
868  .bits = NVML_DOUBLE_BIT_ECC,
869  .which_one = LOCAL_ECC_MEM,
870  };
872  entry++;
873 
874  sprintf(entry->name, "%s:regfile_double_ecc_errors", sanitized_name);
875  strncpy(entry->description, "Register file double bit ECC", PAPI_MAX_STR_LEN);
876  entry->options.ecc_opts = (struct local_ecc) {
877  .bits = NVML_DOUBLE_BIT_ECC,
878  .which_one = LOCAL_ECC_REGFILE,
879  };
881  entry++;
882  }
883 
885  sprintf(entry->name, "%s:fan_speed", sanitized_name);
886  strncpy(entry->description, "The fan speed expressed as a percent of the maximum, i.e. full speed is 100%", PAPI_MAX_STR_LEN);
887  entry->type = FEATURE_FAN_SPEED;
888  entry++;
889  }
890 
892  sprintf(entry->name, "%s:graphics_max_clock", sanitized_name);
893  strncpy(entry->description, "Maximal Graphics clock domain (MHz).", PAPI_MAX_STR_LEN);
894  entry->options.clock = NVML_CLOCK_GRAPHICS;
895  entry->type = FEATURE_MAX_CLOCK;
896  entry++;
897 
898  sprintf(entry->name, "%s:sm_max_clock", sanitized_name);
899  strncpy(entry->description, "Maximal SM clock domain (MHz).", PAPI_MAX_STR_LEN);
900  entry->options.clock = NVML_CLOCK_SM;
901  entry->type = FEATURE_MAX_CLOCK;
902  entry++;
903 
904  sprintf(entry->name, "%s:memory_max_clock", sanitized_name);
905  strncpy(entry->description, "Maximal Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
906  entry->options.clock = NVML_CLOCK_MEM;
907  entry->type = FEATURE_MAX_CLOCK;
908  entry++;
909  }
910 
912  sprintf(entry->name, "%s:total_memory", sanitized_name);
913  strncpy(entry->description, "Total installed FB memory (in bytes).", PAPI_MAX_STR_LEN);
915  entry->type = FEATURE_MEMORY_INFO;
916  entry++;
917 
918  sprintf(entry->name, "%s:unallocated_memory", sanitized_name);
919  strncpy(entry->description, "Uncallocated FB memory (in bytes).", PAPI_MAX_STR_LEN);
921  entry->type = FEATURE_MEMORY_INFO;
922  entry++;
923 
924  sprintf(entry->name, "%s:allocated_memory", sanitized_name);
925  strncpy(entry->description, "Allocated FB memory (in bytes). Note that the driver/GPU always sets aside a small amount of memory for bookkeeping.", PAPI_MAX_STR_LEN);
927  entry->type = FEATURE_MEMORY_INFO;
928  entry++;
929  }
930 
932  sprintf(entry->name, "%s:pstate", sanitized_name);
933  strncpy(entry->description, "The performance state of the device.", PAPI_MAX_STR_LEN);
934  entry->type = FEATURE_PERF_STATES;
935  entry++;
936  }
937 
939  sprintf(entry->name, "%s:power", sanitized_name);
940  // set the power event units value to "mW" for milliwatts
941  strncpy(entry->units, "mW", PAPI_MIN_STR_LEN);
942  strncpy(entry->description, "Power usage reading for the device, in miliwatts. This is the power draw (+/-5 watts) for the entire board: GPU, memory, etc.", PAPI_MAX_STR_LEN);
943  entry->type = FEATURE_POWER;
944  entry++;
945  }
946 
948  sprintf(entry->name, "%s:temperature", sanitized_name);
949  strncpy(entry->description, "Current temperature readings for the device, in degrees C.", PAPI_MAX_STR_LEN);
950  entry->type = FEATURE_TEMP;
951  entry++;
952  }
953 
955  sprintf(entry->name, "%s:total_ecc_errors", sanitized_name);
956  strncpy(entry->description, "Total single bit errors.", PAPI_MAX_STR_LEN);
957  entry->options.ecc_opts = (struct local_ecc) {
958  .bits = NVML_SINGLE_BIT_ECC,
959  };
961  entry++;
962 
963  sprintf(entry->name, "%s:total_ecc_errors", sanitized_name);
964  strncpy(entry->description, "Total double bit errors.", PAPI_MAX_STR_LEN);
965  entry->options.ecc_opts = (struct local_ecc) {
966  .bits = NVML_DOUBLE_BIT_ECC,
967  };
969  entry++;
970  }
971 
973  sprintf(entry->name, "%s:gpu_utilization", sanitized_name);
974  strncpy(entry->description, "Percent of time over the past second during which one or more kernels was executing on the GPU.", PAPI_MAX_STR_LEN);
976  entry->type = FEATURE_UTILIZATION;
977  entry++;
978 
979  sprintf(entry->name, "%s:memory_utilization", sanitized_name);
980  strncpy(entry->description, "Percent of time over the past second during which global (device) memory was being read or written.", PAPI_MAX_STR_LEN);
982  entry->type = FEATURE_UTILIZATION;
983  entry++;
984  }
985 
987  sprintf(entry->name, "%s:power_management_limit", sanitized_name);
988  // set the power event units value to "mW" for milliwatts
989  strncpy(entry->units, "mW", PAPI_MIN_STR_LEN);
990  strncpy(entry->description, "Power management limit in milliwatts associated with the device. The power limit defines the upper boundary for the cards power draw. If the cards total power draw reaches this limit the power management algorithm kicks in. This should be writable (with appropriate privileges) on supported Kepler or later (unit milliWatts). ", PAPI_MAX_STR_LEN);
992  entry++;
993  }
995  sprintf(entry->name, "%s:power_management_limit_constraint_min", sanitized_name);
996  strncpy(entry->units, "mW", PAPI_MIN_STR_LEN);
997  strncpy(entry->description, "The minimum power management limit in milliwatts.", PAPI_MAX_STR_LEN);
999  entry++;
1000  }
1001 
1003  sprintf(entry->name, "%s:power_management_limit_constraint_max", sanitized_name);
1004  strncpy(entry->units, "mW", PAPI_MIN_STR_LEN);
1005  strncpy(entry->description, "The maximum power management limit in milliwatts.", PAPI_MAX_STR_LEN);
1007  entry++;
1008  }
1009 
1010  strncpy(names[i], name, sizeof(names[0]) - 1);
1011  names[i][sizeof(names[0]) - 1] = '\0';
1012  }
1013 } // create native events.
1014 
1015 
1016 // Triggered by PAPI_shutdown(), but also if init fails to complete; for example due
1017 // to a missing library. We still need to clean up. The dynamic libs (dlxxx routines)
1018 // may have open mallocs that need to be free()d.
1019 
/* Body of _papi_nvml_shutdown_component (its header line is not visible in this listing).
 * Releases what the component acquired: the devices/features arrays, the NVML session,
 * and the three dlopen() handles. Always returns PAPI_OK.
 * It is also invoked from the init path on every failure, so it has to cope with a
 * partially initialised component; that is why each resource is tested before release. */
1021 {
1022  SUBDBG("Enter:\n");
/* NOTE(review): neither pointer is reset to NULL after papi_free() in the lines visible
 * here, so a second call would free them again -- confirm against the lines that are
 * missing from this listing (1023-1024 and 1027-1029). */
1025  if (devices != NULL) papi_free(devices);
1026  if (features != NULL) papi_free(features);
/* nvmlShutdownPtr is only non-NULL once linkCudaLibraries() resolved it.
 * NOTE(review): the pointer is left set after dl3 is closed below -- presumably it
 * should be cleared as well; verify. */
1030  if (nvmlShutdownPtr) (*nvmlShutdownPtr)(); // Call nvml shutdown if we got that far.
1031

/* Forget the device and event counts. */
1032  device_count = 0;
1033  num_events = 0;
1034

1035  // close the dynamic libraries needed by this component (opened in the init component call)
/* Handles are closed in the reverse of the order linkCudaLibraries() opens them and
 * reset to NULL so they are not closed twice. */
1036  if (dl3) {dlclose(dl3); dl3=NULL;}
1037  if (dl2) {dlclose(dl2); dl2=NULL;}
1038  if (dl1) {dlclose(dl1); dl1=NULL;}
1039

1040  return PAPI_OK;
1041 }
1042 
1043 
1044 
/* Body of _papi_nvml_init_component (its header line is not visible in this listing;
 * `cidx` is used below, so it is presumably the component-index parameter -- TODO confirm).
 *
 * Sequence: link the CUDA/NVML libraries, initialise NVML and the CUDA driver, query the
 * device count from both, require the counts to agree, allocate the per-device arrays and
 * run detectDevices().
 *
 * Returns PAPI_OK on success. Every failure path calls _papi_nvml_shutdown_component()
 * and returns PAPI_ENOSUPP; cmp_info.disabled_reason is filled in either here or by
 * linkCudaLibraries(). */
1051 {
1052  SUBDBG("Entry: cidx: %d\n", cidx);
1053  nvmlReturn_t ret;
1054  cudaError_t cuerr;
1055  int papi_errorcode;
1056

1057  int cuda_count = 0;
1058  unsigned int nvml_count = 0;
1059

1060  /* link in the cuda and nvml libraries and resolve the symbols we need to use */
1061  if (linkCudaLibraries() != PAPI_OK) {
1062  SUBDBG("Dynamic link of CUDA libraries failed, component will be disabled.\n");
1063  SUBDBG("See disable reason in papi_component_avail output for more details.\n");
1064  _papi_nvml_shutdown_component(); // clean up any open dynLibs, mallocs, etc.
1065  return (PAPI_ENOSUPP);
1066  }
1067

1068  ret = (*nvmlInitPtr)();
1069  if (NVML_SUCCESS != ret) {
1070  strcpy(_nvml_vector.cmp_info.disabled_reason, "The NVIDIA managament library failed to initialize.");
1071  _papi_nvml_shutdown_component(); // clean up any open dynLibs, mallocs, etc.
1072  return PAPI_ENOSUPP;
1073  }
1074

/* NOTE(review): cuInitPtr is declared as returning CUresult, yet the result is stored in
 * a cudaError_t and compared against cudaSuccess. This presumably only works because both
 * success values are numerically equal -- confirm against the CUDA headers. */
1075  cuerr = (*cuInitPtr)(0);
1076  if (cudaSuccess != cuerr) {
1077  strcpy(_nvml_vector.cmp_info.disabled_reason, "The CUDA library failed to initialize.");
1078  _papi_nvml_shutdown_component(); // clean up any open dynLibs, mallocs, etc.
1079  return PAPI_ENOSUPP;
1080  }
1081

1082  /* Figure out the number of CUDA devices in the system */
1083  ret = (*nvmlDeviceGetCountPtr)(&nvml_count);
1084  if (NVML_SUCCESS != ret) {
1085  strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a count of devices from the NVIDIA managament library.");
1086  _papi_nvml_shutdown_component(); // clean up any open dynLibs, mallocs, etc.
1087  return PAPI_ENOSUPP;
1088  }
1089

1090  cuerr = (*cudaGetDeviceCountPtr)(&cuda_count);
1091  if (cudaSuccess != cuerr) {
1092  strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a device count from CUDA.");
1093  _papi_nvml_shutdown_component(); // clean up any open dynLibs, mallocs, etc.
1094  return PAPI_ENOSUPP;
1095  }
1096

1097  /* We can probably recover from this, when we're clever */
/* The mismatch is only treated as fatal when CUDA reports at least one device; with
 * cuda_count == 0 execution continues and device_count becomes 0 below. */
1098  if ((cuda_count > 0) && (nvml_count != (unsigned int)cuda_count)) {
1099  strcpy(_nvml_vector.cmp_info.disabled_reason, "CUDA and the NVIDIA managament library have different device counts.");
1100  _papi_nvml_shutdown_component(); // clean up any open dynLibs, mallocs, etc.
1101  return PAPI_ENOSUPP;
1102  }
1103

1104  device_count = cuda_count;
1105  SUBDBG("Need to setup NVML with %d devices\n", device_count);
1106

/* NOTE(review): none of the papi_malloc() results below is checked for NULL. */
1107  /* A per device representation of what events are present */
1108  features = (int*)papi_malloc(sizeof(int) * device_count);
1109

1110  /* Handles to each device */
1111  devices = (nvmlDevice_t*)papi_malloc(sizeof(nvmlDevice_t) * device_count);
1112

1113  /* For each device, store the initial power value to enable reset if power is altered */
1114  power_management_initial_limit = (unsigned int*)papi_malloc(sizeof(unsigned int) * device_count);
1115  power_management_limit_constraint_min = (unsigned int*)papi_malloc(sizeof(unsigned int) * device_count);
1116  power_management_limit_constraint_max = (unsigned int*)papi_malloc(sizeof(unsigned int) * device_count);
1117

1118  /* Figure out what events are supported on each card. */
/* papi_errorcode is only used for the comparison; PAPI_ENOSUPP is returned regardless
 * of the code detectDevices() produced. */
1119  if ((papi_errorcode = detectDevices()) != PAPI_OK) {
/* NOTE(review): devices is freed here without being reset to NULL, and the
 * _papi_nvml_shutdown_component() call two lines below frees it again when it is
 * non-NULL -- this looks like a double free; confirm (listing line 1120 is not visible). */
1121  papi_free(devices);
1122  sprintf(_nvml_vector.cmp_info.disabled_reason, "An error occured in device feature detection, please check your NVIDIA Management Library and CUDA install.");
1123  _papi_nvml_shutdown_component(); // clean up any open dynLibs, mallocs, etc.
1124  return PAPI_ENOSUPP;
1125  }
1126

1127  /* The assumption is that if everything went swimmingly in detectDevices,
1128  all nvml calls here should be fine. */
/* NOTE(review): the statements that belong to this comment and to the three "Export"
 * comments below (listing lines 1129, 1132, 1135, 1138-1139) are not visible here. */
1130

1131  /* Export the total number of events available */
1133

1134  /* Export the component id */
1136

1137  /* Export the number of 'counters' */
1140

1141  return PAPI_OK;
1142 }
1143 
1144 /*
1145  * Link the necessary CUDA libraries to use the cuda component. If any of them can not be found, then
1146  * the CUDA component will just be disabled. This is done at runtime so that a version of PAPI built
1147  * with the CUDA component can be installed and used on systems which have the CUDA libraries installed
1148  * and on systems where these libraries are not installed.
1149  */
1150 static int
1152 {
1153  /* Attempt to guess if we were statically linked to libc, if so bail */
1154  if (_dl_non_dynamic_init != NULL) {
1155  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML component does not support statically linking of libc.", PAPI_MAX_STR_LEN);
1156  return PAPI_ENOSUPP;
1157  }
1158 
1159  /* Need to link in the cuda libraries, if not found disable the component */
1160  dl1 = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL);
1161  if (!dl1) {
1162  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA library libcuda.so not found.", PAPI_MAX_STR_LEN);
1163  return (PAPI_ENOSUPP);
1164  }
1165  cuInitPtr = dlsym(dl1, "cuInit");
1166  if (dlerror() != NULL) {
1167  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA function cuInit not found.", PAPI_MAX_STR_LEN);
1168  return (PAPI_ENOSUPP);
1169  }
1170 
1171  dl2 = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL | RTLD_NODELETE);
1172  if (!dl2) {
1173  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDA runtime library libcudart.so not found.", PAPI_MAX_STR_LEN);
1174  return (PAPI_ENOSUPP);
1175  }
1176  cudaGetDevicePtr = dlsym(dl2, "cudaGetDevice");
1177  if (dlerror() != NULL) {
1178  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDevice not found.", PAPI_MAX_STR_LEN);
1179  return (PAPI_ENOSUPP);
1180  }
1181  cudaGetDeviceCountPtr = dlsym(dl2, "cudaGetDeviceCount");
1182  if (dlerror() != NULL) {
1183  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaGetDeviceCount not found.", PAPI_MAX_STR_LEN);
1184  return (PAPI_ENOSUPP);
1185  }
1186  cudaDeviceGetPCIBusIdPtr = dlsym(dl2, "cudaDeviceGetPCIBusId");
1187  if (dlerror() != NULL) {
1188  strncpy(_nvml_vector.cmp_info.disabled_reason, "CUDART function cudaDeviceGetPCIBusId not found.", PAPI_MAX_STR_LEN);
1189  return (PAPI_ENOSUPP);
1190  }
1191 
1192  dl3 = dlopen("libnvidia-ml.so", RTLD_NOW | RTLD_GLOBAL);
1193  if (!dl3) {
1194  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML runtime library libnvidia-ml.so not found.", PAPI_MAX_STR_LEN);
1195  return (PAPI_ENOSUPP);
1196  }
1197  nvmlDeviceGetClockInfoPtr = dlsym(dl3, "nvmlDeviceGetClockInfo");
1198  if (dlerror() != NULL) {
1199  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetClockInfo not found.", PAPI_MAX_STR_LEN);
1200  return (PAPI_ENOSUPP);
1201  }
1202  nvmlErrorStringPtr = dlsym(dl3, "nvmlErrorString");
1203  if (dlerror() != NULL) {
1204  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlErrorString not found.", PAPI_MAX_STR_LEN);
1205  return (PAPI_ENOSUPP);
1206  }
1207  nvmlDeviceGetDetailedEccErrorsPtr = dlsym(dl3, "nvmlDeviceGetDetailedEccErrors");
1208  if (dlerror() != NULL) {
1209  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetDetailedEccErrors not found.", PAPI_MAX_STR_LEN);
1210  return (PAPI_ENOSUPP);
1211  }
1212  nvmlDeviceGetFanSpeedPtr = dlsym(dl3, "nvmlDeviceGetFanSpeed");
1213  if (dlerror() != NULL) {
1214  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetFanSpeed not found.", PAPI_MAX_STR_LEN);
1215  return (PAPI_ENOSUPP);
1216  }
1217  nvmlDeviceGetMemoryInfoPtr = dlsym(dl3, "nvmlDeviceGetMemoryInfo");
1218  if (dlerror() != NULL) {
1219  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetMemoryInfo not found.", PAPI_MAX_STR_LEN);
1220  return (PAPI_ENOSUPP);
1221  }
1222  nvmlDeviceGetPerformanceStatePtr = dlsym(dl3, "nvmlDeviceGetPerformanceState");
1223  if (dlerror() != NULL) {
1224  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPerformanceState not found.", PAPI_MAX_STR_LEN);
1225  return (PAPI_ENOSUPP);
1226  }
1227  nvmlDeviceGetPowerUsagePtr = dlsym(dl3, "nvmlDeviceGetPowerUsage");
1228  if (dlerror() != NULL) {
1229  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPowerUsage not found.", PAPI_MAX_STR_LEN);
1230  return (PAPI_ENOSUPP);
1231  }
1232  nvmlDeviceGetTemperaturePtr = dlsym(dl3, "nvmlDeviceGetTemperature");
1233  if (dlerror() != NULL) {
1234  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTemperature not found.", PAPI_MAX_STR_LEN);
1235  return (PAPI_ENOSUPP);
1236  }
1237  nvmlDeviceGetTotalEccErrorsPtr = dlsym(dl3, "nvmlDeviceGetTotalEccErrors");
1238  if (dlerror() != NULL) {
1239  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetTotalEccErrors not found.", PAPI_MAX_STR_LEN);
1240  return (PAPI_ENOSUPP);
1241  }
1242  nvmlDeviceGetUtilizationRatesPtr = dlsym(dl3, "nvmlDeviceGetUtilizationRates");
1243  if (dlerror() != NULL) {
1244  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetUtilizationRates not found.", PAPI_MAX_STR_LEN);
1245  return (PAPI_ENOSUPP);
1246  }
1247  nvmlDeviceGetHandleByIndexPtr = dlsym(dl3, "nvmlDeviceGetHandleByIndex");
1248  if (dlerror() != NULL) {
1249  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetHandleByIndex not found.", PAPI_MAX_STR_LEN);
1250  return (PAPI_ENOSUPP);
1251  }
1252  nvmlDeviceGetPciInfoPtr = dlsym(dl3, "nvmlDeviceGetPciInfo");
1253  if (dlerror() != NULL) {
1254  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPciInfo not found.", PAPI_MAX_STR_LEN);
1255  return (PAPI_ENOSUPP);
1256  }
1257  nvmlDeviceGetNamePtr = dlsym(dl3, "nvmlDeviceGetName");
1258  if (dlerror() != NULL) {
1259  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetName not found.", PAPI_MAX_STR_LEN);
1260  return (PAPI_ENOSUPP);
1261  }
1262  nvmlDeviceGetInforomVersionPtr = dlsym(dl3, "nvmlDeviceGetInforomVersion");
1263  if (dlerror() != NULL) {
1264  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetInforomVersion not found.", PAPI_MAX_STR_LEN);
1265  return (PAPI_ENOSUPP);
1266  }
1267  nvmlDeviceGetEccModePtr = dlsym(dl3, "nvmlDeviceGetEccMode");
1268  if (dlerror() != NULL) {
1269  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetEccMode not found.", PAPI_MAX_STR_LEN);
1270  return (PAPI_ENOSUPP);
1271  }
1272  nvmlInitPtr = dlsym(dl3, "nvmlInit");
1273  if (dlerror() != NULL) {
1274  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlInit not found.", PAPI_MAX_STR_LEN);
1275  return (PAPI_ENOSUPP);
1276  }
1277  nvmlDeviceGetCountPtr = dlsym(dl3, "nvmlDeviceGetCount");
1278  if (dlerror() != NULL) {
1279  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetCount not found.", PAPI_MAX_STR_LEN);
1280  return (PAPI_ENOSUPP);
1281  }
1282  nvmlShutdownPtr = dlsym(dl3, "nvmlShutdown");
1283  if (dlerror() != NULL) {
1284  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlShutdown not found.", PAPI_MAX_STR_LEN);
1285  return (PAPI_ENOSUPP);
1286  }
1287  nvmlDeviceGetPowerManagementLimitPtr = dlsym(dl3, "nvmlDeviceGetPowerManagementLimit");
1288  if (dlerror() != NULL) {
1289  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPowerManagementLimit not found.", PAPI_MAX_STR_LEN);
1290  return (PAPI_ENOSUPP);
1291  }
1292  nvmlDeviceSetPowerManagementLimitPtr = dlsym(dl3, "nvmlDeviceSetPowerManagementLimit");
1293  if (dlerror() != NULL) {
1294  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceSetPowerManagementLimit not found.", PAPI_MAX_STR_LEN);
1295  return (PAPI_ENOSUPP);
1296  }
1297  nvmlDeviceGetPowerManagementLimitConstraintsPtr = dlsym(dl3, "nvmlDeviceGetPowerManagementLimitConstraints");
1298  if (dlerror() != NULL) {
1299  strncpy(_nvml_vector.cmp_info.disabled_reason, "NVML function nvmlDeviceGetPowerManagementLimitConstraints not found.", PAPI_MAX_STR_LEN);
1300  return (PAPI_ENOSUPP);
1301  }
1302  return (PAPI_OK);
1303 }
1304 
/* Control-state initialiser (registered as .init_control_state in _nvml_vector; the line
 * carrying the function name and parameter list is not visible in this listing).
 * Zero-fills the nvml_control_state_t that `ctl` points to and returns PAPI_OK. */
1310 int
1312 {
1313  SUBDBG("nvml_init_control_state... %p\n", ctl);
/* The opaque framework pointer is reinterpreted as this component's control state. */
1314  nvml_control_state_t *nvml_ctl = (nvml_control_state_t *) ctl;
1315  memset(nvml_ctl, 0, sizeof(nvml_control_state_t));
1316

1317  return PAPI_OK;
1318 }
1319 
1321 int
1324  int count,
1325  hwd_context_t *ctx)
1326 {
1327  SUBDBG("Enter: ctl: %p, ctx: %p\n", ctl, ctx);
1328  int i, index;
1329 
1330  nvml_control_state_t *nvml_ctl = (nvml_control_state_t *) ctl;
1331  (void) ctx;
1332 
1333  /* if no events, return */
1334  if (count == 0) return PAPI_OK;
1335 
1336  for (i = 0; i < count; i++) {
1337  index = native[i].ni_event;
1338  nvml_ctl->which_counter[i] = index;
1339  /* We have no constraints on event position, so any event */
1340  /* can be in any slot. */
1341  native[i].ni_position = i;
1342  }
1343  nvml_ctl->num_events = count;
1344  return PAPI_OK;
1345 }
1347 int
1349 {
1350  SUBDBG("Enter: ctx: %p, ctl: %p\n", ctx, ctl);
1351 
1352  (void) ctx;
1353  (void) ctl;
1354 
1355  /* anything that would need to be set at counter start time */
1356 
1357  /* reset */
1358  /* start the counting */
1359 
1360  return PAPI_OK;
1361 }
1362 
1364 int
1366 {
1367  SUBDBG("Enter: ctx: %p, ctl: %p\n", ctx, ctl);
1368 
1369  int i;
1370  (void) ctx;
1371  (void) ctl;
1372  int ret;
1373 
1374  nvml_control_state_t* nvml_ctl = (nvml_control_state_t*) ctl;
1375 
1376  for (i = 0; i < nvml_ctl->num_events; i++) {
1377  if (PAPI_OK !=
1378  (ret = nvml_hardware_read(&nvml_ctl->counter[i],
1379  nvml_ctl->which_counter[i])))
1380  return ret;
1381 
1382  }
1383 
1384  return PAPI_OK;
1385 }
1386 
1388 int
1390  long long **events, int flags)
1391 {
1392  SUBDBG("Enter: ctx: %p, flags: %d\n", ctx, flags);
1393 
1394  (void) ctx;
1395  (void) flags;
1396  int i;
1397  int ret;
1398  nvml_control_state_t* nvml_ctl = (nvml_control_state_t*) ctl;
1399 
1400  for (i = 0; i < nvml_ctl->num_events; i++) {
1401  if (PAPI_OK !=
1402  (ret = nvml_hardware_read(&nvml_ctl->counter[i],
1403  nvml_ctl->which_counter[i])))
1404  return ret;
1405 
1406  }
1407  /* return pointer to the values we read */
1408  *events = nvml_ctl->counter;
1409  return PAPI_OK;
1410 }
1411 
1413 /* otherwise, the updated state is written to ESI->hw_start */
1414 int
1416 {
1417  SUBDBG("Enter: ctx: %p, ctl: %p\n", ctx, ctl);
1418  (void) ctx;
1419  nvml_control_state_t* nvml_ctl = (nvml_control_state_t*) ctl;
1420  int i;
1421  int ret;
1422 
1423  /* You can change ECC mode and compute exclusivity modes on the cards */
1424  /* But I don't see this as a function of a PAPI component at this time */
1425  /* All implementation issues aside. */
1426 
1427  // Currently POWER_MANAGEMENT can be written
1428  for (i = 0; i < nvml_ctl->num_events; i++) {
1429  if (PAPI_OK != (ret = nvml_hardware_write(&events[i], nvml_ctl->which_counter[i])))
1430  return ret;
1431  }
1432 
1433  /* return pointer to the values we read */
1434  return PAPI_OK;
1435 }
1436 
1438 /* If the eventset is not currently running, then the saved value in the */
1439 /* EventSet is set to zero without calling this routine. */
/* Reset routine (registered as .reset in _nvml_vector; the line carrying the function
 * name and parameter list is not visible in this listing). In the lines shown it logs
 * the call, marks both parameters as unused and returns PAPI_OK. */
1440 int
1442 {
1443  SUBDBG("Enter: ctx: %p, ctl: %p\n", ctx, ctl);
1444

1445  (void) ctx;
1446  (void) ctl;
1447

/* NOTE(review): the statement that follows this comment (listing line 1449) is not
 * visible here -- presumably the call that performs the hardware reset; confirm. */
1448  /* Reset the hardware */
1450

1451  return PAPI_OK;
1452 }
1453 
1455 int
1457 {
1458  SUBDBG("Enter: ctx: %p\n", ctx);
1459 
1460  (void) ctx;
1461 
1462  /* Last chance to clean up thread */
1463 
1464  return PAPI_OK;
1465 }
1466 
1470 int
1472 {
1473  SUBDBG("Enter: ctx: %p, code: %d\n", ctx, code);
1474 
1475  (void) ctx;
1476  (void) code;
1477  (void) option;
1478 
1479  /* FIXME. This should maybe set up more state, such as which counters are active and */
1480  /* counter mappings. */
1481 
1482  return PAPI_OK;
1483 }
1484 
1494 int
1496 {
1497  SUBDBG("Enter: cntrl: %p, domain: %d\n", cntrl, domain);
1498 
1499  (void) cntrl;
1500 
1501  int found = 0;
1502 
1503  if (PAPI_DOM_USER & domain) {
1504  SUBDBG(" PAPI_DOM_USER \n");
1505  found = 1;
1506  }
1507  if (PAPI_DOM_KERNEL & domain) {
1508  SUBDBG(" PAPI_DOM_KERNEL \n");
1509  found = 1;
1510  }
1511  if (PAPI_DOM_OTHER & domain) {
1512  SUBDBG(" PAPI_DOM_OTHER \n");
1513  found = 1;
1514  }
1515  if (PAPI_DOM_ALL & domain) {
1516  SUBDBG(" PAPI_DOM_ALL \n");
1517  found = 1;
1518  }
1519  if (!found)
1520  return (PAPI_EINVAL);
1521 
1522  return PAPI_OK;
1523 }
1524 
1525 /**************************************************************/
1526 /* Naming functions, used to translate event numbers to names */
1527 /**************************************************************/
1528 
1535 int
1536 _papi_nvml_ntv_enum_events(unsigned int *EventCode, int modifier)
1537 {
1538  int index;
1539 
1540  switch (modifier) {
1541 
1542  /* return EventCode of first event */
1543  case PAPI_ENUM_FIRST:
1544  /* return the first event that we support */
1545 
1546  *EventCode = 0;
1547  return PAPI_OK;
1548 
1549  /* return EventCode of next available event */
1550  case PAPI_ENUM_EVENTS:
1551  index = *EventCode;
1552 
1553  /* Make sure we are in range */
1554  if (index < num_events - 1) {
1555 
1556  /* This assumes a non-sparse mapping of the events */
1557  *EventCode = *EventCode + 1;
1558  return PAPI_OK;
1559  } else {
1560  return PAPI_ENOEVNT;
1561  }
1562  break;
1563 
1564  default:
1565  return PAPI_EINVAL;
1566  }
1567 
1568  return PAPI_EINVAL;
1569 }
1570 
1576 int
1577 _papi_nvml_ntv_code_to_name(unsigned int EventCode, char *name, int len)
1578 {
1579  SUBDBG("Entry: EventCode: %#x, name: %s, len: %d\n", EventCode, name, len);
1580  int index;
1581 
1582  index = EventCode;
1583 
1584  /* Make sure we are in range */
1585  if (index >= num_events) return PAPI_ENOEVNT;
1586 
1587  strncpy(name, nvml_native_table[index].name, len);
1588 
1589  return PAPI_OK;
1590 }
1591 
1597 int
1598 _papi_nvml_ntv_code_to_descr(unsigned int EventCode, char *descr, int len)
1599 {
1600  int index;
1601  index = EventCode;
1602 
1603  if (index >= num_events) return PAPI_ENOEVNT;
1604 
1605  strncpy(descr, nvml_native_table[index].description, len);
1606 
1607  return PAPI_OK;
1608 }
1609 
1614 int
1615 _papi_nvml_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info)
1616 {
1617 
1618  int index = EventCode;
1619 
1620  if ((index < 0) || (index >= num_events)) return PAPI_ENOEVNT;
1621 
1622  strncpy(info->symbol, nvml_native_table[index].name, sizeof(info->symbol) - 1);
1623  info->symbol[sizeof(info->symbol) - 1] = '\0';
1624 
1625  strncpy(info->units, nvml_native_table[index].units, sizeof(info->units) - 1);
1626  info->units[sizeof(info->units) - 1] = '\0';
1627 
1628  strncpy(info->long_descr, nvml_native_table[index].description, sizeof(info->long_descr) - 1);
1629  info->long_descr[sizeof(info->long_descr) - 1] = '\0';
1630 
1631 // info->data_type = nvml_native_table[index].return_type;
1632 
1633  return PAPI_OK;
1634 }
1635 
1638  .cmp_info = {
1639  /* default component information */
1640  /* (unspecified values are initialized to 0) */
1641 
1642  .name = "nvml",
1643  .short_name = "nvml",
1644  .version = "1.0",
1645  .description = "NVML provides the API for monitoring NVIDIA hardware (power usage, temperature, fan speed, etc)",
1646  .support_version = "n/a",
1647  .kernel_version = "n/a",
1648 
1649  .num_preset_events = 0,
1650  .num_native_events = 0, /* set by init_component */
1651  .default_domain = PAPI_DOM_USER,
1652  .available_domains = PAPI_DOM_USER,
1653  .default_granularity = PAPI_GRN_THR,
1654  .available_granularities = PAPI_GRN_THR,
1655  .hardware_intr_sig = PAPI_INT_SIGNAL,
1656 
1657  /* component specific cmp_info initializations */
1658  .hardware_intr = 0,
1659  .precise_intr = 0,
1660  .posix1b_timers = 0,
1661  .kernel_profile = 0,
1662  .kernel_multiplex = 0,
1663  .fast_counter_read = 0,
1664  .fast_real_timer = 0,
1665  .fast_virtual_timer = 0,
1666  .attach = 0,
1667  .attach_must_ptrace = 0,
1668  .cntr_umasks = 0,
1669  .cpu = 0,
1670  .inherit = 0,
1671  },
1672 
1673  /* sizes of framework-opaque component-private structures */
1674  .size = {
1675  .context = sizeof(nvml_context_t),
1676  .control_state = sizeof(nvml_control_state_t),
1677  .reg_value = sizeof(nvml_register_t),
1678  // .reg_alloc = sizeof ( nvml_reg_alloc_t ),
1679  },
1680 
1681  /* function pointers */
1682 
1683  /* Used for general PAPI interactions */
1684  .start = _papi_nvml_start,
1685  .stop = _papi_nvml_stop,
1686  .read = _papi_nvml_read,
1687  .reset = _papi_nvml_reset,
1688  .write = _papi_nvml_write,
1689  .init_component = _papi_nvml_init_component,
1690  .init_thread = _papi_nvml_init_thread,
1691  .init_control_state = _papi_nvml_init_control_state,
1692  .update_control_state = _papi_nvml_update_control_state,
1693  .ctl = _papi_nvml_ctl,
1694  .shutdown_thread = _papi_nvml_shutdown_thread,
1695  .shutdown_component = _papi_nvml_shutdown_component,
1696  .set_domain = _papi_nvml_set_domain,
1697  .cleanup_eventset = NULL,
1698  /* called in add_native_events() */
1699  .allocate_registers = NULL,
1700 
1701  /* Used for overflow/profiling */
1702  .dispatch_timer = NULL,
1703  .get_overflow_address = NULL,
1704  .stop_profiling = NULL,
1705  .set_overflow = NULL,
1706  .set_profile = NULL,
1707 
1708  /* Name Mapping Functions */
1709  .ntv_enum_events = _papi_nvml_ntv_enum_events,
1710  .ntv_name_to_code = NULL,
1711  .ntv_code_to_name = _papi_nvml_ntv_code_to_name,
1712  .ntv_code_to_descr = _papi_nvml_ntv_code_to_descr,
1713  .ntv_code_to_info = _papi_nvml_ntv_code_to_info,
1714 
1715 };
1716 
#define PAPI_OK
Definition: fpapi.h:105
nvmlReturn_t DECLDIR nvmlDeviceGetName(nvmlDevice_t device, char *name, unsigned int len)
Definition: benchSANVML.c:95
nvmlReturn_t DECLDIR nvmlDeviceGetDetailedEccErrors(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, nvmlEccErrorCounts_t *)
char disabled_reason[PAPI_MAX_STR_LEN]
Definition: papi.h:637
void(* _dl_non_dynamic_init)(void)
Definition: linux-nvml.c:48
unsigned long long getPState(nvmlDevice_t dev)
Definition: linux-nvml.c:265
nvmlReturn_t(* nvmlDeviceGetCountPtr)(unsigned int *dest)
Definition: benchSANVML.c:66
int _papi_nvml_ntv_code_to_name(unsigned int EventCode, char *name, int len)
Definition: linux-nvml.c:1577
static const char * name
Definition: fork_overflow.c:31
#define FEATURE_NVML_POWER_MANAGEMENT_LIMIT_CONSTRAINT_MIN
Definition: linux-nvml.h:17
#define PAPI_DOM_KERNEL
Definition: fpapi.h:22
#define PAPI_EINVAL
Definition: fpapi.h:106
nvmlReturn_t(* nvmlDeviceGetNamePtr)(nvmlDevice_t, char *, unsigned int)
Definition: benchSANVML.c:73
static unsigned int * power_management_limit_constraint_min
Definition: linux-nvml.c:166
static unsigned int * power_management_initial_limit
Definition: linux-nvml.c:165
#define PAPI_GRN_THR
Definition: fpapi.h:67
char units[PAPI_MIN_STR_LEN]
Definition: papi.h:976
static int linkCudaLibraries()
Definition: linux-nvml.c:1151
#define papi_free(a)
Definition: papi_memory.h:35
#define PAPI_ENOSUPP
Definition: fpapi.h:123
unsigned long long getTotalEccErrors(nvmlDevice_t dev, nvmlEccBitType_t bits)
Definition: linux-nvml.c:363
nvmlReturn_t(* nvmlDeviceGetHandleByIndexPtr)(unsigned int, nvmlDevice_t *)
Definition: benchSANVML.c:70
#define FEATURE_ECC_LOCAL_ERRORS
Definition: linux-nvml.h:7
nvmlReturn_t DECLDIR nvmlDeviceGetTemperature(nvmlDevice_t, nvmlTemperatureSensors_t, unsigned int *)
cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *, int, int)
int type
Definition: linux-nvml.h:53
unsigned long long getPowerUsage(nvmlDevice_t dev)
Definition: linux-nvml.c:335
#define papi_malloc(a)
Definition: papi_memory.h:34
nvmlReturn_t(* nvmlDeviceGetEccModePtr)(nvmlDevice_t, nvmlEnableState_t *, nvmlEnableState_t *)
Definition: benchSANVML.c:68
#define MEMINFO_TOTAL_MEMORY
Definition: linux-nvml.h:22
#define FEATURE_ECC_TOTAL_ERRORS
Definition: linux-nvml.h:14
static int num_events
Definition: linux-nvml.c:161
#define DECLDIR
struct local_ecc ecc_opts
Definition: linux-nvml.h:44
int _papi_nvml_read(hwd_context_t *ctx, hwd_control_state_t *ctl, long long **events, int flags)
Definition: linux-nvml.c:1389
int _papi_nvml_stop(hwd_context_t *ctx, hwd_control_state_t *ctl)
Definition: linux-nvml.c:1365
nvmlReturn_t(* nvmlDeviceGetInforomVersionPtr)(nvmlDevice_t, nvmlInforomObject_t, char *, unsigned int)
Definition: benchSANVML.c:71
#define FEATURE_FAN_SPEED
Definition: linux-nvml.h:8
int _papi_nvml_write(hwd_context_t *ctx, hwd_control_state_t *ctl, long long *events)
Definition: linux-nvml.c:1415
static int nvml_hardware_write(long long *value, int which_one)
Definition: linux-nvml.c:531
nvmlReturn_t(* nvmlDeviceGetPciInfoPtr)(nvmlDevice_t, nvmlPciInfo_t *)
Definition: benchSANVML.c:74
unsigned long long getMemoryInfo(nvmlDevice_t dev, int which_one)
Definition: linux-nvml.c:240
char long_descr[PAPI_HUGE_STR_LEN]
Definition: papi.h:970
nvmlEccBitType_t bits
Definition: linux-nvml.h:38
#define LOCAL_ECC_MEM
Definition: linux-nvml.h:29
nvmlReturn_t(* nvmlDeviceGetPowerManagementLimitConstraintsPtr)(nvmlDevice_t device, unsigned int *minLimit, unsigned int *maxLimit)
Definition: benchSANVML.c:76
int retval
Definition: zero_fork.c:53
nvmlReturn_t DECLDIR nvmlDeviceGetPciInfo(nvmlDevice_t, nvmlPciInfo_t *)
#define NVML_MAX_COUNTERS
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
cudaError_t(* cudaDeviceGetPCIBusIdPtr)(char *, int, int)
Definition: benchSANVML.c:55
char name[PAPI_MAX_STR_LEN]
Definition: linux-nvml.h:50
int _papi_nvml_shutdown_component()
Definition: linux-nvml.c:1020
static nvml_native_event_entry_t * nvml_native_table
Definition: linux-nvml.c:155
Return codes and api definitions.
static void * dl1
Definition: linux-cuda.c:110
nvmlReturn_t DECLDIR nvmlDeviceGetFanSpeed(nvmlDevice_t, unsigned int *)
papi_vector_t _nvml_vector
Definition: linux-nvml.c:1637
cudaError_t CUDARTAPI cudaGetDeviceCount(int *dest)
Definition: benchSANVML.c:58
nvmlReturn_t(* nvmlDeviceGetClockInfoPtr)(nvmlDevice_t, nvmlClockType_t, unsigned int *)
Definition: benchSANVML.c:65
#define PAPI_DOM_OTHER
Definition: fpapi.h:23
unsigned long long getPowerManagementLimit(nvmlDevice_t dev)
Definition: linux-nvml.c:403
#define MEMINFO_ALLOCED
Definition: linux-nvml.h:24
int _papi_nvml_init_thread(hwd_context_t *ctx)
Definition: linux-nvml.c:582
char events[MAX_EVENTS][BUFSIZ]
nvml_control_state_t
Definition: linux-nvml.c:147
int _papi_nvml_shutdown_thread(hwd_context_t *ctx)
Definition: linux-nvml.c:1456
nvmlReturn_t(* nvmlDeviceGetDetailedEccErrorsPtr)(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, nvmlEccErrorCounts_t *)
Definition: benchSANVML.c:67
int _papi_nvml_ntv_enum_events(unsigned int *EventCode, int modifier)
Definition: linux-nvml.c:1536
#define PAPI_ESYS
Definition: fpapi.h:108
#define FEATURE_UTILIZATION
Definition: linux-nvml.h:15
#define FEATURE_CLOCK_INFO
Definition: linux-nvml.h:6
nvml_resource_options_t options
Definition: linux-nvml.h:49
nvmlReturn_t(* nvmlDeviceGetPowerManagementLimitPtr)(nvmlDevice_t device, unsigned int *limit)
Definition: benchSANVML.c:77
static int cidx
static void * dl2
Definition: linux-cuda.c:111
int nvml_register_t
Definition: linux-nvml.h:35
CUresult CUDAAPI cuInit(unsigned int myInt)
Definition: benchSANVML.c:48
nvmlReturn_t DECLDIR nvmlInit(void)
Definition: benchSANVML.c:109
static nvmlDevice_t * devices
Definition: linux-nvml.c:163
#define FEATURE_MAX_CLOCK
Definition: linux-nvml.h:9
Definition: linux-nvml.h:48
static int device_count
Definition: linux-nvml.c:158
unsigned long long getEccLocalErrors(nvmlDevice_t dev, nvmlEccBitType_t bits, int which_one)
Definition: linux-nvml.c:185
#define MEMORY_UTILIZATION
Definition: linux-nvml.h:32
#define FEATURE_PERF_STATES
Definition: linux-nvml.h:11
nvmlReturn_t DECLDIR nvmlDeviceGetPerformanceState(nvmlDevice_t, nvmlPstates_t *)
nvmlReturn_t(* nvmlDeviceGetTotalEccErrorsPtr)(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, unsigned long long *)
Definition: benchSANVML.c:80
char symbol[PAPI_HUGE_STR_LEN]
Definition: papi.h:967
int _papi_nvml_start(hwd_context_t *ctx, hwd_control_state_t *ctl)
Definition: linux-nvml.c:1348
nvmlReturn_t DECLDIR nvmlDeviceGetHandleByIndex(unsigned int idx, nvmlDevice_t *dest)
Definition: benchSANVML.c:92
#define PAPI_DOM_USER
Definition: fpapi.h:21
#define PAPI_MIN_STR_LEN
Definition: fpapi.h:41
cudaError_t(* cudaGetDeviceCountPtr)(int *)
Definition: benchSANVML.c:54
nvmlClockType_t clock
Definition: linux-nvml.h:43
#define FEATURE_TEMP
Definition: linux-nvml.h:13
nvmlReturn_t(* nvmlDeviceGetUtilizationRatesPtr)(nvmlDevice_t, nvmlUtilization_t *)
Definition: benchSANVML.c:81
__attribute__((constructor))
Definition: init_fini.c:12
#define FEATURE_NVML_POWER_MANAGEMENT_LIMIT_CONSTRAINT_MAX
Definition: linux-nvml.h:18
static int native
static unsigned int * power_management_limit_constraint_max
Definition: linux-nvml.c:167
int _papi_nvml_ntv_code_to_descr(unsigned int EventCode, char *descr, int len)
Definition: linux-nvml.c:1598
#define SUBDBG(format, args...)
Definition: papi_debug.h:63
nvmlReturn_t DECLDIR nvmlDeviceGetInforomVersion(nvmlDevice_t, nvmlInforomObject_t, char *, unsigned int)
#define CUDAAPI
static int * features
Definition: linux-nvml.c:164
static void * dl3
Definition: linux-cuda.c:112
long long ret
Definition: iozone.c:1346
char name[PAPI_MAX_STR_LEN]
Definition: papi.h:630
int _papi_nvml_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info)
Definition: linux-nvml.c:1615
#define PAPI_INT_SIGNAL
Definition: papi_internal.h:53
int _papi_nvml_update_control_state(hwd_control_state_t *ctl, NativeInfo_t *native, int count, hwd_context_t *ctx)
Definition: linux-nvml.c:1322
#define FEATURE_POWER
Definition: linux-nvml.h:12
nvmlReturn_t(* nvmlDeviceGetPowerUsagePtr)(nvmlDevice_t, unsigned int *)
Definition: benchSANVML.c:78
nvmlReturn_t DECLDIR nvmlDeviceGetPowerUsage(nvmlDevice_t device, unsigned int *dest)
Definition: benchSANVML.c:102
nvmlReturn_t DECLDIR nvmlDeviceGetTotalEccErrors(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, unsigned long long *)
nvmlReturn_t DECLDIR nvmlDeviceGetMemoryInfo(nvmlDevice_t, nvmlMemory_t *)
char units[PAPI_MIN_STR_LEN]
Definition: linux-nvml.h:51
nvmlReturn_t DECLDIR nvmlDeviceGetClockInfo(nvmlDevice_t, nvmlClockType_t, unsigned int *)
char description[PAPI_MAX_STR_LEN]
Definition: linux-nvml.h:52
nvmlReturn_t(* nvmlDeviceGetMemoryInfoPtr)(nvmlDevice_t, nvmlMemory_t *)
Definition: benchSANVML.c:72
nvmlReturn_t(* nvmlDeviceGetTemperaturePtr)(nvmlDevice_t, nvmlTemperatureSensors_t, unsigned int *)
Definition: benchSANVML.c:79
nvmlReturn_t(* nvmlShutdownPtr)(void)
Definition: benchSANVML.c:84
CUresult CUDAAPI(* cuInitPtr)(unsigned int)
Definition: benchSANVML.c:47
const char *DECLDIR nvmlErrorString(nvmlReturn_t)
static void nvml_hardware_reset()
Definition: linux-nvml.c:416
nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementLimit(nvmlDevice_t device, unsigned int *limit)
Definition: benchSANVML.c:98
unsigned long long getClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one)
Definition: linux-nvml.c:170
nvmlReturn_t DECLDIR nvmlDeviceGetUtilizationRates(nvmlDevice_t, nvmlUtilization_t *)
#define PAPI_ENOEVNT
Definition: fpapi.h:112
unsigned long long getTemperature(nvmlDevice_t dev)
Definition: linux-nvml.c:349
static int detectDevices()
Definition: linux-nvml.c:592
nvmlReturn_t DECLDIR nvmlDeviceSetPowerManagementLimit(nvmlDevice_t device, unsigned int limit)
Definition: benchSANVML.c:107
unsigned long long getFanSpeed(nvmlDevice_t dev)
Definition: linux-nvml.c:212
#define LOCAL_ECC_L1
Definition: linux-nvml.h:27
#define HAS_FEATURE(features, query)
Definition: linux-nvml.h:20
nvmlReturn_t(* nvmlDeviceSetPowerManagementLimitPtr)(nvmlDevice_t device, unsigned int limit)
Definition: benchSANVML.c:82
int _papi_nvml_set_domain(hwd_control_state_t *cntrl, int domain)
Definition: linux-nvml.c:1495
#define CUDARTAPI
nvmlReturn_t(* nvmlDeviceGetFanSpeedPtr)(nvmlDevice_t, unsigned int *)
Definition: benchSANVML.c:69
char *(* nvmlErrorStringPtr)(nvmlReturn_t)
Definition: benchSANVML.c:64
nvmlReturn_t(* nvmlInitPtr)(void)
Definition: benchSANVML.c:83
#define GPU_UTILIZATION
Definition: linux-nvml.h:31
int which_one
Definition: linux-nvml.h:39
#define FEATURE_POWER_MANAGEMENT
Definition: linux-nvml.h:16
unsigned long long getUtilization(nvmlDevice_t dev, int which_one)
Definition: linux-nvml.c:380
#define LOCAL_ECC_REGFILE
Definition: linux-nvml.h:26
int _papi_nvml_ctl(hwd_context_t *ctx, int code, _papi_int_option_t *option)
Definition: linux-nvml.c:1471
nvmlReturn_t(* nvmlDeviceGetPerformanceStatePtr)(nvmlDevice_t, nvmlPstates_t *)
Definition: benchSANVML.c:75
nvmlReturn_t DECLDIR nvmlDeviceGetEccMode(nvmlDevice_t, nvmlEnableState_t *, nvmlEnableState_t *)
nvmlReturn_t DECLDIR nvmlShutdown(void)
#define FEATURE_MEMORY_INFO
Definition: linux-nvml.h:10
nvmlReturn_t DECLDIR nvmlDeviceGetCount(unsigned int *dest)
Definition: benchSANVML.c:88
#define LOCAL_ECC_L2
Definition: linux-nvml.h:28
static void createNativeEvents()
Definition: linux-nvml.c:751
cudaError_t CUDARTAPI cudaGetDevice(int *dest)
Definition: benchSANVML.c:57
const char * names[NUM_EVENTS]
#define PAPI_DOM_ALL
Definition: fpapi.h:25
static long count
#define MEMINFO_UNALLOCED
Definition: linux-nvml.h:23
int _papi_nvml_reset(hwd_context_t *ctx, hwd_control_state_t *ctl)
Definition: linux-nvml.c:1441
int _papi_nvml_init_control_state(hwd_control_state_t *ctl)
Definition: linux-nvml.c:1311
cudaError_t(* cudaGetDevicePtr)(int *)
Definition: benchSANVML.c:53
int i
Definition: fileop.c:140
nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementLimitConstraints(nvmlDevice_t device, unsigned int *minLimit, unsigned int *maxLimit)
Definition: benchSANVML.c:100
#define PAPI_MAX_STR_LEN
Definition: fpapi.h:43
unsigned long long getMaxClockSpeed(nvmlDevice_t dev, nvmlClockType_t which_one)
Definition: linux-nvml.c:226
static int nvml_hardware_read(long long *value, int which_one)
Definition: linux-nvml.c:449
int _papi_nvml_init_component(int cidx)
Definition: linux-nvml.c:1050
nvml_control_state_t state
Definition: linux-nvml.c:151