28 #include "cuda_runtime_api.h" 46 #define CUDAAPI __attribute__((weak)) 51 #define CUDARTAPI __attribute__((weak)) 62 #define DECLDIR __attribute__((weak)) 64 char* (*nvmlErrorStringPtr)(nvmlReturn_t);
99 (*nvmlDeviceGetPowerManagementLimitPtr)(device, limit); }
101 (*nvmlDeviceGetPowerManagementLimitConstraintsPtr)(device, minLimit, maxLimit); }
103 (*nvmlDeviceGetPowerUsagePtr)(device, dest); }
108 (*nvmlDeviceSetPowerManagementLimitPtr)(device, limit); }
154 #define _prog_fprintf if (1) fprintf 155 #define _time_fprintf if (1) fprintf 169 static nvmlDevice_t*
devices = NULL;
180 #define mCheck_DL_Status( err, str ) \ 183 fprintf(stderr, str); \ 192 #define STRINGIFY(x) #x 193 #define TOSTRING(x) STRINGIFY(x) 194 #define mGet_DL_FPtr(libPtr, Name) \ 195 Name##Ptr = dlsym(libPtr, TOSTRING(Name)); \ 196 mCheck_DL_Status(dlerror()!=NULL, TOSTRING(libPtr) " Library function " \ 197 TOSTRING(Name) " not found."); 202 fprintf(stderr,
/* Newline-terminated so this diagnostic does not run into whatever is written to stderr next. */
"NVML component does not support statically linked libc.\n");
/* Open the CUDA driver, CUDA runtime and NVML shared libraries.
 * Each "not found" diagnostic ends with '\n' so it occupies its own stderr
 * line instead of being glued to the caller's follow-up message. */
207 dl1 = dlopen(
"libcuda.so", RTLD_NOW | RTLD_GLOBAL);
209 fprintf(stderr,
"CUDA library libcuda.so not found.\n");
213 dl2 = dlopen(
"libcudart.so", RTLD_NOW | RTLD_GLOBAL | RTLD_NODELETE);
215 fprintf(stderr,
"CUDA runtime library libcudart.so not found.\n");
219 dl3 = dlopen(
"libnvidia-ml.so", RTLD_NOW | RTLD_GLOBAL);
221 fprintf(stderr,
"NVML runtime library libnvidia-ml.so not found.\n");
269 int main (
int argc,
char **argv)
271 (void) argc; (void) argv;
272 #define hostnameLen 512 275 int cuda_count, nvml_count;
/* Report the failed library link; newline-terminated like the other diagnostics in this file. */
286 fprintf(stderr,
"Failed attempt to link to CUDA and NVML libraries.\n");
290 _prog_fprintf(stderr,
"Linked to CUDA and NVML libraries\n");
293 if (nvret != NVML_SUCCESS) {
294 fprintf(stderr,
"Failed nvmlInit(), ret=%i [%s].\n", nvret,
nvmlErrorString(nvret));
/* NOTE(review): the message below reports cuInit(), which this file declares as
 * returning CUresult, yet the status is compared with cudaSuccess (a cudaError_t
 * constant). CUDA_SUCCESS looks like the intended constant -- confirm against the
 * assignment to nvret, which is not visible in this excerpt. */
299 if (nvret != cudaSuccess) {
300 fprintf(stderr,
"Failed cuInit(0).\n");
305 if (nvret != NVML_SUCCESS) {
306 fprintf(stderr,
"nvmlDeviceGetCount failed; ret=%i.\n", nvret);
311 if (nvret != cudaSuccess) {
312 fprintf(stderr,
"cudaGetDeviceCount failed; ret=%i.\n", nvret);
/* Report the gethostname() failure code; newline-terminated so later stderr output starts on a fresh line. */
319 fprintf(stderr,
"Failed system call, gethostname() " 320 "returned %i.\n",
ret);
325 fprintf(stderr,
"hostname: %s\n" 327 "cuda_count=%i\n", hostname, nvml_count, cuda_count);
329 nvmlDevice_t *handle = malloc(nvml_count *
sizeof(nvmlDevice_t));
331 unsigned int powerUsage, powerLimit, powerLimit2;
332 unsigned int minPower, maxPower;
335 for (
i=0;
i<nvml_count;
i++) {
337 if (nvret != NVML_SUCCESS) {
338 fprintf(stderr,
"nvmlDeviceGetHandleByIndex %i failed; nvret=%i [%s].\n",
i, nvret,
nvmlErrorString(nvret));
/* Print the device handle as 16 hex digits. handle[i] is an nvmlDevice_t (an
 * opaque NVML handle type), not an unsigned long, so it is cast explicitly to
 * match the %lX conversion; the printed text is unchanged. */
343 fprintf(stderr,
"Handle %i: %016lX\n",
i, (unsigned long) handle[
i]);
347 fprintf(stderr,
"Name='%s'.\n",
name);
350 if (nvret != NVML_SUCCESS) {
351 fprintf(stderr,
"nvmlDeviceGetPowerUsage failed; nvret=%i [%s]\n", nvret,
nvmlErrorString(nvret));
353 fprintf(stderr,
"nvmlDeviceGetPowerUsage succeeded, value returned=%u mw.\n", powerUsage);
357 if (nvret != NVML_SUCCESS) {
358 fprintf(stderr,
"nvmlDeviceGetPowerManagementLimit failed; nvret=%i [%s]\n", nvret,
nvmlErrorString(nvret));
360 fprintf(stderr,
"nvmlDeviceGetPowerManagementLimit succeeded, value returned=%u mw.\n", powerLimit);
364 if (nvret != NVML_SUCCESS) {
365 fprintf(stderr,
"nvmlDeviceGetPowerManagementLimitConstraints failed; nvret=%i [%s]\n", nvret,
nvmlErrorString(nvret));
367 fprintf(stderr,
"nvmlDeviceGetPowerManagementLimitConstraints succeeded, values min=%u mw, max=%u mw.\n", minPower, maxPower);
371 unsigned int newPower=maxPower-100;
/* Report the outcome of setting the new power limit. newPower is an
 * unsigned int, so it is printed with %u rather than %i. */
373 if (nvret != NVML_SUCCESS) {
374 fprintf(stderr,
"nvmlDeviceSetPowerManagementLimit to %u failed; nvret=%i [%s]\n", newPower, nvret,
nvmlErrorString(nvret));
376 fprintf(stderr,
"nvmlDeviceSetPowerManagementLimit to %u succeeded. (Routine call did not return error).\n", newPower);
380 if (nvret != NVML_SUCCESS) {
381 fprintf(stderr,
"nvmlDeviceGetPowerManagementLimit failed; nvret=%i [%s]\n", nvret,
nvmlErrorString(nvret));
383 fprintf(stderr,
"nvmlDeviceGetPowerManagementLimit call to check setting succeeded, value returned=%u mw.\n", powerLimit2);
384 if (powerLimit2 != newPower) {
385 fprintf(stderr,
"Note the check failed, the limit read is not the limit we tried to set.\n");
387 fprintf(stderr,
"Note the check is a success, the power limit was changed.\n");
/* Report the outcome of restoring the original power limit. powerLimit is an
 * unsigned int, so it is printed with %u rather than %i. */
392 if (nvret != NVML_SUCCESS) {
393 fprintf(stderr,
"nvmlDeviceSetPowerManagementLimit to restore %u failed; nvret=%i [%s]\n", powerLimit, nvret,
nvmlErrorString(nvret));
395 fprintf(stderr,
"nvmlDeviceSetPowerManagementLimit to restore %u succeeded.\n", powerLimit);
nvmlReturn_t DECLDIR nvmlDeviceGetName(nvmlDevice_t device, char *name, unsigned int len)
nvmlReturn_t DECLDIR nvmlDeviceGetDetailedEccErrors(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, nvmlEccErrorCounts_t *)
nvmlReturn_t(* nvmlDeviceGetCountPtr)(unsigned int *dest)
nvmlReturn_t(* nvmlDeviceGetNamePtr)(nvmlDevice_t, char *, unsigned int)
unsigned long long * ullPtr
static unsigned int * power_management_limit_constraint_min
static unsigned int * power_management_initial_limit
nvmlReturn_t(* nvmlDeviceGetHandleByIndexPtr)(unsigned int, nvmlDevice_t *)
nvmlReturn_t DECLDIR nvmlDeviceGetTemperature(nvmlDevice_t, nvmlTemperatureSensors_t, unsigned int *)
cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *, int, int)
nvmlReturn_t(* nvmlDeviceGetEccModePtr)(nvmlDevice_t, nvmlEnableState_t *, nvmlEnableState_t *)
nvmlReturn_t(* nvmlDeviceGetInforomVersionPtr)(nvmlDevice_t, nvmlInforomObject_t, char *, unsigned int)
nvmlReturn_t(* nvmlDeviceGetPciInfoPtr)(nvmlDevice_t, nvmlPciInfo_t *)
nvmlReturn_t(* nvmlDeviceGetPowerManagementLimitConstraintsPtr)(nvmlDevice_t device, unsigned int *minLimit, unsigned int *maxLimit)
nvmlReturn_t DECLDIR nvmlDeviceGetPciInfo(nvmlDevice_t, nvmlPciInfo_t *)
cudaError_t(* cudaDeviceGetPCIBusIdPtr)(char *, int, int)
nvmlReturn_t DECLDIR nvmlDeviceGetFanSpeed(nvmlDevice_t, unsigned int *)
cudaError_t CUDARTAPI cudaGetDeviceCount(int *dest)
nvmlReturn_t(* nvmlDeviceGetClockInfoPtr)(nvmlDevice_t, nvmlClockType_t, unsigned int *)
nvmlReturn_t(* nvmlDeviceGetDetailedEccErrorsPtr)(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, nvmlEccErrorCounts_t *)
nvmlReturn_t(* nvmlDeviceGetPowerManagementLimitPtr)(nvmlDevice_t device, unsigned int *limit)
CUresult CUDAAPI cuInit(unsigned int myInt)
nvmlReturn_t DECLDIR nvmlInit(void)
static nvmlDevice_t * devices
nvmlReturn_t DECLDIR nvmlDeviceGetPerformanceState(nvmlDevice_t, nvmlPstates_t *)
nvmlReturn_t(* nvmlDeviceGetTotalEccErrorsPtr)(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, unsigned long long *)
nvmlReturn_t DECLDIR nvmlDeviceGetHandleByIndex(unsigned int idx, nvmlDevice_t *dest)
cudaError_t(* cudaGetDeviceCountPtr)(int *)
nvmlReturn_t(* nvmlDeviceGetUtilizationRatesPtr)(nvmlDevice_t, nvmlUtilization_t *)
__attribute__((constructor))
int main(int argc, char **argv)
static unsigned int * power_management_limit_constraint_max
nvmlReturn_t DECLDIR nvmlDeviceGetInforomVersion(nvmlDevice_t, nvmlInforomObject_t, char *, unsigned int)
nvmlReturn_t(* nvmlDeviceGetPowerUsagePtr)(nvmlDevice_t, unsigned int *)
nvmlReturn_t DECLDIR nvmlDeviceGetPowerUsage(nvmlDevice_t device, unsigned int *dest)
nvmlReturn_t DECLDIR nvmlDeviceGetTotalEccErrors(nvmlDevice_t, nvmlEccBitType_t, nvmlEccCounterType_t, unsigned long long *)
nvmlReturn_t DECLDIR nvmlDeviceGetMemoryInfo(nvmlDevice_t, nvmlMemory_t *)
nvmlReturn_t DECLDIR nvmlDeviceGetClockInfo(nvmlDevice_t, nvmlClockType_t, unsigned int *)
nvmlReturn_t(* nvmlDeviceGetMemoryInfoPtr)(nvmlDevice_t, nvmlMemory_t *)
nvmlReturn_t(* nvmlDeviceGetTemperaturePtr)(nvmlDevice_t, nvmlTemperatureSensors_t, unsigned int *)
nvmlReturn_t(* nvmlShutdownPtr)(void)
static struct timeval t1 t2
CUresult CUDAAPI(* cuInitPtr)(unsigned int)
const char *DECLDIR nvmlErrorString(nvmlReturn_t)
int gettimeofday(void *ptr1, void *ptr2)
nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementLimit(nvmlDevice_t device, unsigned int *limit)
nvmlReturn_t DECLDIR nvmlDeviceGetUtilizationRates(nvmlDevice_t, nvmlUtilization_t *)
nvmlReturn_t DECLDIR nvmlDeviceSetPowerManagementLimit(nvmlDevice_t device, unsigned int limit)
void(* _dl_non_dynamic_init)(void)
nvmlReturn_t(* nvmlDeviceSetPowerManagementLimitPtr)(nvmlDevice_t device, unsigned int limit)
nvmlReturn_t(* nvmlDeviceGetFanSpeedPtr)(nvmlDevice_t, unsigned int *)
nvmlReturn_t(* nvmlInitPtr)(void)
#define mGet_DL_FPtr(libPtr, Name)
nvmlReturn_t(* nvmlDeviceGetPerformanceStatePtr)(nvmlDevice_t, nvmlPstates_t *)
nvmlReturn_t DECLDIR nvmlDeviceGetEccMode(nvmlDevice_t, nvmlEnableState_t *, nvmlEnableState_t *)
nvmlReturn_t DECLDIR nvmlShutdown(void)
nvmlReturn_t DECLDIR nvmlDeviceGetCount(unsigned int *dest)
cudaError_t CUDARTAPI cudaGetDevice(int *dest)
cudaError_t(* cudaGetDevicePtr)(int *)
nvmlReturn_t DECLDIR nvmlDeviceGetPowerManagementLimitConstraints(nvmlDevice_t device, unsigned int *minLimit, unsigned int *maxLimit)
int _local_linkDynamicLibraries(void)