// Returns whether precompiled HIP kernel binaries (fatbins) are shipped with
// this build.
// NOTE(review): fragment — the opening brace and the return statement are not
// visible here; presumably the result is derived from the "lib" directory
// resolved below. TODO confirm against the full source.
33bool HIPDevice::have_precompiled_kernels()
// Directory that holds the precompiled kernel binaries.
35 string fatbins_path =
path_get(
"lib");
// Records a device error and points the user at the Cycles GPU rendering
// documentation on stderr.
// NOTE(review): fragment — the call that actually stores `error` (likely a
// Device::set_error forward) is missing from the visible lines.
44void HIPDevice::set_error(
const string &
error)
// Print a documentation pointer so users can self-diagnose GPU failures.
49 fprintf(stderr,
"\nRefer to the Cycles GPU rendering documentation for possible solutions:\n");
51 "https://docs.blender.org/manual/en/latest/render/cycles/gpu_rendering.html\n\n");
// HIPDevice constructor fragment: initializes the HIP runtime, acquires the
// device handle, queries capabilities, and creates the driver context.
// NOTE(review): many interior lines (member initializers, early returns after
// set_error, variable declarations such as `value`, `major`, `minor`) are
// missing from this span.
57 :
GPUDevice(info, stats, profiler, headless)
// Compile-time guarantees that the generic GPUDevice handle types are
// layout-compatible with the HIP-specific handle types they alias.
60 static_assert(
sizeof(texMemObject) ==
sizeof(hipTextureObject_t));
61 static_assert(
sizeof(arrayMemObject) ==
sizeof(hArray));
71 need_texture_info =
false;
// Initialize the HIP runtime; failure is reported but presumably also
// aborts construction in the missing lines.
76 hipError_t result = hipInit(0);
77 if (result != hipSuccess) {
78 set_error(
string_printf(
"Failed to initialize HIP runtime (%s)", hipewErrorString(result)));
// Resolve the device ordinal into a driver handle.
83 result = hipDeviceGet(&hipDevice, hipDevId);
84 if (result != hipSuccess) {
85 set_error(
string_printf(
"Failed to get HIP device handle from ordinal (%s)",
86 hipewErrorString(result)));
// Query whether host memory can be mapped into the device address space.
94 hip_assert(hipDeviceGetAttribute(&value, hipDeviceAttributeCanMapHostMemory, hipDevice));
95 can_map_host = value != 0;
// Texture pitch alignment is needed later when allocating 2D textures.
98 hipDeviceGetAttribute(&pitch_alignment, hipDeviceAttributeTexturePitchAlignment, hipDevice));
// Create the context; map-host is only requested when supported.
100 unsigned int ctx_flags = hipDeviceLmemResizeToMax;
102 ctx_flags |= hipDeviceMapHost;
107 result = hipCtxCreate(&hipContext, ctx_flags, hipDevice);
109 if (result != hipSuccess) {
110 set_error(
string_printf(
"Failed to create HIP context (%s)", hipewErrorString(result)));
// Cache the compute capability as a single integer (e.g. 10.1 -> 1010).
115 hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
116 hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
117 hipDevArchitecture = major * 100 + minor * 10;
120 hip_assert(hipRuntimeGetVersion(&hipRuntimeVersion));
// hipCtxCreate leaves the new context current; pop it so scoped activation
// (HIPContextScope) governs all later use.
123 hipCtxPopCurrent(
NULL);
// Destructor fragment: unloads the kernel module and destroys the context.
// NOTE(review): guards (e.g. only unloading when hipModule is non-null) and
// any texture-info cleanup are missing from the visible lines.
126HIPDevice::~HIPDevice()
130 hip_assert(hipModuleUnload(hipModule));
132 hip_assert(hipCtxDestroy(hipContext));
// Returns whether this GPU is supported by the HIP backend.
// NOTE(review): fragment — the success return inside the hipSupportsDevice
// branch and the final failure return are missing; the error message's %s
// argument (presumably props.name) is also cut off.
135bool HIPDevice::support_device(
const uint )
137 if (hipSupportsDevice(hipDevId)) {
// Unsupported device: fetch its properties so the error can name it.
142 hipDeviceProp_t props;
143 hipGetDeviceProperties(&props, hipDevId);
145 set_error(
string_printf(
"HIP backend requires AMD RDNA graphics card or up, but found %s.",
// Checks and, when possible, enables peer-to-peer memory access between this
// device and `peer_device`.
// NOTE(review): fragment — the return statements for the self-check, the two
// capability checks, and the final success path are missing.
151bool HIPDevice::check_peer_access(
Device *peer_device)
// A device trivially accesses itself; presumably returns early here.
153 if (peer_device ==
this) {
160 HIPDevice *
const peer_device_hip =
static_cast<HIPDevice *
>(peer_device);
// Basic peer access capability between the two physical devices.
163 hip_assert(hipDeviceCanAccessPeer(&can_access, hipDevice, peer_device_hip->hipDevice));
164 if (can_access == 0) {
// Cycles also needs array (texture) access across the link, not just
// linear memory.
169 hip_assert(hipDeviceGetP2PAttribute(
170 &can_access, hipDevP2PAttrHipArrayAccessSupported, hipDevice, peer_device_hip->hipDevice));
171 if (can_access == 0) {
// Enable access in both directions; each direction must be enabled from
// within the accessing device's own context.
177 const HIPContextScope scope(
this);
178 hipError_t result = hipCtxEnablePeerAccess(peer_device_hip->hipContext, 0);
179 if (result != hipSuccess) {
180 set_error(
string_printf(
"Failed to enable peer access on HIP context (%s)",
181 hipewErrorString(result)));
186 const HIPContextScope scope(peer_device_hip);
187 hipError_t result = hipCtxEnablePeerAccess(hipContext, 0);
188 if (result != hipSuccess) {
189 set_error(
string_printf(
"Failed to enable peer access on HIP context (%s)",
190 hipewErrorString(result)));
// Returns whether kernels should be recompiled with only the features the
// scene needs. NOTE(review): the body is entirely missing from this span —
// presumably it reads a debug flag; confirm against the full source.
198bool HIPDevice::use_adaptive_compilation()
// Builds the compiler flags shared by all HIP kernel compilations.
// NOTE(review): fragment — the string_printf that initializes `cflags` (with
// the include path) and the final return are missing.
206string HIPDevice::compile_kernel_get_common_cflags(
const uint kernel_features)
209 const string source_path =
path_get(
"source");
210 const string include_path = source_path;
217 include_path.c_str());
// Adaptive compilation bakes the requested feature mask into the kernel.
218 if (use_adaptive_compilation()) {
219 cflags +=
" -D__KERNEL_FEATURES__=" +
to_string(kernel_features);
// Returns the path to a usable kernel fatbin for this device: a precompiled
// binary when available, otherwise a locally (re)compiled one via hipcc.
// NOTE(review): fragment — many interior lines (fatbin path construction,
// early returns, md5 computation, directory creation, the full hipcc command
// arguments) are missing from this span.
224string HIPDevice::compile_kernel(
const uint kernel_features,
const char *name,
const char *base)
// Identify the target GPU architecture for the binary name and compiler.
228 hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
229 hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
230 const std::string arch = hipDeviceArch(hipDevId);
// First preference: a precompiled kernel shipped with the build (skipped
// for adaptive compilation, which needs scene-specific binaries).
233 if (!use_adaptive_compilation()) {
235 VLOG_INFO <<
"Testing for pre-compiled kernel " << fatbin <<
".";
237 VLOG_INFO <<
"Using precompiled kernel.";
// Otherwise compile from the shipped kernel sources.
243 string source_path =
path_get(
"source");
249 string common_cflags = compile_kernel_get_common_cflags(kernel_features);
252 const char *
const kernel_ext =
"genco";
// Compiler warning/optimization options; presumably selected per build
// configuration in the missing branch lines.
255 options.append(
"Wno-parentheses-equality -Wno-unused-value -ffast-math");
257 options.append(
"Wno-parentheses-equality -Wno-unused-value -O3 -ffast-math");
260 options.append(
" -save-temps");
262 options.append(
" --offload-arch=").append(arch.c_str());
264 const string include_path = source_path;
// Cache file name keyed by kernel name, architecture and source hash so a
// stale binary is never reused.
266 "cycles_%s_%s_%s", name, arch.c_str(), kernel_md5.c_str());
268 VLOG_INFO <<
"Testing for locally compiled kernel " << fatbin <<
".";
270 VLOG_INFO <<
"Using locally compiled kernel.";
// If precompiled kernels exist but none matched this GPU, report why
// instead of attempting a local compile.
275 if (!use_adaptive_compilation() && have_precompiled_kernels()) {
276 if (!hipSupportsDevice(hipDevId)) {
278 string_printf(
"HIP backend requires compute capability 10.1 or up, but found %d.%d. "
279 "Your GPU is not supported.",
285 string_printf(
"HIP binary kernel for this graphics card compute "
286 "capability (%d.%d) not found.",
// Locate and validate the hipcc compiler before shelling out.
295 const char *
const hipcc = hipewCompilerPath();
298 "HIP hipcc compiler not found. "
299 "Install HIP toolkit in default location.");
303 const int hipcc_hip_version = hipewCompilerVersion();
304 VLOG_INFO <<
"Found hipcc " << hipcc <<
", HIP version " << hipcc_hip_version <<
".";
305 if (hipcc_hip_version < 40) {
307 "Unsupported HIP version %d.%d detected, "
308 "you need HIP 4.0 or newer.\n",
309 hipcc_hip_version / 10,
310 hipcc_hip_version % 10);
// Build and run the compilation command.
321 string command =
string_printf(
"%s -%s -I %s --%s %s -o \"%s\"",
324 include_path.c_str(),
329 printf(
"Compiling %sHIP kernel ...\n%s\n",
330 (use_adaptive_compilation()) ?
"adaptive " :
"",
// Presumably Windows-only: route through the shell's `call`.
334 command =
"call " + command;
336 if (system(command.c_str()) != 0) {
338 "Failed to execute compilation command, "
339 "see console for details.");
346 "HIP kernel compilation failed, "
347 "see console for details.");
351 printf(
"Kernel compilation finished in %.2lfs.\n",
time_dt() - starttime);
// Compiles (if needed) and loads the HIP kernel module for the requested
// feature set; returns true on success.
// NOTE(review): fragment — the early returns, the fatbin file read into
// `fatbin_data`, and the set_error wrapper around the failure message are
// missing from the visible lines.
356bool HIPDevice::load_kernels(
const uint kernel_features)
// Adaptive recompilation of an already-loaded module is not supported;
// presumably returns early here.
363 if (use_adaptive_compilation()) {
364 VLOG_INFO <<
"Skipping HIP kernel reload for adaptive compilation, not currently supported.";
374 if (!support_device(kernel_features)) {
379 const char *kernel_name =
"kernel";
380 string fatbin = compile_kernel(kernel_features, kernel_name);
385 HIPContextScope scope(
this);
// Load the binary image; a missing/unreadable file maps to
// hipErrorFileNotFound in the (missing) else branch.
391 result = hipModuleLoadData(&hipModule, fatbin_data.c_str());
393 result = hipErrorFileNotFound;
395 if (result != hipSuccess)
397 "Failed to load HIP kernel from '%s' (%s)", fatbin.c_str(), hipewErrorString(result)));
// On success, pre-reserve local memory so renders do not fail later.
399 if (result == hipSuccess) {
401 reserve_local_memory(kernel_features);
404 return (result == hipSuccess);
// Forces allocation of the kernel's local (scratch) memory up front by
// launching a test kernel, then pads device memory so roughly `keep_mb`
// stays free.
// NOTE(review): fragment — the test-kernel lookup, its arguments, logging of
// the memory delta, and the loop body's pointer handling are partly missing.
407void HIPDevice::reserve_local_memory(
const uint kernel_features)
412 size_t total = 0, free_before = 0, free_after = 0;
415 HIPContextScope scope(
this);
416 hipMemGetInfo(&free_before, &total);
// Launching any kernel makes the driver commit local memory for the
// module's worst-case requirements.
430 HIPDeviceQueue queue(
this);
437 queue.init_execution();
438 queue.enqueue(test_kernel, 1, args);
443 HIPContextScope scope(
this);
444 hipMemGetInfo(&free_after, &total);
// Allocate dummy blocks until only ~1 GiB remains free, 10 MiB at a time.
// NOTE(review): the allocations appear deliberately leaked for the device's
// lifetime — confirm intent against the full source.
452 const size_t keep_mb = 1024;
454 while (free_after > keep_mb * 1024 * 1024LL) {
456 hip_assert(hipMalloc(&tmp, 10 * 1024 * 1024LL));
457 hipMemGetInfo(&free_after, &total);
462void HIPDevice::get_device_memory_info(
size_t &total,
size_t &
free)
464 HIPContextScope scope(
this);
466 hipMemGetInfo(&
free, &total);
469bool HIPDevice::alloc_device(
void *&device_pointer,
size_t size)
471 HIPContextScope scope(
this);
473 hipError_t mem_alloc_result = hipMalloc((hipDeviceptr_t *)&device_pointer, size);
474 return mem_alloc_result == hipSuccess;
477void HIPDevice::free_device(
void *device_pointer)
479 HIPContextScope scope(
this);
481 hip_assert(hipFree((hipDeviceptr_t)device_pointer));
484bool HIPDevice::alloc_host(
void *&shared_pointer,
size_t size)
486 HIPContextScope scope(
this);
488 hipError_t mem_alloc_result = hipHostMalloc(
489 &shared_pointer, size, hipHostMallocMapped | hipHostMallocWriteCombined);
491 return mem_alloc_result == hipSuccess;
494void HIPDevice::free_host(
void *shared_pointer)
496 HIPContextScope scope(
this);
498 hipHostFree(shared_pointer);
501void HIPDevice::transform_host_pointer(
void *&device_pointer,
void *&shared_pointer)
503 HIPContextScope scope(
this);
505 hip_assert(hipHostGetDevicePointer((hipDeviceptr_t *)&device_pointer, shared_pointer, 0));
508void HIPDevice::copy_host_to_device(
void *device_pointer,
void *host_pointer,
size_t size)
510 const HIPContextScope scope(
this);
512 hip_assert(hipMemcpyHtoD((hipDeviceptr_t)device_pointer, host_pointer, size));
// NOTE(review): the lines below are fragments of two or three different
// methods (their signatures are not visible in this span): guards asserting
// that texture and global memory must not go through the plain allocation
// path, and a delegation of generic buffer uploads to the shared GPUDevice
// implementation.
518 assert(!
"mem_alloc not supported for textures.");
521 assert(!
"mem_alloc not supported for global memory.");
542 generic_copy_to(mem);
// Copies a row range [y, y+h) of a pitched buffer of `w` elements of size
// `elem` from device memory back to host memory.
// NOTE(review): fragment — the non-texture branch structure and the source /
// destination pointer arguments of the hipMemcpyDtoH call are missing.
546void HIPDevice::mem_copy_from(
device_memory &mem,
size_t y,
size_t w,
size_t h,
size_t elem)
549 assert(!
"mem_copy_from not supported for textures.");
// Byte size of the copied region and byte offset of its first row.
552 const size_t size = elem *
w * h;
553 const size_t offset = elem * y *
w;
556 const HIPContextScope scope(
this);
557 hip_assert(hipMemcpyDtoH(
// NOTE(review): orphan fragment — this scoped context activation belongs to
// another method (original line 579) whose signature and body are not
// visible in this span.
579 const HIPContextScope scope(
this);
// Uploads a named constant block into the kernel_params global of the loaded
// kernel module. Known data arrays (from kernel/data_arrays.h) are written at
// their fixed offset within KernelParamsHIP.
// NOTE(review): fragment — the declarations of `mem`/`bytes`, the size
// assertion, and the non-array fallback path are missing.
605void HIPDevice::const_copy_to(
const char *name,
void *host,
size_t size)
607 HIPContextScope scope(
this);
// Locate the kernel_params global in the loaded module.
611 hip_assert(hipModuleGetGlobal(&mem, &bytes, hipModule,
"kernel_params"));
// Expand one strcmp/memcpy pair per known data array name.
615# define KERNEL_DATA_ARRAY(data_type, data_name) \
616 if (strcmp(name, #data_name) == 0) { \
617 hip_assert(hipMemcpyHtoD(mem + offsetof(KernelParamsHIP, data_name), host, size)); \
622# include "kernel/data_arrays.h"
623# undef KERNEL_DATA_ARRAY
// NOTE(review): orphan fragment — this delegation to the shared GPUDevice
// copy path belongs to another method (original line 630) whose signature is
// not visible in this span.
630 generic_copy_to(mem);
// Texture allocation fragment (the enclosing method's signature is above this
// span). Maps the generic texture description to HIP: address/filter modes,
// element format, then one of three storage paths (3D array, pitched 2D,
// linear 1D), records the slot in texture_info, and creates a texture object
// for interpolated reads.
// NOTE(review): switch/if headers, size computations (src_pitch, dst_size,
// desc fields) and several branch boundaries are missing; "¶m" below is
// mojibake for "&param" — left byte-identical on purpose.
645 HIPContextScope scope(
this);
// Wrap is the default extension mode; the missing switch maps the generic
// extension setting onto these HIP address modes.
650 hipTextureAddressMode address_mode = hipAddressModeWrap;
653 address_mode = hipAddressModeWrap;
656 address_mode = hipAddressModeClamp;
659 address_mode = hipAddressModeBorder;
662 address_mode = hipAddressModeMirror;
// Point (closest) vs linear filtering, from the missing interpolation switch.
669 hipTextureFilterMode filter_mode;
671 filter_mode = hipFilterModePoint;
674 filter_mode = hipFilterModeLinear;
// Element format, from the missing data-type switch.
681 format = HIP_AD_FORMAT_UNSIGNED_INT8;
684 format = HIP_AD_FORMAT_UNSIGNED_INT16;
687 format = HIP_AD_FORMAT_UNSIGNED_INT32;
690 format = HIP_AD_FORMAT_SIGNED_INT32;
693 format = HIP_AD_FORMAT_FLOAT;
696 format = HIP_AD_FORMAT_HALF;
704 hArray array_3d =
NULL;
706 size_t dst_pitch = src_pitch;
// Path 1 (presumably 3D textures): back the texture with a HIP 3D array.
710 cmem = &device_mem_map[&mem];
715 cmem->array =
reinterpret_cast<arrayMemObject
>(array_3d);
718 dst_pitch =
align_up(src_pitch, pitch_alignment);
723 HIP_ARRAY3D_DESCRIPTOR desc;
736 hip_assert(hipArray3DCreate((hArray *)&array_3d, &desc));
// Upload host data into the 3D array.
743 memset(¶m, 0,
sizeof(HIP_MEMCPY3D));
744 param.dstMemoryType = get_memory_type(hipMemoryTypeArray);
745 param.dstArray = array_3d;
746 param.srcMemoryType = get_memory_type(hipMemoryTypeHost);
748 param.srcPitch = src_pitch;
749 param.WidthInBytes = param.srcPitch;
753 hip_assert(hipDrvMemcpy3D(¶m));
760 cmem = &device_mem_map[&mem];
762 cmem->array =
reinterpret_cast<arrayMemObject
>(array_3d);
// Path 2 (presumably 2D textures): pitched linear memory, rows aligned to
// the device's texture pitch requirement.
766 dst_pitch =
align_up(src_pitch, pitch_alignment);
769 cmem = generic_alloc(mem, dst_size - mem.
memory_size());
775 memset(¶m, 0,
sizeof(param));
776 param.dstMemoryType = get_memory_type(hipMemoryTypeDevice);
778 param.dstPitch = dst_pitch;
779 param.srcMemoryType = get_memory_type(hipMemoryTypeHost);
781 param.srcPitch = src_pitch;
782 param.WidthInBytes = param.srcPitch;
785 hip_assert(hipDrvMemcpy2DUnaligned(¶m));
// Path 3: plain linear allocation for 1D data.
789 cmem = generic_alloc(mem);
// Grow the texture-info table in chunks to avoid frequent reallocation.
799 if (slot >= texture_info.size()) {
802 texture_info.resize(slot + 128);
806 texture_info[slot] = mem.
info;
807 need_texture_info =
true;
// Build the resource descriptor matching whichever storage path was taken.
815 hipResourceDesc resDesc;
816 memset(&resDesc, 0,
sizeof(resDesc));
819 resDesc.resType = hipResourceTypeArray;
820 resDesc.res.array.h_Array = array_3d;
824 resDesc.resType = hipResourceTypePitch2D;
826 resDesc.res.pitch2D.format =
format;
830 resDesc.res.pitch2D.pitchInBytes = dst_pitch;
833 resDesc.resType = hipResourceTypeLinear;
835 resDesc.res.linear.format =
format;
// Texture sampling state: same address mode on all three axes, normalized
// coordinates.
840 hipTextureDesc texDesc;
841 memset(&texDesc, 0,
sizeof(texDesc));
842 texDesc.addressMode[0] = address_mode;
843 texDesc.addressMode[1] = address_mode;
844 texDesc.addressMode[2] = address_mode;
845 texDesc.filterMode = filter_mode;
846 texDesc.flags = HIP_TRSF_NORMALIZED_COORDINATES;
849 cmem = &device_mem_map[&mem];
851 if (hipTexObjectCreate(&cmem->texobject, &resDesc, &texDesc,
NULL) != hipSuccess) {
853 "Failed to create texture. Maximum GPU texture size or available GPU memory was likely "
// Kernels address the texture through the 64-bit handle stored in the slot.
857 texture_info[slot].data = (
uint64_t)cmem->texobject;
// Texture free fragment (the enclosing method's signature is above this
// span): destroys the texture object and/or backing array and removes the
// bookkeeping entry, with a third (missing) branch presumably handling plain
// linear allocations via the generic free path.
867 HIPContextScope scope(
this);
869 DCHECK(device_mem_map.find(&mem) != device_mem_map.end());
870 const Mem &cmem = device_mem_map[&mem];
// Textures with an object handle: destroy the sampler view first.
872 if (cmem.texobject) {
874 hipTexObjectDestroy(cmem.texobject);
879 device_mem_map.erase(device_mem_map.find(&mem));
// Array-backed textures without a texture object.
881 else if (cmem.array) {
883 hipArrayDestroy(
reinterpret_cast<hArray
>(cmem.array));
888 device_mem_map.erase(device_mem_map.find(&mem));
897unique_ptr<DeviceQueue> HIPDevice::gpu_queue_create()
899 return make_unique<HIPDeviceQueue>(
this);
// Decides whether OpenGL interop can be used for display: true only when this
// HIP device is among the devices backing the current GL context.
// NOTE(review): fragment — the early returns, the `gl_devices` vector
// construction, and the final return are missing from the visible lines.
902bool HIPDevice::should_use_graphics_interop()
918 HIPContextScope scope(
this);
920 int num_all_devices = 0;
921 hip_assert(hipGetDeviceCount(&num_all_devices));
// No devices at all: interop impossible; presumably returns false here.
923 if (num_all_devices == 0) {
// Ask HIP which devices correspond to the current GL context.
928 uint num_gl_devices = 0;
929 hipGLGetDevices(&num_gl_devices, gl_devices.data(), num_all_devices, hipGLDeviceListAll);
931 for (hipDevice_t gl_device : gl_devices) {
932 if (gl_device == hipDevice) {
941int HIPDevice::get_num_multiprocessors()
943 return get_device_default_attribute(hipDeviceAttributeMultiprocessorCount, 0);
946int HIPDevice::get_max_num_threads_per_multiprocessor()
948 return get_device_default_attribute(hipDeviceAttributeMaxThreadsPerMultiProcessor, 0);
951bool HIPDevice::get_device_attribute(hipDeviceAttribute_t attribute,
int *value)
953 HIPContextScope scope(
this);
955 return hipDeviceGetAttribute(value, attribute, hipDevice) == hipSuccess;
958int HIPDevice::get_device_default_attribute(hipDeviceAttribute_t attribute,
int default_value)
961 if (!get_device_attribute(attribute, &value)) {
962 return default_value;
967hipMemoryType HIPDevice::get_memory_type(hipMemoryType mem_type)
969 return get_hip_memory_type(mem_type, hipRuntimeVersion);
void BLI_kdtree_nd_ free(KDTree *tree)
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
virtual void set_error(const string &error)
void mem_free(size_t size)
void mem_alloc(size_t size)
bool is_resident(Device *sub_device) const
size_t memory_elements_size(int elements)
device_ptr device_pointer
static constexpr size_t datatype_size(DataType datatype)
CCL_NAMESPACE_BEGIN struct Options options
#define KERNEL_DATA_ARRAY(type, name)
DebugFlags & DebugFlags()
#define CCL_NAMESPACE_END
static const char * to_string(const Interpolation &interp)
#define KERNEL_FEATURE_NODE_RAYTRACE
#define KERNEL_FEATURE_MNEE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE
#define DCHECK(expression)
string util_md5_string(const string &str)
static void error(const char *str)
string path_cache_get(const string &sub)
string path_get(const string &sub)
string path_files_md5_hash(const string &dir)
string path_join(const string &dir, const string &file)
bool path_exists(const string &path)
void path_create_directories(const string &filepath)
bool path_read_compressed_text(const string &path, string &text)
unsigned __int64 uint64_t
string string_human_readable_size(size_t size)
string string_human_readable_number(size_t num)
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
std::unique_lock< std::mutex > thread_scoped_lock
CCL_NAMESPACE_BEGIN double time_dt()
@ IMAGE_DATA_TYPE_NANOVDB_FP16
@ IMAGE_DATA_TYPE_NANOVDB_FLOAT
@ IMAGE_DATA_TYPE_NANOVDB_FLOAT3
@ IMAGE_DATA_TYPE_NANOVDB_FPN
ccl_device_inline size_t align_up(size_t offset, size_t alignment)