// Fragment: checks for shipped precompiled HIP kernels (fatbins) by probing
// the build's "lib" path via path_get(). NOTE(review): interior lines of this
// function were elided by extraction; the visible code is incomplete.
35bool HIPDevice::have_precompiled_kernels()
37 string fatbins_path =
path_get(
"lib");
// Fragment: records a device error and logs a pointer to the Cycles GPU
// rendering documentation for troubleshooting. NOTE(review): the line that
// stores/forwards `error` is elided in this extraction.
46void HIPDevice::set_error(
const string &
error)
51 LOG_ERROR <<
"Refer to the Cycles GPU rendering documentation for possible solutions:\n"
52 "https://docs.blender.org/manual/en/latest/render/cycles/gpu_rendering.html\n";
// Fragment: HIPDevice constructor. Delegates to GPUDevice, then initializes
// the HIP runtime, acquires the device handle, queries attributes, and
// creates the HIP context. NOTE(review): many interior lines (early returns,
// braces, variable declarations such as `value`, `major`, `minor`) are elided.
58 :
GPUDevice(info, stats, profiler, headless)
// Compile-time guarantees that Cycles' texture/array handle typedefs are
// layout-compatible with the HIP driver API types.
61 static_assert(
sizeof(texMemObject) ==
sizeof(hipTextureObject_t));
62 static_assert(
sizeof(arrayMemObject) ==
sizeof(hArray));
72 need_texture_info =
false;
// Initialize the HIP runtime; failure is reported via set_error().
77 hipError_t
result = hipInit(0);
78 if (
result != hipSuccess) {
79 set_error(
string_printf(
"Failed to initialize HIP runtime (%s)", hipewErrorString(
result)));
// Resolve the device ordinal to a HIP device handle.
84 result = hipDeviceGet(&hipDevice, hipDevId);
85 if (
result != hipSuccess) {
86 set_error(
string_printf(
"Failed to get HIP device handle from ordinal (%s)",
87 hipewErrorString(
result)));
// Query whether host memory can be mapped into the device address space,
// and the pitch alignment required for 2D texture copies.
95 hip_assert(hipDeviceGetAttribute(&value, hipDeviceAttributeCanMapHostMemory, hipDevice));
96 can_map_host = value != 0;
99 hipDeviceGetAttribute(&pitch_alignment, hipDeviceAttributeTexturePitchAlignment, hipDevice));
// Context flags: allow local memory to grow to the maximum; add host
// mapping when the device supports it (condition line elided).
101 unsigned int ctx_flags = hipDeviceLmemResizeToMax;
103 ctx_flags |= hipDeviceMapHost;
108 result = hipCtxCreate(&hipContext, ctx_flags, hipDevice);
110 if (
result != hipSuccess) {
// Encode compute capability as a single integer, e.g. 10.1 -> 1010.
116 hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
117 hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
118 hipDevArchitecture = major * 100 + minor * 10;
121 hip_assert(hipRuntimeGetVersion(&hipRuntimeVersion));
// hipCtxCreate leaves the new context current; pop it so it is only made
// current explicitly via HIPContextScope.
124 hipCtxPopCurrent(
nullptr);
// Fragment: destructor unloads the kernel module and destroys the HIP
// context. NOTE(review): guard conditions / scope lines are elided.
127HIPDevice::~HIPDevice()
131 hip_assert(hipModuleUnload(hipModule));
133 hip_assert(hipCtxDestroy(hipContext));
// Fragment: returns whether the current device is usable by the HIP backend
// (hipSupportsDevice); otherwise reports the unsupported GPU by name.
// NOTE(review): return statements and the props usage are elided.
136bool HIPDevice::support_device(
const uint )
138 if (hipSupportsDevice(hipDevId)) {
142 hipDeviceProp_t props;
143 hipGetDeviceProperties(&props, hipDevId);
145 set_error(
string_printf(
"HIP backend requires AMD RDNA graphics card or up, but found %s.",
// Fragment: checks and enables bidirectional peer (P2P) access between this
// device and `peer_device`. NOTE(review): early returns and the success path
// are elided in this extraction.
150bool HIPDevice::check_peer_access(
Device *peer_device)
// A device trivially "peers" with itself (handling elided).
152 if (peer_device ==
this) {
159 HIPDevice *
const peer_device_hip =
static_cast<HIPDevice *
>(peer_device);
// First check raw peer access, then whether HIP arrays are accessible
// across the peer link; either failing means no peer memory sharing.
162 hip_assert(hipDeviceCanAccessPeer(&can_access, hipDevice, peer_device_hip->hipDevice));
163 if (can_access == 0) {
168 hip_assert(hipDeviceGetP2PAttribute(
169 &can_access, hipDevP2PAttrHipArrayAccessSupported, hipDevice, peer_device_hip->hipDevice));
170 if (can_access == 0) {
// Enable access in this context toward the peer's context...
176 const HIPContextScope scope(
this);
177 hipError_t
result = hipCtxEnablePeerAccess(peer_device_hip->hipContext, 0);
178 if (
result != hipSuccess) {
179 set_error(
string_printf(
"Failed to enable peer access on HIP context (%s)",
180 hipewErrorString(
result)));
// ...and symmetrically from the peer's context toward ours.
185 const HIPContextScope scope(peer_device_hip);
186 hipError_t
result = hipCtxEnablePeerAccess(hipContext, 0);
187 if (
result != hipSuccess) {
188 set_error(
string_printf(
"Failed to enable peer access on HIP context (%s)",
189 hipewErrorString(
result)));
// Fragment: use_adaptive_compilation() (body elided by extraction).
197bool HIPDevice::use_adaptive_compilation()
// Fragment: builds the common hipcc compiler flags shared by all kernel
// compilations: include path, adaptive-compilation feature defines, user
// extra flags from CYCLES_HIP_EXTRA_CFLAGS, and optional NanoVDB/debug
// defines. NOTE(review): the base `cflags` initialization is elided.
205string HIPDevice::compile_kernel_get_common_cflags(
const uint kernel_features)
208 const string source_path =
path_get(
"source");
209 const string include_path = source_path;
215 include_path.c_str());
// Adaptive compilation bakes the requested feature mask into the kernel.
216 if (use_adaptive_compilation()) {
217 cflags +=
" -D__KERNEL_FEATURES__=" +
to_string(kernel_features);
// Allow users to inject extra compiler flags via the environment.
220 const char *extra_cflags = getenv(
"CYCLES_HIP_EXTRA_CFLAGS");
222 cflags += string(
" ") + string(extra_cflags);
226 cflags +=
" -DWITH_NANOVDB";
229# ifdef WITH_CYCLES_DEBUG
230 cflags +=
" -DWITH_CYCLES_DEBUG";
// Fragment: resolves (or builds) the fatbin for a kernel. Order of attempts:
// 1) precompiled kernel shipped with the build, 2) previously locally
// compiled kernel in the cache, 3) invoke hipcc to compile from source.
// Returns the fatbin path (return statements elided by extraction).
236string HIPDevice::compile_kernel(
const uint kernel_features,
const char *
name,
const char *base)
// Query compute capability and architecture string for naming/targeting.
240 hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
241 hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
242 const std::string arch = hipDeviceArch(hipDevId);
// Attempt 1: precompiled kernel (skipped for adaptive compilation, which
// must always rebuild with the requested feature set).
245 if (!use_adaptive_compilation()) {
247 LOG_INFO <<
"Testing for pre-compiled kernel " << fatbin <<
".";
249 LOG_INFO <<
"Using precompiled kernel.";
255 string source_path =
path_get(
"source");
261 string common_cflags = compile_kernel_get_common_cflags(kernel_features);
264 const char *
const kernel_ext =
"genco";
265 std::string
options =
"-Wno-parentheses-equality -Wno-unused-value -ffast-math -std=c++17";
268 options.append(
" -save-temps");
// Special-casing for gfx90x (compute capability 9.0); body elided.
270 if (major == 9 && minor == 0) {
274 options.append(
" --offload-arch=").append(arch);
276 const string include_path = source_path;
// Cache key includes kernel name, architecture, and source md5 so any
// source change produces a new cache entry.
278 "cycles_%s_%s_%s",
name, arch.c_str(), kernel_md5.c_str());
// Attempt 2: previously locally-compiled kernel in the cache.
280 LOG_INFO <<
"Testing for locally compiled kernel " << fatbin <<
".";
282 LOG_INFO <<
"Using locally compiled kernel.";
// If precompiled kernels exist but this device isn't covered, fail with a
// descriptive error instead of attempting a local build.
287 if (!use_adaptive_compilation() && have_precompiled_kernels()) {
288 if (!hipSupportsDevice(hipDevId)) {
290 string_printf(
"HIP backend requires compute capability 10.1 or up, but found %d.%d. "
291 "Your GPU is not supported.",
297 string_printf(
"HIP binary kernel for this graphics card compute "
298 "capability (%d.%d) not found.",
// Attempt 3: compile from source with hipcc.
307 const char *
const hipcc = hipewCompilerPath();
308 if (hipcc ==
nullptr) {
310 "HIP hipcc compiler not found. "
311 "Install HIP toolkit in default location.");
315 const int hipcc_hip_version = hipewCompilerVersion();
316 LOG_INFO <<
"Found hipcc " << hipcc <<
", HIP version " << hipcc_hip_version <<
".";
325 string command =
string_printf(
"%s %s -I \"%s\" --%s \"%s\" -o \"%s\" %s",
328 include_path.c_str(),
332 common_cflags.c_str());
334 LOG_INFO_IMPORTANT <<
"Compiling " << ((use_adaptive_compilation()) ?
"adaptive " :
"")
335 <<
"HIP kernel ... " << command;
// NOTE(review): "call " prefix is presumably Windows-only (cmd.exe);
// the surrounding #ifdef is elided — confirm against the full file.
338 command =
"call " + command;
340 if (system(command.c_str()) != 0) {
342 "Failed to execute compilation command, "
343 "see console for details.");
350 "HIP kernel compilation failed, "
351 "see console for details.");
355 LOG_INFO_IMPORTANT <<
"Kernel compilation finished in " << std::fixed << std::setprecision(2)
356 <<
time_dt() - starttime <<
"s";
// Fragment: compiles (if needed) and loads the kernel fatbin into a HIP
// module, then reserves local memory on success. Returns whether the module
// loaded successfully.
361bool HIPDevice::load_kernels(
const uint kernel_features)
// Kernel reload for adaptive compilation is not supported; skip (the
// already-loaded module remains in use — surrounding lines elided).
368 if (use_adaptive_compilation()) {
369 LOG_INFO <<
"Skipping HIP kernel reload for adaptive compilation, not currently supported.";
// Bail out if device initialization failed earlier (no context).
375 if (hipContext ==
nullptr) {
380 if (!support_device(kernel_features)) {
385 const char *kernel_name =
"kernel";
386 string fatbin = compile_kernel(kernel_features, kernel_name);
387 if (fatbin.empty()) {
392 HIPContextScope scope(
this);
398 result = hipModuleLoadData(&hipModule, fatbin_data.c_str());
// NOTE(review): reached when reading the fatbin file failed (condition
// elided) — reuse hipErrorFileNotFound to produce a sensible message.
401 result = hipErrorFileNotFound;
404 if (
result != hipSuccess) {
406 "Failed to load HIP kernel from '%s' (%s)", fatbin.c_str(), hipewErrorString(
result)));
409 if (
result == hipSuccess) {
411 reserve_local_memory(kernel_features);
414 return (
result == hipSuccess);
// Fragment: forces the HIP runtime to allocate kernel local (scratch) memory
// up-front by launching a test kernel, then pads device memory usage so that
// roughly `keep_mb` stays free. Measures free memory before/after to report
// how much local memory was reserved (reporting lines elided).
417void HIPDevice::reserve_local_memory(
const uint kernel_features)
422 size_t total = 0, free_before = 0, free_after = 0;
425 HIPContextScope scope(
this);
426 hipMemGetInfo(&free_before, &total);
// Launching any kernel makes the runtime commit local memory, because
// hipDeviceLmemResizeToMax was set at context creation.
440 HIPDeviceQueue queue(
this);
447 queue.init_execution();
448 queue.enqueue(test_kernel, 1, args);
453 HIPContextScope scope(
this);
454 hipMemGetInfo(&free_after, &total);
// Allocate 10 MB chunks until only ~1 GB remains free. NOTE(review): the
// rationale (working around driver over-reporting?) is in elided comments —
// confirm against the full file. The allocations are intentionally leaked
// for the lifetime of the context.
462 const size_t keep_mb = 1024;
464 while (free_after > keep_mb * 1024 * 1024LL) {
466 hip_assert(hipMalloc(&tmp, 10 * 1024 * 1024LL));
467 hipMemGetInfo(&free_after, &total);
// Fragment: reports total and free device memory via hipMemGetInfo, inside
// this device's context.
472void HIPDevice::get_device_memory_info(
size_t &total,
size_t &
free)
474 HIPContextScope scope(
this);
476 hipMemGetInfo(&
free, &total);
// Fragment: allocates `size` bytes of device memory into `device_pointer`;
// returns true on success. Caller must free with free_device().
479bool HIPDevice::alloc_device(
void *&device_pointer,
const size_t size)
481 HIPContextScope scope(
this);
483 hipError_t mem_alloc_result = hipMalloc((hipDeviceptr_t *)&device_pointer,
size);
484 return mem_alloc_result == hipSuccess;
// Fragment: frees device memory previously obtained from alloc_device().
487void HIPDevice::free_device(
void *device_pointer)
489 HIPContextScope scope(
this);
491 hip_assert(hipFree((hipDeviceptr_t)device_pointer));
// Fragment: allocates pinned host memory that is mapped into the device
// address space (write-combined: fast host writes, slow host reads).
// Returns true on success; free with shared_free().
494bool HIPDevice::shared_alloc(
void *&shared_pointer,
const size_t size)
496 HIPContextScope scope(
this);
498 hipError_t mem_alloc_result = hipHostMalloc(
499 &shared_pointer,
size, hipHostMallocMapped | hipHostMallocWriteCombined);
501 return mem_alloc_result == hipSuccess;
// Fragment: frees pinned host memory allocated by shared_alloc().
504void HIPDevice::shared_free(
void *shared_pointer)
506 HIPContextScope scope(
this);
508 hipHostFree(shared_pointer);
// Fragment: translates a mapped/pinned host pointer (from shared_alloc) to
// the device-side pointer the GPU uses to access the same memory.
511void *HIPDevice::shared_to_device_pointer(
const void *shared_pointer)
513 HIPContextScope scope(
this);
514 void *device_pointer =
nullptr;
516 hipHostGetDevicePointer((hipDeviceptr_t *)&device_pointer, (
void *)shared_pointer, 0));
517 return device_pointer;
// Fragment: synchronous host-to-device copy of `size` bytes.
520void HIPDevice::copy_host_to_device(
void *device_pointer,
void *host_pointer,
const size_t size)
522 const HIPContextScope scope(
this);
524 hip_assert(hipMemcpyHtoD((hipDeviceptr_t)device_pointer, host_pointer,
size));
// Fragments of several device_memory entry points. The assert(!"...") lines
// guard unsupported memory types — textures and globals are handled by
// dedicated tex_*/global_* paths, not the generic ones.
530 assert(!
"mem_alloc not supported for textures.");
533 assert(!
"mem_alloc not supported for global memory.");
// mem_copy_to fragments: non-texture/global memory goes through the generic
// copy path (surrounding dispatch elided).
551 generic_copy_to(mem);
554 generic_copy_to(mem);
570 assert(!
"mem_move_to_host only supported for texture and global memory");
// Fragment: copies a sub-rectangle of rows [y, y+h) back from device to
// host. `size` is the byte count of the region, `offset` the byte offset of
// the first row; the actual hipMemcpyDtoH argument list is elided.
574void HIPDevice::mem_copy_from(
575 device_memory &mem,
const size_t y,
size_t w,
const size_t h,
size_t elem)
578 assert(!
"mem_copy_from not supported for textures.");
581 const size_t size = elem *
w * h;
582 const size_t offset = elem *
y *
w;
585 const HIPContextScope scope(
this);
586 hip_assert(hipMemcpyDtoH(
// Fragment from a following method (original line 605, body elided).
605 const HIPContextScope scope(
this);
// Fragment: uploads constant data into the kernel's global "kernel_params"
// symbol. For each known data array name (expanded from kernel/data_arrays.h)
// the host data is copied to the matching member offset within
// KernelParamsHIP.
631void HIPDevice::const_copy_to(
const char *
name,
void *host,
const size_t size)
633 HIPContextScope scope(
this);
637 hip_assert(hipModuleGetGlobal(&mem, &bytes, hipModule,
"kernel_params"));
641# define KERNEL_DATA_ARRAY(data_type, data_name) \
642 if (strcmp(name, #data_name) == 0) { \
643 hip_assert(hipMemcpyHtoD(mem + offsetof(KernelParamsHIP, data_name), host, size)); \
648# include "kernel/data_arrays.h"
649# undef KERNEL_DATA_ARRAY
// Fragments of global/other alloc-copy paths that fall back to the generic
// device copy (enclosing functions elided).
656 generic_copy_to(mem);
666 generic_copy_to(mem);
669 generic_copy_to(mem);
// Fragment: builds the hip_Memcpy2D descriptor for copying 2D texture data
// from host to device, widening the destination pitch to the device's
// required pitch alignment. NOTE(review): src/dst pointer and Height fields
// are set on elided lines.
687static hip_Memcpy2D tex_2d_copy_param(
const device_texture &mem,
const int pitch_alignment)
690 const size_t src_pitch = tex_src_pitch(mem);
691 const size_t dst_pitch =
align_up(src_pitch, pitch_alignment);
694 memset(&param, 0,
sizeof(param));
695 param.dstMemoryType = hipMemoryTypeDevice;
697 param.dstPitch = dst_pitch;
698 param.srcMemoryType = hipMemoryTypeHost;
700 param.srcPitch = src_pitch;
701 param.WidthInBytes = param.srcPitch;
// Fragment: texture allocation. Maps Cycles texture parameters (extension /
// interpolation / data type) onto HIP texture state, allocates device
// storage (pitched 2D or linear), uploads the data, creates a texture
// object for sampled access, and registers the result in texture_info.
// NOTE(review): enclosing switch statements and several branches are elided.
709 HIPContextScope scope(
this);
// Extension type -> HIP addressing mode (wrap is also the default).
711 hipTextureAddressMode address_mode = hipAddressModeWrap;
714 address_mode = hipAddressModeWrap;
717 address_mode = hipAddressModeClamp;
720 address_mode = hipAddressModeBorder;
723 address_mode = hipAddressModeMirror;
// Interpolation -> HIP filter mode (point = closest, linear otherwise).
730 hipTextureFilterMode filter_mode;
732 filter_mode = hipFilterModePoint;
735 filter_mode = hipFilterModeLinear;
// Cycles data type -> HIP array format.
742 format = HIP_AD_FORMAT_UNSIGNED_INT8;
745 format = HIP_AD_FORMAT_UNSIGNED_INT16;
748 format = HIP_AD_FORMAT_UNSIGNED_INT32;
751 format = HIP_AD_FORMAT_SIGNED_INT32;
754 format = HIP_AD_FORMAT_FLOAT;
757 format = HIP_AD_FORMAT_HALF;
768 cmem = &device_mem_map[&mem];
// 2D path: allocate with the pitch-aligned size (generic_alloc is told the
// extra padding beyond mem.memory_size()), then do an unaligned 2D copy.
773 const size_t dst_pitch =
align_up(tex_src_pitch(mem), pitch_alignment);
774 const size_t dst_size = dst_pitch * mem.
data_height;
776 cmem = generic_alloc(mem, dst_size - mem.
memory_size());
781 const hip_Memcpy2D param = tex_2d_copy_param(mem, pitch_alignment);
782 hip_assert(hipDrvMemcpy2DUnaligned(&param));
// 1D/linear path: plain allocation.
786 cmem = generic_alloc(mem);
// Resource descriptor: pitched 2D or linear, matching the storage above.
799 hipResourceDesc resDesc;
800 memset(&resDesc, 0,
sizeof(resDesc));
803 const size_t dst_pitch =
align_up(tex_src_pitch(mem), pitch_alignment);
805 resDesc.resType = hipResourceTypePitch2D;
807 resDesc.res.pitch2D.format =
format;
811 resDesc.res.pitch2D.pitchInBytes = dst_pitch;
814 resDesc.resType = hipResourceTypeLinear;
816 resDesc.res.linear.format =
format;
// Texture descriptor: addressing/filtering chosen above; normalized
// coordinates as the kernels expect.
821 hipTextureDesc texDesc;
822 memset(&texDesc, 0,
sizeof(texDesc));
823 texDesc.addressMode[0] = address_mode;
824 texDesc.addressMode[1] = address_mode;
825 texDesc.addressMode[2] = address_mode;
826 texDesc.filterMode = filter_mode;
827 texDesc.flags = HIP_TRSF_NORMALIZED_COORDINATES;
830 cmem = &device_mem_map[&mem];
832 if (hipTexObjectCreate(&cmem->texobject, &resDesc, &texDesc,
nullptr) != hipSuccess) {
834 "Failed to create texture. Maximum GPU texture size or available GPU memory was likely "
// Publish the texture in the global texture_info table; grow in chunks of
// 128 slots to amortize resizes, and flag the table for re-upload.
848 if (slot >= texture_info.size()) {
850 texture_info.resize(slot + 128);
852 texture_info[slot] = tex_info;
853 need_texture_info =
true;
882 generic_copy_to(mem);
889 HIPContextScope scope(
this);
893 auto it = device_mem_map.find(&mem);
894 if (it == device_mem_map.end()) {
898 const Mem &cmem = it->second;
906 if (cmem.texobject) {
908 hipTexObjectDestroy(cmem.texobject);
913 device_mem_map.erase(device_mem_map.find(&mem));
915 else if (cmem.array) {
917 hipArrayDestroy(
reinterpret_cast<hArray
>(cmem.array));
922 device_mem_map.erase(device_mem_map.find(&mem));
932 return make_unique<HIPDeviceQueue>(
this);
944 HIPContextScope scope(
this);
946 switch (interop_device.
type) {
957 int num_all_devices = 0;
958 hip_assert(hipGetDeviceCount(&num_all_devices));
960 if (num_all_devices == 0) {
965 uint num_gl_devices = 0;
966 hipGLGetDevices(&num_gl_devices, gl_devices.data(), num_all_devices, hipGLDeviceListAll);
969 for (hipDevice_t gl_device : gl_devices) {
970 if (gl_device == hipDevice) {
978 LOG_INFO <<
"Graphics interop: found matching OpenGL device for HIP";
981 LOG_INFO <<
"Graphics interop: no matching OpenGL device for HIP";
// Fragments: convenience accessors over get_device_default_attribute(),
// returning 0 when the attribute query fails.
997int HIPDevice::get_num_multiprocessors()
999 return get_device_default_attribute(hipDeviceAttributeMultiprocessorCount, 0);
1002int HIPDevice::get_max_num_threads_per_multiprocessor()
1004 return get_device_default_attribute(hipDeviceAttributeMaxThreadsPerMultiProcessor, 0);
// Fragment: thin wrappers around hipDeviceGetAttribute. The first reports
// success; the second returns `default_value` when the query fails
// (success-path return elided by extraction).
1007bool HIPDevice::get_device_attribute(hipDeviceAttribute_t attribute,
int *value)
1009 HIPContextScope scope(
this);
1011 return hipDeviceGetAttribute(value, attribute, hipDevice) == hipSuccess;
1014int HIPDevice::get_device_default_attribute(hipDeviceAttribute_t attribute,
1015 const int default_value)
1018 if (!get_device_attribute(attribute, &value)) {
1019 return default_value;
void BLI_kdtree_nd_ free(KDTree *tree)
BMesh const char void * data
unsigned long long int uint64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
virtual void set_error(const string &error)
void mem_free(const size_t size)
bool is_resident(Device *sub_device) const
size_t memory_elements_size(const int elements)
bool is_shared(Device *sub_device) const
device_ptr device_pointer
static constexpr size_t datatype_size(DataType datatype)
CCL_NAMESPACE_BEGIN struct Options options
#define KERNEL_DATA_ARRAY(type, name)
DebugFlags & DebugFlags()
#define KERNEL_FEATURE_NODE_RAYTRACE
#define KERNEL_FEATURE_MNEE
#define CCL_NAMESPACE_END
static const char * to_string(const Interpolation &interp)
#define assert(assertion)
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE
#define LOG_INFO_IMPORTANT
string util_md5_string(const string &str)
static void error(const char *str)
string path_cache_get(const string &sub)
string path_get(const string &sub)
string path_files_md5_hash(const string &dir)
string path_join(const string &dir, const string &file)
bool path_exists(const string &path)
void path_create_directories(const string &filepath)
bool path_read_compressed_text(const string &path, string &text)
string string_human_readable_size(size_t size)
string string_human_readable_number(size_t num)
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
ccl_device_inline bool is_nanovdb_type(int type)
std::unique_lock< std::mutex > thread_scoped_lock
CCL_NAMESPACE_BEGIN double time_dt()
ccl_device_inline size_t align_up(const size_t offset, const size_t alignment)