34static void get_hiprt_transform(
float matrix[][4],
Transform &tfm)
38 matrix[row][
col++] = tfm.
x.
x;
39 matrix[row][
col++] = tfm.
x.
y;
40 matrix[row][
col++] = tfm.
x.
z;
41 matrix[row][
col++] = tfm.
x.
w;
44 matrix[row][
col++] = tfm.
y.
x;
45 matrix[row][
col++] = tfm.
y.
y;
46 matrix[row][
col++] = tfm.
y.
z;
47 matrix[row][
col++] = tfm.
y.
w;
50 matrix[row][
col++] = tfm.
z.
x;
51 matrix[row][
col++] = tfm.
z.
y;
52 matrix[row][
col++] = tfm.
z.
z;
53 matrix[row][
col++] = tfm.
z.
w;
63HIPRTDevice::HIPRTDevice(
const DeviceInfo &info,
67 : HIPDevice(info, stats, profiler, headless),
71 scratch_buffer_size(0),
73 prim_visibility(this,
"prim_visibility",
MEM_GLOBAL),
74 instance_transform_matrix(this,
"instance_transform_matrix",
MEM_READ_ONLY),
76 user_instance_id(this,
"user_instance_id",
MEM_GLOBAL),
79 custom_prim_info(this,
"custom_prim_info",
MEM_GLOBAL),
80 custom_prim_info_offset(this,
"custom_prim_info_offset",
MEM_GLOBAL),
82 prim_time_offset(this,
"prim_time_offset",
MEM_GLOBAL)
84 HIPContextScope scope(
this);
85 global_stack_buffer = {0};
86 hiprtContextCreationInput hiprt_context_input = {
nullptr};
87 hiprt_context_input.ctxt = hipContext;
88 hiprt_context_input.device = hipDevice;
89 hiprt_context_input.deviceType = hiprtDeviceAMD;
90 hiprtError rt_result = hiprtCreateContext(
91 HIPRT_API_VERSION, hiprt_context_input, &hiprt_context);
93 if (rt_result != hiprtSuccess) {
94 set_error(
"Failed to create HIPRT context");
98 rt_result = hiprtCreateFuncTable(
99 hiprt_context, Max_Primitive_Type, Max_Intersect_Filter_Function, functions_table);
101 if (rt_result != hiprtSuccess) {
102 set_error(
"Failed to create HIPRT Function Table");
107 hiprtSetLogLevel(hiprtLogLevelInfo | hiprtLogLevelWarn | hiprtLogLevelError);
110 hiprtSetLogLevel(hiprtLogLevelNone);
114HIPRTDevice::~HIPRTDevice()
116 HIPContextScope scope(
this);
117 free_bvh_memory_delayed();
118 user_instance_id.free();
119 prim_visibility.free();
120 hiprt_blas_ptr.free();
122 instance_transform_matrix.free();
123 transform_headers.free();
124 custom_prim_info_offset.free();
125 custom_prim_info.free();
126 prim_time_offset.free();
129 hiprtDestroyGlobalStackBuffer(hiprt_context, global_stack_buffer);
130 hiprtDestroyFuncTable(hiprt_context, functions_table);
131 hiprtDestroyScene(hiprt_context, scene);
132 hiprtDestroyContext(hiprt_context);
137 return make_unique<HIPRTDeviceQueue>(
this);
140string HIPRTDevice::compile_kernel_get_common_cflags(
const uint kernel_features)
142 string cflags = HIPDevice::compile_kernel_get_common_cflags(kernel_features);
144 cflags +=
" -D __HIPRT__ ";
149string HIPRTDevice::compile_kernel(
const uint kernel_features,
const char *
name,
const char *base)
152 hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
153 hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
154 const std::string arch = hipDeviceArch(hipDevId);
156 if (!use_adaptive_compilation()) {
158 LOG_INFO <<
"Testing for pre-compiled kernel " << fatbin <<
".";
160 LOG_INFO <<
"Using precompiled kernel.";
165 string source_path =
path_get(
"source");
168 string common_cflags = compile_kernel_get_common_cflags(kernel_features);
171 const string include_path = source_path;
173 "cycles_%s_%s_%s.hipfb",
name, arch.c_str(), kernel_md5.c_str());
175 const string hiprt_include_path =
path_join(source_path,
"kernel/device/hiprt");
177 LOG_INFO <<
"Testing for locally compiled kernel " << fatbin <<
".";
179 LOG_INFO <<
"Using locally compiled kernel.";
184 if (!use_adaptive_compilation() && have_precompiled_kernels()) {
185 if (!hipSupportsDevice(hipDevId)) {
187 string_printf(
"HIP backend requires compute capability 10.1 or up, but found %d.%d. "
188 "Your GPU is not supported.",
194 string_printf(
"HIP binary kernel for this graphics card compute "
195 "capability (%d.%d) not found.",
203 const char *
const hipcc = hipewCompilerPath();
204 if (hipcc ==
nullptr) {
206 "HIP hipcc compiler not found. "
207 "Install HIP toolkit in default location.");
211 const int hipcc_hip_version = hipewCompilerVersion();
212 LOG_INFO <<
"Found hipcc " << hipcc <<
", HIP version " << hipcc_hip_version <<
".";
213 if (hipcc_hip_version < 40) {
214 LOG_WARNING <<
"Unsupported HIP version " << hipcc_hip_version / 10 <<
"."
215 << hipcc_hip_version % 10 <<
", you need HIP 4.0 or newer.\n";
224 const char *
const kernel_ext =
"genco";
226 options.append(
"-Wno-parentheses-equality -Wno-unused-value -ffast-math -O3 -std=c++17");
227 options.append(
" --offload-arch=").append(arch.c_str());
233 string compile_command =
string_printf(
"%s %s -I %s -I %s --%s %s -o \"%s\" %s",
236 include_path.c_str(),
237 hiprt_include_path.c_str(),
241 common_cflags.c_str());
243 LOG_INFO_IMPORTANT <<
"Compiling " << ((use_adaptive_compilation()) ?
"adaptive " :
"")
244 <<
"HIP-RT kernel ... " << compile_command;
247 compile_command =
"call " + compile_command;
249 if (system(compile_command.c_str()) != 0) {
251 "Failed to execute linking command, "
252 "see console for details.");
256 LOG_INFO_IMPORTANT <<
"Kernel compilation finished in " << std::fixed << std::setprecision(2)
257 <<
time_dt() - starttime <<
"s";
262bool HIPRTDevice::load_kernels(
const uint kernel_features)
265 if (use_adaptive_compilation()) {
266 LOG_INFO <<
"Skipping HIP kernel reload for adaptive compilation, not currently supported.";
271 if (hipContext ==
nullptr) {
275 if (!support_device(kernel_features)) {
286 const char *kernel_name =
"kernel";
287 string fatbin = compile_kernel(kernel_features, kernel_name);
288 if (fatbin.empty()) {
293 HIPContextScope scope(
this);
299 result = hipModuleLoadData(&hipModule, fatbin_data.c_str());
302 result = hipErrorFileNotFound;
305 if (
result != hipSuccess) {
307 "Failed to load HIP kernel from '%s' (%s)", fatbin.c_str(), hipewErrorString(
result)));
310 if (
result == hipSuccess) {
319 HIPRTDeviceQueue queue(
this);
326 queue.init_execution();
327 queue.enqueue(test_kernel, 1, args);
332 return (
result == hipSuccess);
335void HIPRTDevice::const_copy_to(
const char *
name,
void *host,
const size_t size)
337 HIPContextScope scope(
this);
341 if (strcmp(
name,
"data") == 0) {
343 KernelData *
const data = (KernelData *)host;
344 *(hiprtScene *)&
data->device_bvh = scene;
347 hip_assert(hipModuleGetGlobal(&mem, &bytes, hipModule,
"kernel_params"));
350# define KERNEL_DATA_ARRAY(data_type, data_name) \
351 if (strcmp(name, #data_name) == 0) { \
352 hip_assert(hipMemcpyHtoD(mem + offsetof(KernelParamsHIPRT, data_name), host, size)); \
364# include "kernel/data_arrays.h"
365# undef KERNEL_DATA_ARRAY
368hiprtGeometryBuildInput HIPRTDevice::prepare_triangle_blas(BVHHIPRT *bvh,
Mesh *mesh)
370 hiprtGeometryBuildInput geom_input;
377 const size_t num_verts = mesh->get_verts().size();
378 const size_t num_steps = mesh->get_motion_steps();
383 if (bvh->params.num_motion_triangle_steps == 0 || bvh->params.use_spatial_split) {
384 bvh->custom_primitive_bound.alloc(num_triangles);
385 bvh->custom_prim_info.resize(num_triangles);
386 for (
uint j = 0; j < num_triangles; j++) {
395 bvh->custom_primitive_bound[num_bounds] =
bounds;
396 bvh->custom_prim_info[num_bounds].x = j;
403 const int num_bvh_steps = bvh->params.num_motion_triangle_steps * 2 + 1;
404 const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
406 bvh->custom_primitive_bound.alloc(num_triangles * num_bvh_steps);
407 bvh->custom_prim_info.resize(num_triangles * num_bvh_steps);
408 bvh->prims_time.resize(num_triangles * num_bvh_steps);
410 for (
uint j = 0; j < num_triangles; j++) {
415 prev_bounds.
grow(prev_verts[0]);
416 prev_bounds.
grow(prev_verts[1]);
417 prev_bounds.
grow(prev_verts[2]);
419 for (
int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
420 const float curr_time = (
float)(bvh_step)*num_bvh_steps_inv_1;
424 curr_bounds.
grow(curr_verts[0]);
425 curr_bounds.
grow(curr_verts[1]);
426 curr_bounds.
grow(curr_verts[2]);
430 const float prev_time = (
float)(bvh_step - 1) * num_bvh_steps_inv_1;
431 bvh->custom_primitive_bound[num_bounds] =
bounds;
432 bvh->custom_prim_info[num_bounds].x = j;
434 bvh->prims_time[num_bounds].x = curr_time;
435 bvh->prims_time[num_bounds].y = prev_time;
438 prev_bounds = curr_bounds;
443 bvh->custom_prim_aabb.aabbCount = num_bounds;
444 bvh->custom_prim_aabb.aabbStride =
sizeof(
BoundBox);
445 bvh->custom_primitive_bound.copy_to_device();
446 bvh->custom_prim_aabb.aabbs = (
void *)bvh->custom_primitive_bound.device_pointer;
448 geom_input.type = hiprtPrimitiveTypeAABBList;
449 geom_input.primitive.aabbList = bvh->custom_prim_aabb;
450 geom_input.geomType = Motion_Triangle;
452 if (bvh->custom_primitive_bound.device_pointer == 0) {
453 set_error(
"Failed to allocate triangle custom_primitive_bound for BLAS");
457 size_t triangle_size = mesh->get_triangles().size();
458 int *triangle_data = mesh->get_triangles().data();
460 size_t vertex_size = mesh->get_verts().size();
461 float *vertex_data =
reinterpret_cast<float *
>(mesh->get_verts().
data());
464 bvh->triangle_mesh.triangleStride = 3 *
sizeof(int);
465 bvh->triangle_mesh.vertexCount = vertex_size;
466 bvh->triangle_mesh.vertexStride =
sizeof(
float3);
469 int *triangle_index_data = bvh->triangle_index.resize(triangle_size);
470 float *vertex_data_data = bvh->vertex_data.resize(vertex_size * 4);
472 if (triangle_index_data && vertex_data_data) {
473 std::copy_n(triangle_data, triangle_size, triangle_index_data);
474 std::copy_n(vertex_data, vertex_size * 4, vertex_data_data);
475 static_assert(
sizeof(
float3) ==
sizeof(
float) * 4);
477 bvh->triangle_index.copy_to_device();
478 bvh->vertex_data.copy_to_device();
481 bvh->triangle_mesh.triangleIndices = (
void *)(bvh->triangle_index.device_pointer);
482 bvh->triangle_mesh.vertices = (
void *)(bvh->vertex_data.device_pointer);
484 geom_input.type = hiprtPrimitiveTypeTriangleMesh;
485 geom_input.primitive.triangleMesh = bvh->triangle_mesh;
487 if (bvh->triangle_index.device_pointer == 0 || bvh->vertex_data.device_pointer == 0) {
488 set_error(
"Failed to allocate triangle data for BLAS");
495hiprtGeometryBuildInput HIPRTDevice::prepare_curve_blas(BVHHIPRT *bvh,
Hair *hair)
497 hiprtGeometryBuildInput geom_input;
502 const Attribute *curve_attr_mP =
nullptr;
508 if (curve_attr_mP ==
nullptr || bvh->params.num_motion_curve_steps == 0) {
509 bvh->custom_prim_info.resize(num_segments);
510 bvh->custom_primitive_bound.alloc(num_segments);
513 size_t num_boxes = bvh->params.num_motion_curve_steps * 2 * num_segments;
514 bvh->custom_prim_info.resize(num_boxes);
515 bvh->prims_time.resize(num_boxes);
516 bvh->custom_primitive_bound.alloc(num_boxes);
520 float3 *curve_keys = hair->get_curve_keys().data();
522 for (
uint j = 0; j < num_curves; j++) {
524 const float *curve_radius = hair->get_curve_radius().data();
526 for (
int k = 0; k < curve.
num_keys - 1; k++) {
527 if (curve_attr_mP ==
nullptr) {
529 current_keys[0] = curve_keys[
max(first_key + k - 1, first_key)];
530 current_keys[1] = curve_keys[first_key + k];
531 current_keys[2] = curve_keys[first_key + k + 1];
532 current_keys[3] = curve_keys[
min(first_key + k + 2, first_key + curve.
num_keys - 1)];
534 if (current_keys[0].
x == current_keys[1].
x && current_keys[1].
x == current_keys[2].
x &&
535 current_keys[2].
x == current_keys[3].
x && current_keys[0].
y == current_keys[1].
y &&
536 current_keys[1].
y == current_keys[2].
y && current_keys[2].
y == current_keys[3].
y &&
537 current_keys[0].
z == current_keys[1].
z && current_keys[1].
z == current_keys[2].
z &&
538 current_keys[2].
z == current_keys[3].
z)
547 bvh->custom_prim_info[num_bounds].x = j;
548 bvh->custom_prim_info[num_bounds].y = type;
549 bvh->custom_primitive_bound[num_bounds] =
bounds;
554 const size_t num_steps = hair->get_motion_steps();
556 const size_t num_keys = hair->get_curve_keys().size();
558 if (bvh->params.num_motion_curve_steps == 0 || bvh->params.use_spatial_split) {
566 bvh->custom_prim_info[num_bounds].x = j;
567 bvh->custom_prim_info[num_bounds].y = type;
568 bvh->custom_primitive_bound[num_bounds] =
bounds;
573 const int num_bvh_steps = bvh->params.num_motion_curve_steps * 2 + 1;
574 const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
591 for (
int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
592 const float curr_time = (
float)(bvh_step)*num_bvh_steps_inv_1;
610 const float prev_time = (
float)(bvh_step - 1) * num_bvh_steps_inv_1;
612 bvh->custom_prim_info[num_bounds].x = j;
613 bvh->custom_prim_info[num_bounds].y = packed_type;
614 bvh->custom_primitive_bound[num_bounds] =
bounds;
615 bvh->prims_time[num_bounds].x = prev_time;
616 bvh->prims_time[num_bounds].y = curr_time;
619 prev_bounds = curr_bounds;
626 bvh->custom_prim_aabb.aabbCount = num_bounds;
627 bvh->custom_prim_aabb.aabbStride =
sizeof(
BoundBox);
628 bvh->custom_primitive_bound.copy_to_device();
629 bvh->custom_prim_aabb.aabbs = (
void *)bvh->custom_primitive_bound.device_pointer;
631 geom_input.type = hiprtPrimitiveTypeAABBList;
632 geom_input.primitive.aabbList = bvh->custom_prim_aabb;
633 geom_input.geomType =
Curve;
635 if (bvh->custom_primitive_bound.device_pointer == 0) {
636 set_error(
"Failed to allocate curve custom_primitive_bound for BLAS");
642hiprtGeometryBuildInput HIPRTDevice::prepare_point_blas(BVHHIPRT *bvh,
PointCloud *pointcloud)
644 hiprtGeometryBuildInput geom_input;
646 const Attribute *point_attr_mP =
nullptr;
651 const float3 *points_data = pointcloud->get_points().data();
652 const float *radius_data = pointcloud->get_radius().data();
653 const size_t num_points = pointcloud->
num_points();
654 const float4 *motion_data = (point_attr_mP) ? point_attr_mP->
data_float4() :
nullptr;
655 const size_t num_steps = pointcloud->get_motion_steps();
659 if (point_attr_mP ==
nullptr) {
660 bvh->custom_prim_info.resize(num_points);
661 bvh->custom_primitive_bound.alloc(num_points);
662 for (
uint j = 0; j < num_points; j++) {
667 bvh->custom_primitive_bound[num_bounds] =
bounds;
668 bvh->custom_prim_info[num_bounds].x = j;
674 else if (bvh->params.num_motion_point_steps == 0 || bvh->params.use_spatial_split) {
675 bvh->custom_prim_info.resize(num_points);
676 bvh->custom_primitive_bound.alloc(num_points);
678 for (
uint j = 0; j < num_points; j++) {
686 bvh->custom_primitive_bound[num_bounds] =
bounds;
687 bvh->custom_prim_info[num_bounds].x = j;
694 const int num_bvh_steps = bvh->params.num_motion_point_steps * 2 + 1;
695 const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
697 bvh->custom_prim_info.resize(num_points * num_bvh_steps);
698 bvh->custom_primitive_bound.alloc(num_points * num_bvh_steps);
699 bvh->prims_time.resize(num_points * num_bvh_steps);
701 for (
uint j = 0; j < num_points; j++) {
703 const size_t num_steps = pointcloud->get_motion_steps();
707 points_data, radius_data, point_steps, num_points, num_steps, 0.0f, j);
711 for (
int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
712 const float curr_time = (
float)(bvh_step)*num_bvh_steps_inv_1;
714 points_data, radius_data, point_steps, num_points, num_steps, curr_time, j);
720 const float prev_time = (
float)(bvh_step - 1) * num_bvh_steps_inv_1;
721 bvh->custom_primitive_bound[num_bounds] =
bounds;
722 bvh->custom_prim_info[num_bounds].x = j;
724 bvh->prims_time[num_bounds].x = prev_time;
725 bvh->prims_time[num_bounds].y = curr_time;
728 prev_bounds = curr_bounds;
733 bvh->custom_prim_aabb.aabbCount = num_bounds;
734 bvh->custom_prim_aabb.aabbStride =
sizeof(
BoundBox);
735 bvh->custom_primitive_bound.copy_to_device();
736 bvh->custom_prim_aabb.aabbs = (
void *)bvh->custom_primitive_bound.device_pointer;
738 geom_input.type = hiprtPrimitiveTypeAABBList;
739 geom_input.primitive.aabbList = bvh->custom_prim_aabb;
740 geom_input.geomType =
Point;
742 if (bvh->custom_primitive_bound.device_pointer == 0) {
743 set_error(
"Failed to allocate point custom_primitive_bound for BLAS");
749void HIPRTDevice::build_blas(BVHHIPRT *bvh,
Geometry *geom, hiprtBuildOptions
options)
751 hiprtGeometryBuildInput geom_input = {};
756 Mesh *mesh =
static_cast<Mesh *
>(geom);
762 geom_input = prepare_triangle_blas(bvh, mesh);
767 Hair *
const hair =
static_cast<Hair *const
>(geom);
773 geom_input = prepare_curve_blas(bvh, hair);
783 geom_input = prepare_point_blas(bvh, pointcloud);
791 assert(geom_input.geomType != hiprtInvalidValue);
798 size_t blas_scratch_buffer_size = 0;
799 hiprtError rt_err = hiprtGetGeometryBuildTemporaryBufferSize(
800 hiprt_context, geom_input,
options, blas_scratch_buffer_size);
802 if (rt_err != hiprtSuccess) {
803 set_error(
"Failed to get scratch buffer size for BLAS");
807 rt_err = hiprtCreateGeometry(hiprt_context, geom_input,
options, bvh->hiprt_geom);
809 if (rt_err != hiprtSuccess) {
810 set_error(
"Failed to create BLAS");
815 if (blas_scratch_buffer_size > scratch_buffer_size) {
816 scratch_buffer.alloc(blas_scratch_buffer_size);
817 scratch_buffer.zero_to_device();
818 if (!scratch_buffer.device_pointer) {
819 hiprtDestroyGeometry(hiprt_context, bvh->hiprt_geom);
820 bvh->hiprt_geom =
nullptr;
821 set_error(
"Failed to allocate scratch buffer for BLAS");
824 scratch_buffer_size = blas_scratch_buffer_size;
826 bvh->geom_input = geom_input;
827 rt_err = hiprtBuildGeometry(hiprt_context,
828 hiprtBuildOperationBuild,
831 (
void *)(scratch_buffer.device_pointer),
835 if (rt_err != hiprtSuccess) {
836 set_error(
"Failed to build BLAS");
840hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh,
846 size_t num_object = objects.size();
847 if (num_object == 0) {
851 hiprtBuildOperation build_operation =
refit ? hiprtBuildOperationUpdate :
852 hiprtBuildOperationBuild;
856 unordered_map<Geometry *, int2> prim_info_map;
857 size_t custom_prim_offset = 0;
859 unordered_map<Geometry *, int> prim_time_map;
861 size_t num_instances = 0;
862 int blender_instance_id = 0;
864 user_instance_id.alloc(num_object);
865 prim_visibility.alloc(num_object);
866 hiprt_blas_ptr.alloc(num_object);
867 blas_ptr.alloc(num_object);
868 transform_headers.alloc(num_object);
869 custom_prim_info_offset.alloc(num_object);
870 prim_time_offset.alloc(num_object);
872 for (
Object *ob : objects) {
874 if (ob->is_traceable()) {
875 mask = ob->visibility_for_tracing();
878 Transform current_transform = ob->get_tfm();
879 Geometry *geom = ob->get_geometry();
882 BVHHIPRT *current_bvh =
static_cast<BVHHIPRT *
>(geom->
bvh.get());
883 bool is_valid_geometry = current_bvh->geom_input.geomType != hiprtInvalidValue;
884 hiprtGeometry hiprt_geom_current = current_bvh->hiprt_geom;
886 hiprtFrameMatrix hiprt_transform_matrix = {{{0}}};
888 get_hiprt_transform(hiprt_transform_matrix.matrix, identity_matrix);
890 if (is_valid_geometry) {
891 bool is_custom_prim = current_bvh->custom_prim_info.size() > 0;
893 if (is_custom_prim) {
895 bool has_motion_blur = current_bvh->prims_time.size() > 0;
897 unordered_map<Geometry *, int2>::iterator it = prim_info_map.find(geom);
899 if (prim_info_map.find(geom) != prim_info_map.end()) {
901 custom_prim_info_offset[blender_instance_id] = it->second;
903 if (has_motion_blur) {
905 prim_time_offset[blender_instance_id] = prim_time_map[geom];
909 int offset = bvh->custom_prim_info.size();
911 prim_info_map[geom].x = offset;
912 prim_info_map[geom].y = custom_prim_offset;
914 bvh->custom_prim_info.resize(offset + current_bvh->custom_prim_info.size());
915 memcpy(bvh->custom_prim_info.data() + offset,
916 current_bvh->custom_prim_info.data(),
917 current_bvh->custom_prim_info.size() *
sizeof(
int2));
919 custom_prim_info_offset[blender_instance_id].x = offset;
920 custom_prim_info_offset[blender_instance_id].y = custom_prim_offset;
923 custom_prim_offset += ((
Hair *)geom)->num_curves();
926 custom_prim_offset += ((
PointCloud *)geom)->num_points();
929 custom_prim_offset += ((
Mesh *)geom)->num_triangles();
932 if (has_motion_blur) {
933 int time_offset = bvh->prims_time.size();
934 prim_time_map[geom] = time_offset;
936 bvh->prims_time.resize(time_offset + current_bvh->prims_time.size());
937 memcpy(bvh->prims_time.data() + time_offset,
938 current_bvh->prims_time.data(),
939 current_bvh->prims_time.size() *
sizeof(
float2));
941 prim_time_offset[blender_instance_id] = time_offset;
944 prim_time_offset[blender_instance_id] = -1;
949 custom_prim_info_offset[blender_instance_id] = {-1, -1};
952 hiprtTransformHeader current_header = {0};
953 current_header.frameCount = 1;
954 current_header.frameIndex = transform_matrix.
size();
955 if (use_motion_blur && ob->get_motion().size()) {
956 int motion_size = ob->get_motion().size();
960 float time_iternval = 1 / (
float)(motion_size - 1);
961 current_header.frameCount = motion_size;
964 tfm_hiprt_mb.resize(motion_size);
965 for (
int i = 0;
i < motion_size;
i++) {
966 get_hiprt_transform(tfm_hiprt_mb[
i].matrix, tfm_array[
i]);
967 tfm_hiprt_mb[
i].time = (
float)
i * time_iternval;
972 if (transform_applied) {
973 current_transform = identity_matrix;
975 get_hiprt_transform(hiprt_transform_matrix.matrix, current_transform);
979 transform_headers[num_instances] = current_header;
981 user_instance_id[num_instances] = blender_instance_id;
982 prim_visibility[num_instances] =
mask;
983 hiprt_blas_ptr[num_instances].geometry = hiprt_geom_current;
984 hiprt_blas_ptr[num_instances].type = hiprtInstanceTypeGeometry;
987 blas_ptr[blender_instance_id] = (
uint64_t)hiprt_geom_current;
988 blender_instance_id++;
991 size_t table_ptr_size = 0;
992 hipDeviceptr_t table_device_ptr;
994 hip_assert(hipModuleGetGlobal(&table_device_ptr, &table_ptr_size, hipModule,
"kernel_params"));
999 size_t kernel_param_offset[4];
1000 int table_index = 0;
1006 for (
int index = 0; index < table_index; index++) {
1007 hip_assert(hipMemcpyHtoD(table_device_ptr + kernel_param_offset[index],
1008 (
void *)&functions_table,
1015 if (num_instances == 0) {
1019 int frame_count = transform_matrix.
size();
1020 hiprtSceneBuildInput scene_input_ptr = {
nullptr};
1021 scene_input_ptr.instanceCount = num_instances;
1022 scene_input_ptr.frameCount = frame_count;
1023 scene_input_ptr.frameType = hiprtFrameTypeMatrix;
1025 user_instance_id.copy_to_device();
1026 prim_visibility.copy_to_device();
1027 hiprt_blas_ptr.copy_to_device();
1028 blas_ptr.copy_to_device();
1029 transform_headers.copy_to_device();
1031 if (user_instance_id.device_pointer == 0 || prim_visibility.device_pointer == 0 ||
1032 hiprt_blas_ptr.device_pointer == 0 || blas_ptr.device_pointer == 0 ||
1033 transform_headers.device_pointer == 0)
1035 set_error(
"Failed to allocate object buffers for TLAS");
1041 hiprtFrameMatrix *instance_transform_matrix_data = instance_transform_matrix.resize(
1043 if (instance_transform_matrix_data ==
nullptr) {
1044 set_error(
"Failed to allocate host instance_transform_matrix for TLAS");
1048 std::copy_n(transform_matrix.
data(), frame_count, instance_transform_matrix_data);
1049 instance_transform_matrix.copy_to_device();
1051 if (instance_transform_matrix.device_pointer == 0) {
1052 set_error(
"Failed to allocate instance_transform_matrix for TLAS");
1057 scene_input_ptr.instanceMasks = (
void *)prim_visibility.device_pointer;
1058 scene_input_ptr.instances = (
void *)hiprt_blas_ptr.device_pointer;
1059 scene_input_ptr.instanceTransformHeaders = (
void *)transform_headers.device_pointer;
1060 scene_input_ptr.instanceFrames = (
void *)instance_transform_matrix.device_pointer;
1062 hiprtScene scene =
nullptr;
1064 hiprtError rt_err = hiprtCreateScene(hiprt_context, scene_input_ptr,
options, scene);
1066 if (rt_err != hiprtSuccess) {
1067 set_error(
"Failed to create TLAS");
1071 size_t tlas_scratch_buffer_size;
1072 rt_err = hiprtGetSceneBuildTemporaryBufferSize(
1073 hiprt_context, scene_input_ptr,
options, tlas_scratch_buffer_size);
1075 if (rt_err != hiprtSuccess) {
1076 set_error(
"Failed to get scratch buffer size for TLAS");
1077 hiprtDestroyScene(hiprt_context, scene);
1081 if (tlas_scratch_buffer_size > scratch_buffer_size) {
1082 scratch_buffer.alloc(tlas_scratch_buffer_size);
1083 scratch_buffer.zero_to_device();
1084 if (scratch_buffer.device_pointer == 0) {
1085 set_error(
"Failed to allocate scratch buffer for TLAS");
1086 hiprtDestroyScene(hiprt_context, scene);
1091 rt_err = hiprtBuildScene(hiprt_context,
1095 (
void *)scratch_buffer.device_pointer,
1099 scratch_buffer.free();
1100 scratch_buffer_size = 0;
1102 if (rt_err != hiprtSuccess) {
1103 set_error(
"Failed to build TLAS");
1104 hiprtDestroyScene(hiprt_context, scene);
1108 if (bvh->custom_prim_info.size()) {
1110 const size_t data_size = bvh->custom_prim_info.size();
1111 int2 *custom_prim_info_data = custom_prim_info.resize(data_size);
1112 if (custom_prim_info_data ==
nullptr) {
1113 set_error(
"Failed to allocate host custom_prim_info_data for TLAS");
1114 hiprtDestroyScene(hiprt_context, scene);
1118 std::copy_n(bvh->custom_prim_info.data(), data_size, custom_prim_info_data);
1120 custom_prim_info.copy_to_device();
1121 custom_prim_info_offset.copy_to_device();
1122 if (custom_prim_info.device_pointer == 0 || custom_prim_info_offset.device_pointer == 0) {
1123 set_error(
"Failed to allocate custom_prim_info_offset for TLAS");
1124 hiprtDestroyScene(hiprt_context, scene);
1129 if (bvh->prims_time.size()) {
1131 const size_t data_size = bvh->prims_time.size();
1132 float2 *prims_time_data = prims_time.resize(data_size);
1133 if (prims_time_data ==
nullptr) {
1134 set_error(
"Failed to allocate host prims_time for TLAS");
1135 hiprtDestroyScene(hiprt_context, scene);
1139 std::copy_n(bvh->prims_time.data(), data_size, prims_time_data);
1141 prims_time.copy_to_device();
1142 prim_time_offset.copy_to_device();
1144 if (prim_time_offset.device_pointer == 0 || prims_time.device_pointer == 0) {
1145 set_error(
"Failed to allocate prims_time for TLAS");
1146 hiprtDestroyScene(hiprt_context, scene);
1154void HIPRTDevice::free_bvh_memory_delayed()
1157 if (stale_bvh.size()) {
1158 for (
int bvh_index = 0; bvh_index < stale_bvh.size(); bvh_index++) {
1159 hiprtGeometry hiprt_geom = stale_bvh[bvh_index];
1160 hiprtDestroyGeometry(hiprt_context, hiprt_geom);
1161 hiprt_geom =
nullptr;
1167void HIPRTDevice::release_bvh(
BVH *bvh)
1169 BVHHIPRT *current_bvh =
static_cast<BVHHIPRT *
>(bvh);
1172 stale_bvh.push_back(current_bvh->hiprt_geom);
1180 free_bvh_memory_delayed();
1181 progress.
set_substatus(
"Building HIPRT acceleration structure");
1184 options.buildFlags = hiprtBuildFlagBitPreferHighQualityBuild;
1186 BVHHIPRT *bvh_rt =
static_cast<BVHHIPRT *
>(bvh);
1187 HIPContextScope scope(
this);
1189 if (!bvh_rt->is_tlas()) {
1191 assert(geometry.size() == 1);
1192 build_blas(bvh_rt, geometry[0],
options);
1197 hiprtDestroyScene(hiprt_context, scene);
1200 scene = build_tlas(bvh_rt, bvh_rt->objects,
options,
refit);
BMesh const char void * data
unsigned long long int uint64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
static btDbvtVolume bounds(btDbvtNode **leaves, int count)
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE const btScalar & z() const
Return the z value.
Attribute * find(ustring name) const
bool is_pointcloud() const
virtual bool has_motion_blur() const
Curve get_curve(const size_t i) const
size_t num_curves() const
size_t num_segments() const
PrimitiveType primitive_type() const override
void set_substatus(const string &substatus_)
void push_back_slow(const T &t)
CCL_NAMESPACE_BEGIN struct Options options
#define KERNEL_DATA_ARRAY(type, name)
#define PRIMITIVE_PACK_SEGMENT(type, segment)
#define KERNEL_FEATURE_OBJECT_MOTION
#define KERNEL_FEATURE_NODE_RAYTRACE
#define KERNEL_FEATURE_MNEE
#define CCL_NAMESPACE_END
#define assert(assertion)
VecBase< float, D > step(VecOp< float, D >, VecOp< float, D >) RET
@ ATTR_STD_MOTION_VERTEX_POSITION
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE
#define LOG_INFO_IMPORTANT
ccl_device_inline float2 mask(const MaskType mask, const float2 a)
string util_md5_string(const string &str)
string path_cache_get(const string &sub)
string path_get(const string &sub)
string path_files_md5_hash(const string &dir)
string path_join(const string &dir, const string &file)
bool path_exists(const string &path)
void path_create_directories(const string &filepath)
bool path_read_compressed_text(const string &path, string &text)
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
__forceinline void grow(const float3 &pt)
void bounds_grow(const int k, const float3 *curve_keys, const float *curve_radius, BoundBox &bounds) const
void cardinal_motion_keys(const float3 *curve_keys, const float *curve_radius, const float4 *key_steps, const size_t num_curve_keys, const size_t num_steps, const float time, size_t k0, size_t k1, size_t k2, size_t k3, float4 r_keys[4]) const
void bounds_grow(const float3 *verts, BoundBox &bounds) const
void motion_verts(const float3 *verts, const float3 *vert_steps, const size_t num_verts, const size_t num_steps, const float time, float3 r_verts[3]) const
bool has_motion_blur() const override
size_t num_triangles() const
Triangle get_triangle(const size_t i) const
PrimitiveType primitive_type() const override
void bounds_grow(const float3 *points, const float *radius, BoundBox &bounds) const
float4 motion_key(const float3 *points, const float *radius, const float4 *point_steps, const size_t num_points, const size_t num_steps, const float time, size_t p) const
Point get_point(const int i) const
size_t num_points() const
std::unique_lock< std::mutex > thread_scoped_lock
CCL_NAMESPACE_BEGIN double time_dt()