32static void get_hiprt_transform(
float matrix[][4],
Transform &tfm)
36 matrix[row][
col++] = tfm.
x.x;
37 matrix[row][
col++] = tfm.
x.y;
38 matrix[row][
col++] = tfm.
x.z;
39 matrix[row][
col++] = tfm.
x.w;
42 matrix[row][
col++] = tfm.
y.x;
43 matrix[row][
col++] = tfm.
y.y;
44 matrix[row][
col++] = tfm.
y.z;
45 matrix[row][
col++] = tfm.
y.w;
48 matrix[row][
col++] = tfm.
z.x;
49 matrix[row][
col++] = tfm.
z.y;
50 matrix[row][
col++] = tfm.
z.z;
51 matrix[row][
col++] = tfm.
z.w;
/* Constructor: delegates to HIPDevice for base GPU setup, then creates the
 * HIPRT context bound to the existing HIP context/device and the function
 * table used for custom primitive intersection/filter callbacks.
 * NOTE(review): this chunk is garbled by extraction -- the embedded numbers
 * (61, 65, ...) are original-file line numbers and several lines (member
 * initializers, closing braces) are missing; verify against upstream. */
61HIPRTDevice::HIPRTDevice(
const DeviceInfo &info,
65 : HIPDevice(info, stats, profiler, headless),
69 scratch_buffer_size(0),
78 custom_prim_info_offset(
this,
"custom_prim_info_offset",
MEM_GLOBAL),
82 HIPContextScope scope(
this);
83 global_stack_buffer = {0};
84 hiprtContextCreationInput hiprt_context_input = {
nullptr};
/* Bind HIPRT to the already-initialized HIP context and device. */
85 hiprt_context_input.ctxt = hipContext;
86 hiprt_context_input.device = hipDevice;
87 hiprt_context_input.deviceType = hiprtDeviceAMD;
88 hiprtError rt_result = hiprtCreateContext(
89 HIPRT_API_VERSION, hiprt_context_input, &hiprt_context);
91 if (rt_result != hiprtSuccess) {
92 set_error(
"Failed to create HIPRT context");
/* Table indexed by primitive type and intersect-filter function. */
96 rt_result = hiprtCreateFuncTable(
97 hiprt_context, Max_Primitive_Type, Max_Intersect_Filter_Function, functions_table);
99 if (rt_result != hiprtSuccess) {
100 set_error(
"Failed to create HIPRT Function Table");
/* Presumably one of these two log-level calls is chosen by a build/debug
 * condition that was lost in extraction -- TODO confirm. */
105 hiprtSetLogLevel(hiprtLogLevelInfo | hiprtLogLevelWarn | hiprtLogLevelError);
108 hiprtSetLogLevel(hiprtLogLevelNone);
/* Destructor: frees all device_vector buffers owned by this device, then
 * tears down the HIPRT objects (global stack, function table, scene,
 * context) inside an active HIP context scope. */
112HIPRTDevice::~HIPRTDevice()
114 HIPContextScope scope(
this);
115 user_instance_id.free();
116 prim_visibility.free();
117 hiprt_blas_ptr.free();
119 instance_transform_matrix.free();
120 transform_headers.free();
121 custom_prim_info_offset.free();
122 custom_prim_info.free();
123 prim_time_offset.free();
/* HIPRT objects are destroyed after the buffers they may reference. */
126 hiprtDestroyGlobalStackBuffer(hiprt_context, global_stack_buffer);
127 hiprtDestroyFuncTable(hiprt_context, functions_table);
128 hiprtDestroyScene(hiprt_context, scene);
129 hiprtDestroyContext(hiprt_context);
/* NOTE(review): the enclosing method signature was lost in extraction;
 * this looks like the body of the device-queue factory (returns a new
 * HIPRTDeviceQueue bound to this device) -- confirm against upstream. */
134 return make_unique<HIPRTTDeviceQueue>(
this);
/* Returns the base HIP compiler flags with the __HIPRT__ define appended,
 * so kernels compile their HIPRT-specific code paths. */
137string HIPRTDevice::compile_kernel_get_common_cflags(
const uint kernel_features)
139 string cflags = HIPDevice::compile_kernel_get_common_cflags(kernel_features);
141 cflags +=
" -D __HIPRT__ ";
/* Locates a precompiled .hipfb kernel binary for this GPU architecture, or
 * compiles one with hipcc and caches it. Returns the fatbin path (empty on
 * failure, judging by the caller in load_kernels).
 * NOTE(review): garbled by extraction -- embedded numbers are original line
 * numbers and several lines (returns, braces, option setup) are missing. */
146string HIPRTDevice::compile_kernel(
const uint kernel_features,
const char *name,
const char *base)
/* Query compute capability and architecture string for this device. */
149 hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
150 hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
151 const std::string arch = hipDeviceArch(hipDevId);
/* First try a pre-compiled kernel shipped with the build. */
153 if (!use_adaptive_compilation()) {
155 VLOG(1) <<
"Testing for pre-compiled kernel " << fatbin <<
".";
157 VLOG(1) <<
"Using precompiled kernel.";
162 string source_path =
path_get(
"source");
165 string common_cflags = compile_kernel_get_common_cflags(kernel_features);
168 const string include_path = source_path;
170 "cycles_%s_%s_%s.hipfb", name, arch.c_str(), kernel_md5.c_str());
172 const string hiprt_include_path =
path_join(source_path,
"kernel/device/hiprt");
/* Then try a previously locally-compiled kernel from the cache. */
174 VLOG(1) <<
"Testing for locally compiled kernel " << fatbin <<
".";
176 VLOG(1) <<
"Using locally compiled kernel.";
/* Bail out with a user-facing error when the GPU is unsupported or the
 * matching binary is absent from an official build. */
181 if (!use_adaptive_compilation() && have_precompiled_kernels()) {
182 if (!hipSupportsDevice(hipDevId)) {
184 string_printf(
"HIP backend requires compute capability 10.1 or up, but found %d.%d. "
185 "Your GPU is not supported.",
191 string_printf(
"HIP binary kernel for this graphics card compute "
192 "capability (%d.%d) not found.",
/* Locate and validate the hipcc compiler. */
200 const char *
const hipcc = hipewCompilerPath();
201 if (hipcc ==
nullptr) {
203 "HIP hipcc compiler not found. "
204 "Install HIP toolkit in default location.");
208 const int hipcc_hip_version = hipewCompilerVersion();
209 VLOG_INFO <<
"Found hipcc " << hipcc <<
", HIP version " << hipcc_hip_version <<
".";
210 if (hipcc_hip_version < 40) {
212 "Unsupported HIP version %d.%d detected, "
213 "you need HIP 4.0 or newer.\n",
214 hipcc_hip_version / 10,
215 hipcc_hip_version % 10);
/* Assemble hipcc options: GPU code object output, target arch, and
 * precise-math flags for architectures that need them. */
224 const char *
const kernel_ext =
"genco";
227 "-Wno-parentheses-equality -Wno-unused-value -ffast-math -O3 -std=c++17 -D __HIPRT__");
228 options.append(
" --offload-arch=").append(arch.c_str());
229 if (hipNeedPreciseMath(arch)) {
231 " -fhip-fp32-correctly-rounded-divide-sqrt -fno-gpu-approx-transcendentals "
232 "-fgpu-flush-denormals-to-zero -ffp-contract=off");
235 options.append(
" -D WITH_NANOVDB");
238 printf(
"Compiling %s and caching to %s", source_path.c_str(), fatbin.c_str());
/* Build and run the compile command (Windows needs the "call " prefix,
 * presumably guarded by a lost #ifdef -- TODO confirm). */
242 string compile_command =
string_printf(
"%s %s -I %s -I %s --%s %s -o \"%s\"",
245 include_path.c_str(),
246 hiprt_include_path.c_str(),
252 compile_command =
"call " + compile_command;
254 if (system(compile_command.c_str()) != 0) {
256 "Failed to execute linking command, "
257 "see console for details.");
261 printf(
"Kernel compilation finished in %.2lfs.\n",
time_dt() - starttime);
/* Compiles (or locates) the HIPRT kernel fatbin, loads it as a HIP module,
 * and smoke-tests it by enqueueing a trivial kernel. Returns true when the
 * module loaded and the test enqueue succeeded. */
266bool HIPRTDevice::load_kernels(
const uint kernel_features)
269 if (use_adaptive_compilation()) {
270 VLOG(1) <<
"Skipping HIP kernel reload for adaptive compilation, not currently supported.";
/* No HIP context means device init already failed earlier. */
275 if (hipContext ==
nullptr) {
279 if (!support_device(kernel_features)) {
290 const char *kernel_name =
"kernel";
291 string fatbin = compile_kernel(kernel_features, kernel_name);
292 if (fatbin.empty()) {
297 HIPContextScope scope(
this);
303 result = hipModuleLoadData(&hipModule, fatbin_data.c_str());
306 result = hipErrorFileNotFound;
309 if (
result != hipSuccess) {
311 "Failed to load HIP kernel from '%s' (%s)", fatbin.c_str(), hipewErrorString(
result)));
/* Sanity-check the loaded module with a one-work-item test launch. */
314 if (
result == hipSuccess) {
323 HIPRTDeviceQueue queue(
this);
330 queue.init_execution();
331 queue.enqueue(test_kernel, 1, args);
336 return (
result == hipSuccess);
/* Uploads named constant memory to the device. For "data" (KernelData) it
 * first patches the device_bvh field to point at the HIPRT scene, then the
 * KERNEL_DATA_ARRAY expansion copies each named array into its slot inside
 * the kernel_params global. */
339void HIPRTDevice::const_copy_to(
const char *name,
void *host,
const size_t size)
341 HIPContextScope scope(
this);
345 if (strcmp(name,
"data") == 0) {
347 KernelData *
const data = (KernelData *)host;
/* Type-pun the device_bvh slot to store the hiprtScene handle. */
348 *(hiprtScene *)&
data->device_bvh = scene;
351 hip_assert(hipModuleGetGlobal(&mem, &bytes, hipModule,
"kernel_params"));
/* One strcmp+copy per array listed in kernel/data_arrays.h. */
354# define KERNEL_DATA_ARRAY(data_type, data_name) \
355 if (strcmp(name, #data_name) == 0) { \
356 hip_assert(hipMemcpyHtoD(mem + offsetof(KernelParamsHIPRT, data_name), host, size)); \
368# include "kernel/data_arrays.h"
369# undef KERNEL_DATA_ARRAY
/* Builds the hiprtGeometryBuildInput for a triangle mesh BLAS. Motion-blur
 * meshes become custom AABB-list primitives (per-step bounds plus prim info
 * and time ranges); static meshes use HIPRT's native triangle-mesh input.
 * NOTE(review): garbled by extraction; several lines (loop bodies, bounds
 * computation, returns) are missing. */
372hiprtGeometryBuildInput HIPRTDevice::prepare_triangle_blas(BVHHIPRT *bvh,
Mesh *mesh)
374 hiprtGeometryBuildInput geom_input;
381 const size_t num_verts = mesh->get_verts().size();
382 const size_t num_steps = mesh->get_motion_steps();
/* No motion steps (or spatial split): one bound + prim-info per triangle. */
387 if (bvh->params.num_motion_triangle_steps == 0 || bvh->params.use_spatial_split) {
388 bvh->custom_primitive_bound.alloc(num_triangles);
389 bvh->custom_prim_info.resize(num_triangles);
390 for (
uint j = 0; j < num_triangles; j++) {
399 bvh->custom_primitive_bound[num_bounds] =
bounds;
400 bvh->custom_prim_info[num_bounds].x = j;
/* Motion blur: sample 2*steps+1 time values and store interpolated
 * per-step bounds plus the [prev_time, curr_time] range per entry. */
407 const int num_bvh_steps = bvh->params.num_motion_triangle_steps * 2 + 1;
408 const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
410 bvh->custom_primitive_bound.alloc(num_triangles * num_bvh_steps);
411 bvh->custom_prim_info.resize(num_triangles * num_bvh_steps);
412 bvh->prims_time.resize(num_triangles * num_bvh_steps);
414 for (
uint j = 0; j < num_triangles; j++) {
419 prev_bounds.
grow(prev_verts[0]);
420 prev_bounds.
grow(prev_verts[1]);
421 prev_bounds.
grow(prev_verts[2]);
423 for (
int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
424 const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1;
428 curr_bounds.
grow(curr_verts[0]);
429 curr_bounds.
grow(curr_verts[1]);
430 curr_bounds.
grow(curr_verts[2]);
434 const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
435 bvh->custom_primitive_bound[num_bounds] =
bounds;
436 bvh->custom_prim_info[num_bounds].x = j;
438 bvh->prims_time[num_bounds].x = curr_time;
439 bvh->prims_time[num_bounds].y = prev_time;
442 prev_bounds = curr_bounds;
/* Publish the AABB list as the build input for the motion case. */
447 bvh->custom_prim_aabb.aabbCount = num_bounds;
448 bvh->custom_prim_aabb.aabbStride =
sizeof(
BoundBox);
449 bvh->custom_primitive_bound.copy_to_device();
450 bvh->custom_prim_aabb.aabbs = (
void *)bvh->custom_primitive_bound.device_pointer;
452 geom_input.type = hiprtPrimitiveTypeAABBList;
453 geom_input.primitive.aabbList = bvh->custom_prim_aabb;
454 geom_input.geomType = Motion_Triangle;
456 if (bvh->custom_primitive_bound.device_pointer == 0) {
457 set_error(
"Failed to allocate triangle custom_primitive_bound for BLAS");
/* Static case: upload index and vertex buffers for the native triangle
 * mesh primitive type. */
461 size_t triangle_size = mesh->get_triangles().size();
462 int *triangle_data = mesh->get_triangles().data();
464 size_t vertex_size = mesh->get_verts().size();
465 float *vertex_data =
reinterpret_cast<float *
>(mesh->get_verts().
data());
468 bvh->triangle_mesh.triangleStride = 3 *
sizeof(int);
469 bvh->triangle_mesh.vertexCount = vertex_size;
470 bvh->triangle_mesh.vertexStride =
sizeof(
float3);
473 int *triangle_index_data = bvh->triangle_index.resize(triangle_size);
474 float *vertex_data_data = bvh->vertex_data.resize(vertex_size * 4);
476 if (triangle_index_data && vertex_data_data) {
477 std::copy_n(triangle_data, triangle_size, triangle_index_data);
/* float3 is padded to 4 floats, hence vertex_size * 4 above. */
478 std::copy_n(vertex_data, vertex_size * 4, vertex_data_data);
479 static_assert(
sizeof(
float3) ==
sizeof(float) * 4);
481 bvh->triangle_index.copy_to_device();
482 bvh->vertex_data.copy_to_device();
485 bvh->triangle_mesh.triangleIndices = (
void *)(bvh->triangle_index.device_pointer);
486 bvh->triangle_mesh.vertices = (
void *)(bvh->vertex_data.device_pointer);
488 geom_input.type = hiprtPrimitiveTypeTriangleMesh;
489 geom_input.primitive.triangleMesh = bvh->triangle_mesh;
491 if (bvh->triangle_index.device_pointer == 0 || bvh->vertex_data.device_pointer == 0) {
492 set_error(
"Failed to allocate triangle data for BLAS");
/* Builds the hiprtGeometryBuildInput for hair/curve geometry. Curves are
 * always custom AABB-list primitives: one bound per curve segment, with
 * extra per-time-step entries when motion blur is present.
 * NOTE(review): garbled by extraction; bounds computation and several
 * control-flow lines are missing. */
499hiprtGeometryBuildInput HIPRTDevice::prepare_curve_blas(BVHHIPRT *bvh,
Hair *hair)
501 hiprtGeometryBuildInput geom_input;
506 const Attribute *curve_attr_mP =
nullptr;
/* Size buffers: one entry per segment, or per segment per motion step. */
512 if (curve_attr_mP ==
nullptr || bvh->params.num_motion_curve_steps == 0) {
513 bvh->custom_prim_info.resize(num_segments);
514 bvh->custom_primitive_bound.alloc(num_segments);
517 size_t num_boxes = bvh->params.num_motion_curve_steps * 2 * num_segments;
518 bvh->custom_prim_info.resize(num_boxes);
519 bvh->prims_time.resize(num_boxes);
520 bvh->custom_primitive_bound.alloc(num_boxes);
524 float3 *curve_keys = hair->get_curve_keys().data();
526 for (
uint j = 0; j < num_curves; j++) {
528 const float *curve_radius = hair->get_curve_radius().data();
530 for (
int k = 0; k < curve.
num_keys - 1; k++) {
531 if (curve_attr_mP ==
nullptr) {
/* Gather the 4 cardinal-spline control keys, clamped at curve ends. */
533 current_keys[0] = curve_keys[
max(first_key + k - 1, first_key)];
534 current_keys[1] = curve_keys[first_key + k];
535 current_keys[2] = curve_keys[first_key + k + 1];
536 current_keys[3] = curve_keys[
min(first_key + k + 2, first_key + curve.
num_keys - 1)];
/* Skip degenerate segments where all 4 keys coincide. */
538 if (current_keys[0].
x == current_keys[1].
x && current_keys[1].
x == current_keys[2].
x &&
539 current_keys[2].
x == current_keys[3].
x && current_keys[0].
y == current_keys[1].
y &&
540 current_keys[1].
y == current_keys[2].
y && current_keys[2].
y == current_keys[3].
y &&
541 current_keys[0].
z == current_keys[1].
z && current_keys[1].
z == current_keys[2].
z &&
542 current_keys[2].
z == current_keys[3].
z)
551 bvh->custom_prim_info[num_bounds].x = j;
552 bvh->custom_prim_info[num_bounds].y = type;
553 bvh->custom_primitive_bound[num_bounds] =
bounds;
/* Motion-blur path: per-step bounds from motion keys. */
558 const size_t num_steps = hair->get_motion_steps();
560 const size_t num_keys = hair->get_curve_keys().size();
562 if (bvh->params.num_motion_curve_steps == 0 || bvh->params.use_spatial_split) {
570 bvh->custom_prim_info[num_bounds].x = j;
571 bvh->custom_prim_info[num_bounds].y = type;
572 bvh->custom_primitive_bound[num_bounds] =
bounds;
577 const int num_bvh_steps = bvh->params.num_motion_curve_steps * 2 + 1;
578 const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
595 for (
int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
596 const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1;
614 const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
616 bvh->custom_prim_info[num_bounds].x = j;
617 bvh->custom_prim_info[num_bounds].y = packed_type;
618 bvh->custom_primitive_bound[num_bounds] =
bounds;
619 bvh->prims_time[num_bounds].x = prev_time;
620 bvh->prims_time[num_bounds].y = curr_time;
623 prev_bounds = curr_bounds;
/* Publish the AABB list as the geometry build input. */
630 bvh->custom_prim_aabb.aabbCount = num_bounds;
631 bvh->custom_prim_aabb.aabbStride =
sizeof(
BoundBox);
632 bvh->custom_primitive_bound.copy_to_device();
633 bvh->custom_prim_aabb.aabbs = (
void *)bvh->custom_primitive_bound.device_pointer;
635 geom_input.type = hiprtPrimitiveTypeAABBList;
636 geom_input.primitive.aabbList = bvh->custom_prim_aabb;
637 geom_input.geomType =
Curve;
639 if (bvh->custom_primitive_bound.device_pointer == 0) {
640 set_error(
"Failed to allocate curve custom_primitive_bound for BLAS");
/* Builds the hiprtGeometryBuildInput for a point cloud. Points are custom
 * AABB primitives: one bound per point, or per point per motion step when
 * motion blur is enabled.
 * NOTE(review): garbled by extraction; bounds computation lines missing. */
646hiprtGeometryBuildInput HIPRTDevice::prepare_point_blas(BVHHIPRT *bvh,
PointCloud *pointcloud)
648 hiprtGeometryBuildInput geom_input;
650 const Attribute *point_attr_mP =
nullptr;
652 const float3 *points_data = pointcloud->get_points().data();
656 const float *radius_data = pointcloud->get_radius().data();
657 const size_t num_points = pointcloud->
num_points();
658 const float4 *motion_data = (point_attr_mP) ? point_attr_mP->
data_float4() :
nullptr;
659 const size_t num_steps = pointcloud->get_motion_steps();
/* No motion attribute: one bound per point. */
663 if (point_attr_mP ==
nullptr) {
664 bvh->custom_prim_info.resize(num_points);
665 bvh->custom_primitive_bound.alloc(num_points);
666 for (
uint j = 0; j < num_points; j++) {
671 bvh->custom_primitive_bound[num_bounds] =
bounds;
672 bvh->custom_prim_info[num_bounds].x = j;
/* Motion attribute present but no BVH motion steps (or spatial split):
 * still one bound per point, grown over all steps presumably. */
678 else if (bvh->params.num_motion_point_steps == 0 || bvh->params.use_spatial_split) {
679 bvh->custom_prim_info.resize(num_points);
680 bvh->custom_primitive_bound.alloc(num_points);
682 for (
uint j = 0; j < num_points; j++) {
690 bvh->custom_primitive_bound[num_bounds] =
bounds;
691 bvh->custom_prim_info[num_bounds].x = j;
/* Full motion blur: sample 2*steps+1 times per point. */
698 const int num_bvh_steps = bvh->params.num_motion_point_steps * 2 + 1;
699 const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
701 bvh->custom_prim_info.resize(num_points * num_bvh_steps);
702 bvh->custom_primitive_bound.alloc(num_points * num_bvh_steps);
703 bvh->prims_time.resize(num_points * num_bvh_steps);
705 for (
uint j = 0; j < num_points; j++) {
707 const size_t num_steps = pointcloud->get_motion_steps();
711 points_data, radius_data, point_steps, num_points, num_steps, 0.0f, j);
715 for (
int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
716 const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1;
718 points_data, radius_data, point_steps, num_points, num_steps, curr_time, j);
724 const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
725 bvh->custom_primitive_bound[num_bounds] =
bounds;
726 bvh->custom_prim_info[num_bounds].x = j;
728 bvh->prims_time[num_bounds].x = prev_time;
729 bvh->prims_time[num_bounds].y = curr_time;
732 prev_bounds = curr_bounds;
/* Publish the AABB list as the geometry build input. */
737 bvh->custom_prim_aabb.aabbCount = num_bounds;
738 bvh->custom_prim_aabb.aabbStride =
sizeof(
BoundBox);
739 bvh->custom_primitive_bound.copy_to_device();
740 bvh->custom_prim_aabb.aabbs = (
void *)bvh->custom_primitive_bound.device_pointer;
742 geom_input.type = hiprtPrimitiveTypeAABBList;
743 geom_input.primitive.aabbList = bvh->custom_prim_aabb;
744 geom_input.geomType =
Point;
746 if (bvh->custom_primitive_bound.device_pointer == 0) {
747 set_error(
"Failed to allocate point custom_primitive_bound for BLAS");
/* Builds a bottom-level acceleration structure for one geometry: dispatches
 * to the per-type prepare_*_blas(), sizes/allocates the shared scratch
 * buffer, creates the hiprtGeometry and runs the build. */
753void HIPRTDevice::build_blas(BVHHIPRT *bvh,
Geometry *geom, hiprtBuildOptions
options)
755 hiprtGeometryBuildInput geom_input = {};
/* Per-geometry-type build-input preparation. */
760 Mesh *mesh =
static_cast<Mesh *
>(geom);
766 geom_input = prepare_triangle_blas(bvh, mesh);
771 Hair *
const hair =
static_cast<Hair *const
>(geom);
777 geom_input = prepare_curve_blas(bvh, hair);
787 geom_input = prepare_point_blas(bvh, pointcloud);
795 assert(geom_input.geomType != hiprtInvalidValue);
/* Query scratch size, create the geometry, then build. */
802 size_t blas_scratch_buffer_size = 0;
803 hiprtError rt_err = hiprtGetGeometryBuildTemporaryBufferSize(
804 hiprt_context, geom_input,
options, blas_scratch_buffer_size);
806 if (rt_err != hiprtSuccess) {
807 set_error(
"Failed to get scratch buffer size for BLAS");
811 rt_err = hiprtCreateGeometry(hiprt_context, geom_input,
options, bvh->hiprt_geom);
813 if (rt_err != hiprtSuccess) {
814 set_error(
"Failed to create BLAS");
/* Grow the shared scratch buffer only when this BLAS needs more room;
 * on allocation failure the partially-created geometry is destroyed. */
819 if (blas_scratch_buffer_size > scratch_buffer_size) {
820 scratch_buffer.alloc(blas_scratch_buffer_size);
821 scratch_buffer.zero_to_device();
822 if (!scratch_buffer.device_pointer) {
823 hiprtDestroyGeometry(hiprt_context, bvh->hiprt_geom);
824 bvh->hiprt_geom =
nullptr;
825 set_error(
"Failed to allocate scratch buffer for BLAS");
828 scratch_buffer_size = blas_scratch_buffer_size;
830 bvh->geom_input = geom_input;
831 rt_err = hiprtBuildGeometry(hiprt_context,
832 hiprtBuildOperationBuild,
835 (
void *)(scratch_buffer.device_pointer),
839 if (rt_err != hiprtSuccess) {
840 set_error(
"Failed to build BLAS");
/* Builds the top-level acceleration structure over all scene objects:
 * gathers per-instance data (visibility masks, BLAS pointers, transforms,
 * motion frames, custom-primitive offsets), creates and builds the
 * hiprtScene, then uploads the aggregated custom-prim/motion-time arrays.
 * Returns the built scene (error paths destroy it first).
 * NOTE(review): garbled by extraction -- embedded numbers are original line
 * numbers and many lines (returns, braces, loop setup) are missing. */
844hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh,
850 size_t num_object = objects.size();
851 if (num_object == 0) {
855 hiprtBuildOperation build_operation =
refit ? hiprtBuildOperationUpdate :
856 hiprtBuildOperationBuild;
/* Maps to share custom-prim info / motion-time offsets between instances
 * of the same geometry. */
860 unordered_map<Geometry *, int2> prim_info_map;
861 size_t custom_prim_offset = 0;
863 unordered_map<Geometry *, int> prim_time_map;
865 size_t num_instances = 0;
866 int blender_instance_id = 0;
868 user_instance_id.alloc(num_object);
869 prim_visibility.alloc(num_object);
870 hiprt_blas_ptr.alloc(num_object);
871 blas_ptr.alloc(num_object);
872 transform_headers.alloc(num_object);
873 custom_prim_info_offset.alloc(num_object);
874 prim_time_offset.alloc(num_object);
/* Per-object instance setup. */
876 for (
Object *ob : objects) {
878 if (ob->is_traceable()) {
879 mask = ob->visibility_for_tracing();
882 Transform current_transform = ob->get_tfm();
883 Geometry *geom = ob->get_geometry();
886 BVHHIPRT *current_bvh =
static_cast<BVHHIPRT *
>(geom->
bvh.get());
887 bool is_valid_geometry = current_bvh->geom_input.geomType != hiprtInvalidValue;
888 hiprt_geom_current = current_bvh->hiprt_geom;
890 hiprtFrameMatrix hiprt_transform_matrix = {{{0}}};
892 get_hiprt_transform(hiprt_transform_matrix.matrix, identity_matrix);
894 if (is_valid_geometry) {
895 bool is_custom_prim = current_bvh->custom_prim_info.size() > 0;
897 if (is_custom_prim) {
899 bool has_motion_blur = current_bvh->prims_time.size() > 0;
/* Reuse offsets for geometry already appended by an earlier instance;
 * otherwise append this geometry's custom-prim info to the TLAS arrays. */
901 unordered_map<Geometry *, int2>::iterator it = prim_info_map.find(geom);
903 if (prim_info_map.find(geom) != prim_info_map.end()) {
905 custom_prim_info_offset[blender_instance_id] = it->second;
907 if (has_motion_blur) {
909 prim_time_offset[blender_instance_id] = prim_time_map[geom];
913 int offset = bvh->custom_prim_info.size();
915 prim_info_map[geom].x = offset;
916 prim_info_map[geom].y = custom_prim_offset;
918 bvh->custom_prim_info.resize(offset + current_bvh->custom_prim_info.size());
919 memcpy(bvh->custom_prim_info.data() + offset,
920 current_bvh->custom_prim_info.data(),
921 current_bvh->custom_prim_info.size() *
sizeof(
int2));
923 custom_prim_info_offset[blender_instance_id].x = offset;
924 custom_prim_info_offset[blender_instance_id].y = custom_prim_offset;
/* Advance the global primitive offset by this geometry's prim count. */
927 custom_prim_offset += ((
Hair *)geom)->num_curves();
930 custom_prim_offset += ((
PointCloud *)geom)->num_points();
933 custom_prim_offset += ((
Mesh *)geom)->num_triangles();
936 if (has_motion_blur) {
937 int time_offset = bvh->prims_time.size();
938 prim_time_map[geom] = time_offset;
940 bvh->prims_time.resize(time_offset + current_bvh->prims_time.size());
941 memcpy(bvh->prims_time.data() + time_offset,
942 current_bvh->prims_time.data(),
943 current_bvh->prims_time.size() *
sizeof(
float2));
945 prim_time_offset[blender_instance_id] = time_offset;
948 prim_time_offset[blender_instance_id] = -1;
953 custom_prim_info_offset[blender_instance_id] = {-1, -1};
/* Transform header: single frame, or one frame per motion step. */
956 hiprtTransformHeader current_header = {0};
957 current_header.frameCount = 1;
958 current_header.frameIndex = transform_matrix.
size();
959 if (use_motion_blur && ob->get_motion().size()) {
960 int motion_size = ob->get_motion().size();
964 float time_iternval = 1 / (float)(motion_size - 1);
965 current_header.frameCount = motion_size;
968 tfm_hiprt_mb.resize(motion_size);
969 for (
int i = 0;
i < motion_size;
i++) {
970 get_hiprt_transform(tfm_hiprt_mb[
i].matrix, tfm_array[
i]);
971 tfm_hiprt_mb[
i].time = (float)
i * time_iternval;
976 if (transform_applied) {
977 current_transform = identity_matrix;
979 get_hiprt_transform(hiprt_transform_matrix.matrix, current_transform);
983 transform_headers[num_instances] = current_header;
985 user_instance_id[num_instances] = blender_instance_id;
986 prim_visibility[num_instances] =
mask;
987 hiprt_blas_ptr[num_instances].geometry = hiprt_geom_current;
988 hiprt_blas_ptr[num_instances].type = hiprtInstanceTypeGeometry;
991 blas_ptr[blender_instance_id] = (
uint64_t)hiprt_geom_current;
992 blender_instance_id++;
/* Write the function-table pointer into kernel_params on the device. */
995 size_t table_ptr_size = 0;
996 hipDeviceptr_t table_device_ptr;
998 hip_assert(hipModuleGetGlobal(&table_device_ptr, &table_ptr_size, hipModule,
"kernel_params"));
1003 size_t kernel_param_offset[4];
1004 int table_index = 0;
1010 for (
int index = 0; index < table_index; index++) {
1011 hip_assert(hipMemcpyHtoD(table_device_ptr + kernel_param_offset[index],
1012 (
void *)&functions_table,
1019 if (num_instances == 0) {
/* Assemble the scene build input and upload instance buffers. */
1023 int frame_count = transform_matrix.
size();
1024 hiprtSceneBuildInput scene_input_ptr = {
nullptr};
1025 scene_input_ptr.instanceCount = num_instances;
1026 scene_input_ptr.frameCount = frame_count;
1027 scene_input_ptr.frameType = hiprtFrameTypeMatrix;
1029 user_instance_id.copy_to_device();
1030 prim_visibility.copy_to_device();
1031 hiprt_blas_ptr.copy_to_device();
1032 blas_ptr.copy_to_device();
1033 transform_headers.copy_to_device();
1035 if (user_instance_id.device_pointer == 0 || prim_visibility.device_pointer == 0 ||
1036 hiprt_blas_ptr.device_pointer == 0 || blas_ptr.device_pointer == 0 ||
1037 transform_headers.device_pointer == 0)
1039 set_error(
"Failed to allocate object buffers for TLAS");
1045 hiprtFrameMatrix *instance_transform_matrix_data = instance_transform_matrix.resize(
1047 if (instance_transform_matrix_data ==
nullptr) {
1048 set_error(
"Failed to allocate host instance_transform_matrix for TLAS");
1052 std::copy_n(transform_matrix.
data(), frame_count, instance_transform_matrix_data);
1053 instance_transform_matrix.copy_to_device();
1055 if (instance_transform_matrix.device_pointer == 0) {
1056 set_error(
"Failed to allocate instance_transform_matrix for TLAS");
1061 scene_input_ptr.instanceMasks = (
void *)prim_visibility.device_pointer;
1062 scene_input_ptr.instances = (
void *)hiprt_blas_ptr.device_pointer;
1063 scene_input_ptr.instanceTransformHeaders = (
void *)transform_headers.device_pointer;
1064 scene_input_ptr.instanceFrames = (
void *)instance_transform_matrix.device_pointer;
/* Create and build the scene, sizing the scratch buffer as for BLAS. */
1066 hiprtScene scene =
nullptr;
1068 hiprtError rt_err = hiprtCreateScene(hiprt_context, scene_input_ptr,
options, scene);
1070 if (rt_err != hiprtSuccess) {
1071 set_error(
"Failed to create TLAS");
1075 size_t tlas_scratch_buffer_size;
1076 rt_err = hiprtGetSceneBuildTemporaryBufferSize(
1077 hiprt_context, scene_input_ptr,
options, tlas_scratch_buffer_size);
1079 if (rt_err != hiprtSuccess) {
1080 set_error(
"Failed to get scratch buffer size for TLAS");
1081 hiprtDestroyScene(hiprt_context, scene);
1085 if (tlas_scratch_buffer_size > scratch_buffer_size) {
1086 scratch_buffer.alloc(tlas_scratch_buffer_size);
1087 scratch_buffer.zero_to_device();
1088 if (scratch_buffer.device_pointer == 0) {
1089 set_error(
"Failed to allocate scratch buffer for TLAS");
1090 hiprtDestroyScene(hiprt_context, scene);
1095 rt_err = hiprtBuildScene(hiprt_context,
1099 (
void *)scratch_buffer.device_pointer,
/* Scratch buffer is no longer needed once the scene is built. */
1103 scratch_buffer.free();
1104 scratch_buffer_size = 0;
1106 if (rt_err != hiprtSuccess) {
1107 set_error(
"Failed to build TLAS");
1108 hiprtDestroyScene(hiprt_context, scene);
/* Upload aggregated custom-primitive info and offsets. */
1112 if (bvh->custom_prim_info.size()) {
1114 const size_t data_size = bvh->custom_prim_info.size();
1115 int2 *custom_prim_info_data = custom_prim_info.resize(data_size);
1116 if (custom_prim_info_data ==
nullptr) {
1117 set_error(
"Failed to allocate host custom_prim_info_data for TLAS");
1118 hiprtDestroyScene(hiprt_context, scene);
1122 std::copy_n(bvh->custom_prim_info.data(), data_size, custom_prim_info_data);
1124 custom_prim_info.copy_to_device();
1125 custom_prim_info_offset.copy_to_device();
1126 if (custom_prim_info.device_pointer == 0 || custom_prim_info_offset.device_pointer == 0) {
1127 set_error(
"Failed to allocate custom_prim_info_offset for TLAS");
1128 hiprtDestroyScene(hiprt_context, scene);
/* Upload aggregated motion-blur time ranges and offsets. */
1133 if (bvh->prims_time.size()) {
1135 const size_t data_size = bvh->prims_time.size();
1136 float2 *prims_time_data = prims_time.resize(data_size);
1137 if (prims_time_data ==
nullptr) {
1138 set_error(
"Failed to allocate host prims_time for TLAS");
1139 hiprtDestroyScene(hiprt_context, scene);
1143 std::copy_n(bvh->prims_time.data(), data_size, prims_time_data);
1145 prims_time.copy_to_device();
1146 prim_time_offset.copy_to_device();
1148 if (prim_time_offset.device_pointer == 0 || prims_time.device_pointer == 0) {
1149 set_error(
"Failed to allocate prims_time for TLAS");
1150 hiprtDestroyScene(hiprt_context, scene);
/* NOTE(review): the enclosing method signature was lost in extraction; this
 * looks like the body of HIPRTDevice's BVH-build entry point: it reports
 * progress, forces a high-quality build, and dispatches to build_blas() for
 * geometry BVHs or build_tlas() for the scene BVH (destroying the previous
 * scene first) -- confirm against upstream. */
1164 progress.set_substatus(
"Building HIPRT acceleration structure");
1167 options.buildFlags = hiprtBuildFlagBitPreferHighQualityBuild;
1169 BVHHIPRT *bvh_rt =
static_cast<BVHHIPRT *
>(bvh);
1170 HIPContextScope scope(
this);
1172 if (!bvh_rt->is_tlas()) {
/* A geometry BVH wraps exactly one geometry. */
1174 assert(geometry.size() == 1);
1175 build_blas(bvh_rt, geometry[0],
options);
1180 hiprtDestroyScene(hiprt_context, scene);
1182 scene = build_tlas(bvh_rt, bvh_rt->objects,
options,
refit);
BMesh const char void * data
unsigned long long int uint64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
static btDbvtVolume bounds(btDbvtNode **leaves, int count)
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE const btScalar & z() const
Return the z value.
Attribute * find(ustring name) const
bool is_pointcloud() const
virtual bool has_motion_blur() const
Curve get_curve(const size_t i) const
size_t num_curves() const
size_t num_segments() const
PrimitiveType primitive_type() const override
void push_back_slow(const T &t)
CCL_NAMESPACE_BEGIN struct Options options
#define KERNEL_DATA_ARRAY(type, name)
#define PRIMITIVE_PACK_SEGMENT(type, segment)
#define KERNEL_FEATURE_OBJECT_MOTION
#define KERNEL_FEATURE_NODE_RAYTRACE
#define KERNEL_FEATURE_MNEE
#define CCL_NAMESPACE_END
VecBase< float, 4 > float4
#define assert(assertion)
VecBase< float, D > step(VecOp< float, D >, VecOp< float, D >) RET
@ ATTR_STD_MOTION_VERTEX_POSITION
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE
ccl_device_inline float2 mask(const MaskType mask, const float2 a)
string util_md5_string(const string &str)
string path_cache_get(const string &sub)
string path_get(const string &sub)
string path_files_md5_hash(const string &dir)
string path_join(const string &dir, const string &file)
bool path_exists(const string &path)
void path_create_directories(const string &filepath)
bool path_read_compressed_text(const string &path, string &text)
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
__forceinline void grow(const float3 &pt)
void bounds_grow(const int k, const float3 *curve_keys, const float *curve_radius, BoundBox &bounds) const
void cardinal_motion_keys(const float3 *curve_keys, const float *curve_radius, const float4 *key_steps, const size_t num_curve_keys, const size_t num_steps, const float time, size_t k0, size_t k1, size_t k2, size_t k3, float4 r_keys[4]) const
void bounds_grow(const float3 *verts, BoundBox &bounds) const
void motion_verts(const float3 *verts, const float3 *vert_steps, const size_t num_verts, const size_t num_steps, const float time, float3 r_verts[3]) const
bool has_motion_blur() const override
size_t num_triangles() const
Triangle get_triangle(const size_t i) const
PrimitiveType primitive_type() const override
void bounds_grow(const float3 *points, const float *radius, BoundBox &bounds) const
float4 motion_key(const float3 *points, const float *radius, const float4 *point_steps, const size_t num_points, const size_t num_steps, const float time, size_t p) const
Point get_point(const int i) const
size_t num_points() const
std::unique_lock< std::mutex > thread_scoped_lock
CCL_NAMESPACE_BEGIN double time_dt()