19# include "device/metal/bvh.h"
/* Formats a printf-style status message, publishes it as the sub-status of a variable named
 * `progress` that must exist at the expansion site, and echoes it through metal_printf. */
24# define BVH_status(...) \
26 string str = string_printf(__VA_ARGS__); \
27 progress.set_substatus(str); \
28 metal_printf("%s", str.c_str()); \
/* Diagnostics for the build throttler. With BVH_THROTTLE_DIAGNOSTICS defined the macro prints
 * with a "BVHMetalBuildThrottler::" prefix; the alternative definition expands to nothing. */
32# ifdef BVH_THROTTLE_DIAGNOSTICS
33# define bvh_throttle_printf(...) printf("BVHMetalBuildThrottler::" __VA_ARGS__)
35# define bvh_throttle_printf(...)
/* Fallback for builds where MAC_OS_VERSION_26_0 is not defined: spell the usage flag out as
 * bit 4 so the code below can reference it by name. */
41# ifndef MAC_OS_VERSION_26_0
42# define MTLAccelerationStructureUsagePreferFastIntersection \
43 MTLAccelerationStructureUsage(1 << 4)
/* Throttle for concurrent BVH builds. It tracks how many bytes the in-flight builds account for
 * and compares that against a limit derived from the Metal device. A single global instance,
 * g_bvh_build_throttler, is declared together with the type. */
48struct BVHMetalBuildThrottler {
/* Bytes currently accounted to requests between acquire() and release(). */
50 size_t wired_memory = 0;
/* Threshold that wired_memory is compared against in acquire(). */
51 size_t safe_wired_limit = 0;
/* Number of requests that have been acquired but not yet released. */
52 int requests_in_flight = 0;
/* Derives safe_wired_limit from the system default Metal device. */
54 BVHMetalBuildThrottler()
/* NOTE(review): no release of this device is visible in this excerpt -- confirm whether one is
 * needed. */
58 id<MTLDevice> mtlDevice = MTLCreateSystemDefaultDevice();
/* The limit is a quarter of the device's recommended maximum working set size. */
61 safe_wired_limit = [mtlDevice recommendedMaxWorkingSetSize] / 4;
62 bvh_throttle_printf(
"safe_wired_limit = %zu\n", safe_wired_limit);
/* Registers a build that is about to account for `bytes_to_be_wired` bytes. */
66 void acquire(
const size_t bytes_to_be_wired)
68 bool throttled =
false;
/* Admit the request when nothing is accounted yet (so a request larger than the limit is still
 * accepted on its own) or when the new total stays within the limit. */
75 if (wired_memory == 0 || wired_memory + bytes_to_be_wired <= safe_wired_limit) {
76 wired_memory += bytes_to_be_wired;
77 requests_in_flight += 1;
78 bvh_throttle_printf(
"acquire -- success (requests_in_flight = %d, wired_memory = %zu)\n",
86 "acquire -- throttling (requests_in_flight = %d, wired_memory = %zu, "
87 "bytes_to_be_wired = %zu)\n",
/* Back off for 10 ms. NOTE(review): the retry loop and any locking around these counters are
 * not visible in this excerpt. */
95 std::this_thread::sleep_for(std::chrono::milliseconds(10));
/* Returns the budget taken by a matching acquire() call. */
100 void release(
const size_t bytes_just_unwired)
103 wired_memory -= bytes_just_unwired;
104 requests_in_flight -= 1;
105 bvh_throttle_printf(
"release (requests_in_flight = %d, wired_memory = %zu)\n",
/* NOTE(review): the header of the method containing the next two lines is not visible. They
 * test for wired_memory reaching zero and sleep for 10 ms, presumably the polling body of
 * wait_for_all() -- confirm. */
116 if (wired_memory == 0) {
120 std::this_thread::sleep_for(std::chrono::milliseconds(10));
123} g_bvh_build_throttler;
/* Reports whether BLAS refitting should be used. The decision depends on two macOS availability
 * checks (15.4 and 15.2). NOTE(review): the value returned in each case is not visible in this
 * excerpt. */
130static bool support_refit_blas()
132 if (@available(macos 15.4, *)) {
135 if (@available(macos 15.2, *)) {
/* Constructs the Metal BVH: forwards params_, geometry_ and objects_ to the BVH base class and
 * stores the device pointer. */
141BVHMetal::BVHMetal(
const BVHParams ¶ms_,
145 :
BVH(params_, geometry_, objects_), device(device)
/* On macOS 12.0+ pass nil to set_accel_struct. NOTE(review): the original line numbering has a
 * gap before this check, so these two lines may belong to a different function (for example the
 * destructor) whose header is not visible here -- confirm. */
151 if (@available(macos 12.0, *)) {
152 set_accel_struct(nil);
/* Replaces the acceleration structure held by this BVH and keeps the device memory statistics
 * in step with the change.
 *
 * new_accel_struct: structure to adopt; a nil value skips the adoption step. */
159API_AVAILABLE(macos(11.0))
160void BVHMetal::set_accel_struct(
id<MTLAccelerationStructure> new_accel_struct)
162 if (@available(macos 12.0, *)) {
/* Un-account the currently held structure and release it. NOTE(review): any guard around these
 * two lines is not visible in this excerpt. */
164 device->stats.mem_free(accel_struct.allocatedSize);
165 [accel_struct release];
/* Adopt the new structure and account for its allocated size. */
169 if (new_accel_struct) {
170 accel_struct = new_accel_struct;
171 device->stats.mem_alloc(accel_struct.allocatedSize);
/* Encodes and submits the bottom-level acceleration structure work for a triangle mesh on
 * `queue`. Both a refit call (from the existing accel_struct) and a full build call are visible;
 * the condition selecting between them is not. The resulting structure is installed through
 * set_accel_struct() from a command-buffer completion handler.
 *
 * progress:   receives status text through BVH_status.
 * mtl_device: device used for all buffer and acceleration structure allocations.
 * queue:      command queue the build (and optional compaction) is committed to.
 * NOTE(review): the remaining parameters and the return statements are not visible here. */
176bool BVHMetal::build_BLAS_mesh(
Progress &progress,
177 id<MTLDevice> mtl_device,
178 id<MTLCommandQueue> queue,
182 if (@available(macos 12.0, *)) {
184 Mesh *
const mesh =
static_cast<Mesh *const
>(geom);
192 const array<int> &tris = mesh->get_triangles();
193 const size_t num_verts =
verts.size();
194 const size_t num_indices = tris.
size();
/* A single step unless motion blur is enabled, the mesh uses it, and motion keys exist. */
196 size_t num_motion_steps = 1;
198 if (motion_blur && mesh->get_use_motion_blur() && motion_keys) {
199 num_motion_steps = mesh->get_motion_steps();
/* Upload the index data once. Vertex positions are uploaded directly for a single step;
 * otherwise a buffer sized for every motion step is allocated and filled below. */
203 id<MTLBuffer> posBuf = nil;
204 id<MTLBuffer> indexBuf = [mtl_device newBufferWithBytes:tris.
data()
206 options:MTLResourceStorageModeShared];
208 if (num_motion_steps == 1) {
209 posBuf = [mtl_device newBufferWithBytes:
verts.data()
211 options:MTLResourceStorageModeShared];
215 newBufferWithLength:num_verts * num_motion_steps *
sizeof(
verts.data()[0])
216 options:MTLResourceStorageModeShared];
/* Index of the middle motion step; steps other than this one are handled separately. */
218 size_t center_step = (num_motion_steps - 1) / 2;
219 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
223 if (
step != center_step) {
/* Each step occupies a contiguous run of num_verts entries in the destination. */
226 std::copy_n(
verts, num_verts, dest_data + num_verts *
step);
/* Choose the geometry descriptor: a motion triangle descriptor with one keyframe per step, or
 * a plain triangle descriptor. */
231 MTLAccelerationStructureGeometryDescriptor *geomDesc;
232 if (num_motion_steps > 1) {
233 std::vector<MTLMotionKeyframeData *> vertex_ptrs;
234 vertex_ptrs.reserve(num_motion_steps);
235 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
236 MTLMotionKeyframeData *k = [MTLMotionKeyframeData
data];
239 vertex_ptrs.push_back(k);
242 MTLAccelerationStructureMotionTriangleGeometryDescriptor *geomDescMotion =
243 [MTLAccelerationStructureMotionTriangleGeometryDescriptor
descriptor];
244 geomDescMotion.vertexBuffers = [NSArray arrayWithObjects:vertex_ptrs.data()
245 count:vertex_ptrs.size()];
246 geomDescMotion.vertexStride =
sizeof(
verts.data()[0]);
247 geomDescMotion.indexBuffer = indexBuf;
248 geomDescMotion.indexBufferOffset = 0;
249 geomDescMotion.indexType = MTLIndexTypeUInt32;
/* Three indices per triangle. */
250 geomDescMotion.triangleCount = num_indices / 3;
251 geomDescMotion.intersectionFunctionTableOffset = 0;
252 geomDescMotion.opaque =
true;
254 geomDesc = geomDescMotion;
256 BVH_status(
"Building motion mesh BLAS | %7d tris | %s | %7d motion keyframes",
259 (
int)num_motion_steps);
262 MTLAccelerationStructureTriangleGeometryDescriptor *geomDescNoMotion =
263 [MTLAccelerationStructureTriangleGeometryDescriptor
descriptor];
264 geomDescNoMotion.vertexBuffer = posBuf;
265 geomDescNoMotion.vertexBufferOffset = 0;
266 geomDescNoMotion.vertexStride =
sizeof(
verts.data()[0]);
267 geomDescNoMotion.indexBuffer = indexBuf;
268 geomDescNoMotion.indexBufferOffset = 0;
269 geomDescNoMotion.indexType = MTLIndexTypeUInt32;
270 geomDescNoMotion.triangleCount = num_indices / 3;
271 geomDescNoMotion.intersectionFunctionTableOffset = 0;
272 geomDescNoMotion.opaque =
true;
274 geomDesc = geomDescNoMotion;
277 "Building mesh BLAS | %7d tris | %s", (
int)mesh->
num_triangles(), geom->
name.c_str());
283 geomDesc.allowDuplicateIntersectionFunctionInvocation =
false;
/* Wrap the geometry in a primitive acceleration structure descriptor. Motion builds cover the
 * time range [0, 1] with clamped borders and one keyframe per motion step. */
285 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
286 [MTLPrimitiveAccelerationStructureDescriptor
descriptor];
287 accelDesc.geometryDescriptors = @[ geomDesc ];
288 if (num_motion_steps > 1) {
289 accelDesc.motionStartTime = 0.0f;
290 accelDesc.motionEndTime = 1.0f;
291 accelDesc.motionStartBorderMode = MTLMotionBorderModeClamp;
292 accelDesc.motionEndBorderMode = MTLMotionBorderModeClamp;
293 accelDesc.motionKeyframeCount = num_motion_steps;
295 if (extended_limits) {
296 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
/* Without fast-trace, request a refittable structure that prefers a fast build. With
 * fast-trace, request fast intersection where macOS 26.0 is available. */
299 if (!use_fast_trace_bvh) {
300 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
301 MTLAccelerationStructureUsagePreferFastBuild);
303 else if (@available(macos 26.0, *)) {
304 accelDesc.usage |= MTLAccelerationStructureUsagePreferFastIntersection;
/* Query the required sizes, then allocate the destination structure, a private scratch buffer
 * and an 8-byte shared buffer (presumably the target of the compacted-size query below). */
307 MTLAccelerationStructureSizes accelSizes = [mtl_device
308 accelerationStructureSizesWithDescriptor:accelDesc];
309 id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
310 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
311 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
312 options:MTLResourceStorageModePrivate];
313 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
314 options:MTLResourceStorageModeShared];
315 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
316 id<MTLAccelerationStructureCommandEncoder> accelEnc =
317 [accelCommands accelerationStructureCommandEncoder];
/* Refit variant: update from the existing accel_struct into the new destination. */
319 [accelEnc refitAccelerationStructure:accel_struct
321 destination:accel_uncompressed
322 scratchBuffer:scratchBuf
323 scratchBufferOffset:0];
/* Full build variant. */
326 [accelEnc buildAccelerationStructure:accel_uncompressed
328 scratchBuffer:scratchBuf
329 scratchBufferOffset:0];
/* Fast-trace builds also query the compacted size so the structure can be compacted later. */
331 if (use_fast_trace_bvh) {
332 [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
335 sizeDataType:MTLDataTypeULong];
337 [accelEnc endEncoding];
/* Size handed to the build throttler: the input and scratch buffers plus twice the size of the
 * uncompressed structure. */
341 size_t wired_size = posBuf.allocatedSize + indexBuf.allocatedSize + scratchBuf.allocatedSize +
342 accel_uncompressed.allocatedSize * 2;
/* Runs when the build command buffer completes; temporary resources are released here. */
344 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
346 [scratchBuf release];
350 if (use_fast_trace_bvh) {
/* Compact on a global dispatch queue: copy into a structure of compressed_size, install it when
 * that copy completes, release the uncompressed one and return the throttler budget. */
354 dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
355 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
356 id<MTLAccelerationStructureCommandEncoder> accelEnc =
357 [accelCommands accelerationStructureCommandEncoder];
358 id<MTLAccelerationStructure> accel = [mtl_device
359 newAccelerationStructureWithSize:compressed_size];
360 [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
361 toAccelerationStructure:accel];
362 [accelEnc endEncoding];
363 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
364 set_accel_struct(accel);
365 [accel_uncompressed release];
368 g_bvh_build_throttler.release(wired_size);
370 [accelCommands commit];
/* No compaction: install the uncompressed structure directly and return the budget. */
375 set_accel_struct(accel_uncompressed);
378 g_bvh_build_throttler.release(wired_size);
/* Take the throttler budget before committing the build to the queue. */
385 g_bvh_build_throttler.acquire(wired_size);
386 [accelCommands commit];
/* Encodes and submits the bottom-level acceleration structure work for hair curves on `queue`,
 * using Metal curve geometry descriptors. The body is compiled only when MAC_OS_VERSION_14_0 is
 * defined and runs only on macOS 14.0+.
 *
 * progress:   receives status text through BVH_status.
 * mtl_device: device used for all buffer and acceleration structure allocations.
 * queue:      command queue the build (and optional compaction) is committed to.
 * NOTE(review): the remaining parameters and the return statements are not visible here. */
393bool BVHMetal::build_BLAS_hair(
Progress &progress,
394 id<MTLDevice> mtl_device,
395 id<MTLCommandQueue> queue,
399# if defined(MAC_OS_VERSION_14_0)
400 if (@available(macos 14.0, *)) {
402 Hair *hair =
static_cast<Hair *
>(geom);
/* A single step unless motion blur is enabled, the hair uses it, and motion keys exist. */
409 size_t num_motion_steps = 1;
411 if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
412 num_motion_steps = hair->get_motion_steps();
415 id<MTLBuffer> cpBuffer = nil;
416 id<MTLBuffer> radiusBuffer = nil;
417 id<MTLBuffer> idxBuffer = nil;
/* Choose between the motion curve descriptor and the plain curve descriptor. */
419 MTLAccelerationStructureGeometryDescriptor *geomDesc;
420 if (num_motion_steps > 1) {
421 MTLAccelerationStructureMotionCurveGeometryDescriptor *geomDescCrv =
422 [MTLAccelerationStructureMotionCurveGeometryDescriptor
descriptor];
426 const array<float> &radiuses = hair->get_curve_radius();
/* CPU-side staging for control points, segment indices and radii. */
429 std::vector<float3> cpData;
430 std::vector<int> idxData;
431 std::vector<float> radiusData;
432 cpData.reserve(numKeys);
433 radiusData.reserve(numKeys);
/* Element offset into cpData at which each motion step's control points begin. */
435 std::vector<int> step_offsets;
436 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
/* The center step reads the hair's own curve keys. Other steps read the motion key data,
 * which has no entry for the center step, hence the adjusted offset. */
439 const float3 *keys = hair->get_curve_keys().data();
440 size_t center_step = (num_motion_steps - 1) / 2;
441 if (
step != center_step) {
442 size_t attr_offset = (
step > center_step) ?
step - 1 :
step;
444 keys = motion_keys->
data_float3() + attr_offset * numKeys;
447 step_offsets.push_back(cpData.size());
449 for (
int c = 0; c < numCurves; ++c) {
/* Per curve: the first key, then one key per segment, then the last key twice.
 * NOTE(review): the extra end points look like padding required by the curve basis -- confirm. */
455 cpData.push_back(keys[firstKey]);
456 radiusData.push_back(radiuses[firstKey]);
458 for (
int s = 0; s < segCount; ++s) {
460 idxData.push_back(idxBase + s);
462 cpData.push_back(keys[firstKey + s]);
463 radiusData.push_back(radiuses[firstKey + s]);
465 cpData.push_back(keys[firstKey + curve.
num_keys - 1]);
466 radiusData.push_back(radiuses[firstKey + curve.
num_keys - 1]);
468 cpData.push_back(keys[firstKey + curve.
num_keys - 1]);
469 radiusData.push_back(radiuses[firstKey + curve.
num_keys - 1]);
/* Upload the staged data into shared Metal buffers. */
475 idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
476 length:idxData.size() *
sizeof(int)
477 options:MTLResourceStorageModeShared];
479 cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
481 options:MTLResourceStorageModeShared];
483 radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
485 options:MTLResourceStorageModeShared];
/* One keyframe object per motion step for control points and one for radii; each carries the
 * byte offset at which that step's data starts. */
487 std::vector<MTLMotionKeyframeData *> cp_ptrs;
488 std::vector<MTLMotionKeyframeData *> radius_ptrs;
489 cp_ptrs.reserve(num_motion_steps);
490 radius_ptrs.reserve(num_motion_steps);
492 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
493 MTLMotionKeyframeData *k = [MTLMotionKeyframeData
data];
495 k.offset = step_offsets[
step] *
sizeof(
float3);
496 cp_ptrs.push_back(k);
498 k = [MTLMotionKeyframeData
data];
499 k.buffer = radiusBuffer;
500 k.offset = step_offsets[
step] *
sizeof(
float);
501 radius_ptrs.push_back(k);
504 geomDescCrv.controlPointBuffers = [NSArray arrayWithObjects:cp_ptrs.data()
505 count:cp_ptrs.size()];
506 geomDescCrv.radiusBuffers = [NSArray arrayWithObjects:radius_ptrs.data()
507 count:radius_ptrs.size()];
/* cpData holds every step back to back, so the per-keyframe count is the total divided by the
 * number of steps. */
510 geomDescCrv.controlPointCount = cpData.size() / num_motion_steps;
511 geomDescCrv.controlPointStride =
sizeof(
float3);
512 geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
513 geomDescCrv.radiusStride =
sizeof(
float);
514 geomDescCrv.radiusFormat = MTLAttributeFormatFloat;
515 geomDescCrv.segmentCount = idxData.size();
/* Two basis / end-cap pairings follow (linear with sphere caps, Catmull-Rom with disk caps).
 * NOTE(review): the condition choosing between them is not visible in this excerpt. */
520 geomDescCrv.curveBasis = MTLCurveBasisLinear;
521 geomDescCrv.curveEndCaps = MTLCurveEndCapsSphere;
524 geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
525 geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
527 geomDescCrv.indexType = MTLIndexTypeUInt32;
528 geomDescCrv.indexBuffer = idxBuffer;
/* Curves use intersection function table offset 1; the mesh builder in this file uses 0. */
529 geomDescCrv.intersectionFunctionTableOffset = 1;
534 geomDescCrv.allowDuplicateIntersectionFunctionInvocation =
false;
535 geomDescCrv.opaque =
true;
536 geomDesc = geomDescCrv;
/* Non-motion variant: same per-curve layout, built once from the hair's curve keys. */
539 MTLAccelerationStructureCurveGeometryDescriptor *geomDescCrv =
540 [MTLAccelerationStructureCurveGeometryDescriptor
descriptor];
544 const array<float> &radiuses = hair->get_curve_radius();
547 std::vector<float3> cpData;
548 std::vector<int> idxData;
549 std::vector<float> radiusData;
550 cpData.reserve(numKeys);
551 radiusData.reserve(numKeys);
552 auto keys = hair->get_curve_keys();
553 for (
int c = 0; c < numCurves; ++c) {
/* Per curve: the first key, one key per segment, then the last key twice. */
559 cpData.push_back(keys[firstKey]);
560 radiusData.push_back(radiuses[firstKey]);
562 for (
int s = 0; s < segCount; ++s) {
563 idxData.push_back(idxBase + s);
564 cpData.push_back(keys[firstKey + s]);
565 radiusData.push_back(radiuses[firstKey + s]);
567 cpData.push_back(keys[firstKey + curve.
num_keys - 1]);
568 radiusData.push_back(radiuses[firstKey + curve.
num_keys - 1]);
570 cpData.push_back(keys[firstKey + curve.
num_keys - 1]);
571 radiusData.push_back(radiuses[firstKey + curve.
num_keys - 1]);
/* Upload the staged data into shared Metal buffers. */
576 idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
577 length:idxData.size() *
sizeof(int)
578 options:MTLResourceStorageModeShared];
580 cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
582 options:MTLResourceStorageModeShared];
584 radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
586 options:MTLResourceStorageModeShared];
588 geomDescCrv.controlPointBuffer = cpBuffer;
589 geomDescCrv.radiusBuffer = radiusBuffer;
590 geomDescCrv.controlPointCount = cpData.size();
591 geomDescCrv.controlPointStride =
sizeof(
float3);
592 geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
593 geomDescCrv.controlPointBufferOffset = 0;
594 geomDescCrv.segmentCount = idxData.size();
/* Same two basis / end-cap pairings as the motion variant; the selecting condition is not
 * visible in this excerpt. */
599 geomDescCrv.curveBasis = MTLCurveBasisLinear;
600 geomDescCrv.curveEndCaps = MTLCurveEndCapsSphere;
603 geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
604 geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
606 geomDescCrv.indexType = MTLIndexTypeUInt32;
607 geomDescCrv.indexBuffer = idxBuffer;
608 geomDescCrv.intersectionFunctionTableOffset = 1;
613 geomDescCrv.allowDuplicateIntersectionFunctionInvocation =
false;
614 geomDescCrv.opaque =
true;
615 geomDesc = geomDescCrv;
/* Wrap the geometry in a primitive acceleration structure descriptor. Motion builds cover the
 * time range [0, 1] with the vanish border mode and one keyframe per motion step. */
618 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
619 [MTLPrimitiveAccelerationStructureDescriptor
descriptor];
620 accelDesc.geometryDescriptors = @[ geomDesc ];
622 if (num_motion_steps > 1) {
623 accelDesc.motionStartTime = 0.0f;
624 accelDesc.motionEndTime = 1.0f;
625 accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
626 accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
627 accelDesc.motionKeyframeCount = num_motion_steps;
629 BVH_status(
"Building motion hair BLAS | %7d curves | %s | %7d motion keyframes",
632 (
int)num_motion_steps);
636 "Building hair BLAS | %7d curves | %s", (
int)hair->
num_curves(), geom->
name.c_str());
639 if (extended_limits) {
640 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
/* Without fast-trace, request a refittable structure that prefers a fast build. With
 * fast-trace, request fast intersection where macOS 26.0 is available. */
643 if (!use_fast_trace_bvh) {
644 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
645 MTLAccelerationStructureUsagePreferFastBuild);
647 else if (@available(macos 26.0, *)) {
648 accelDesc.usage |= MTLAccelerationStructureUsagePreferFastIntersection;
/* Query the required sizes, then allocate the destination structure, a private scratch buffer
 * and an 8-byte shared buffer (presumably the target of the compacted-size query below). */
651 MTLAccelerationStructureSizes accelSizes = [mtl_device
652 accelerationStructureSizesWithDescriptor:accelDesc];
653 id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
654 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
655 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
656 options:MTLResourceStorageModePrivate];
657 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
658 options:MTLResourceStorageModeShared];
659 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
660 id<MTLAccelerationStructureCommandEncoder> accelEnc =
661 [accelCommands accelerationStructureCommandEncoder];
/* Refit variant: update from the existing accel_struct into the new destination. */
663 [accelEnc refitAccelerationStructure:accel_struct
665 destination:accel_uncompressed
666 scratchBuffer:scratchBuf
667 scratchBufferOffset:0];
/* Full build variant. */
670 [accelEnc buildAccelerationStructure:accel_uncompressed
672 scratchBuffer:scratchBuf
673 scratchBufferOffset:0];
/* Fast-trace builds also query the compacted size so the structure can be compacted later. */
675 if (use_fast_trace_bvh) {
676 [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
679 sizeDataType:MTLDataTypeULong];
681 [accelEnc endEncoding];
/* Size handed to the build throttler: the three input buffers, the scratch buffer and twice
 * the size of the uncompressed structure. */
685 size_t wired_size = cpBuffer.allocatedSize + radiusBuffer.allocatedSize +
686 idxBuffer.allocatedSize + scratchBuf.allocatedSize +
687 accel_uncompressed.allocatedSize * 2;
/* Runs when the build command buffer completes; temporary resources are released here. */
689 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
691 [scratchBuf release];
693 [radiusBuffer release];
696 if (use_fast_trace_bvh) {
/* Compact on a global dispatch queue: copy into a structure of compressed_size, install it when
 * that copy completes, release the uncompressed one and return the throttler budget. */
699 dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
700 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
701 id<MTLAccelerationStructureCommandEncoder> accelEnc =
702 [accelCommands accelerationStructureCommandEncoder];
703 id<MTLAccelerationStructure> accel = [mtl_device
704 newAccelerationStructureWithSize:compressed_size];
705 [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
706 toAccelerationStructure:accel];
707 [accelEnc endEncoding];
708 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
709 set_accel_struct(accel);
710 [accel_uncompressed release];
713 g_bvh_build_throttler.release(wired_size);
715 [accelCommands commit];
/* No compaction: install the uncompressed structure directly and return the budget. */
720 set_accel_struct(accel_uncompressed);
723 g_bvh_build_throttler.release(wired_size);
/* Take the throttler budget before committing the build to the queue. */
730 g_bvh_build_throttler.acquire(wired_size);
731 [accelCommands commit];
/* Encodes and submits the bottom-level acceleration structure work for a point cloud on
 * `queue`. Points are described to Metal as axis-aligned bounding boxes, one per point per
 * motion step.
 *
 * progress:   receives status text through BVH_status.
 * mtl_device: device used for all buffer and acceleration structure allocations.
 * queue:      command queue the build (and optional compaction) is committed to.
 * NOTE(review): the remaining parameters and the return statements are not visible here. */
745bool BVHMetal::build_BLAS_pointcloud(
Progress &progress,
746 id<MTLDevice> mtl_device,
747 id<MTLCommandQueue> queue,
751 if (@available(macos 12.0, *)) {
758 const size_t num_points = pointcloud->get_points().size();
759 const float3 *points = pointcloud->get_points().data();
760 const float *radius = pointcloud->get_radius().data();
/* A single step unless motion blur is enabled, the point cloud uses it, and motion keys exist. */
764 size_t num_motion_steps = 1;
766 if (motion_blur && pointcloud->get_use_motion_blur() && motion_keys) {
767 num_motion_steps = pointcloud->get_motion_steps();
/* One box per point per step, all stored in a single shared buffer. */
770 const size_t num_aabbs = num_motion_steps * num_points;
773 id<MTLBuffer> aabbBuf = [mtl_device
774 newBufferWithLength:num_aabbs *
sizeof(MTLAxisAlignedBoundingBox)
775 options:MTLResourceStorageModeShared];
776 MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents];
/* Fill the boxes step by step; step `s` occupies entries [s * num_points, (s + 1) * num_points).
 * NOTE(review): the bounds computation itself is not visible in this excerpt. */
779 size_t center_step = (num_motion_steps - 1) / 2;
780 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
781 if (
step == center_step) {
783 for (
size_t j = 0; j < num_points; ++j) {
788 const size_t index =
step * num_points + j;
789 aabb_data[index].min = (MTLPackedFloat3 &)
bounds.min;
790 aabb_data[index].max = (MTLPackedFloat3 &)
bounds.max;
/* Non-center steps: the offset skips over the center step, matching the same adjustment used
 * by the hair builder in this file. */
794 size_t attr_offset = (
step > center_step) ?
step - 1 :
step;
797 for (
size_t j = 0; j < num_points; ++j) {
802 const size_t index =
step * num_points + j;
803 aabb_data[index].min = (MTLPackedFloat3 &)
bounds.min;
804 aabb_data[index].max = (MTLPackedFloat3 &)
bounds.max;
/* Choose between the motion bounding-box descriptor (one keyframe per step, each pointing at
 * that step's slice of aabbBuf) and the plain bounding-box descriptor. */
809 MTLAccelerationStructureGeometryDescriptor *geomDesc;
810 if (num_motion_steps > 1) {
811 std::vector<MTLMotionKeyframeData *> aabb_ptrs;
812 aabb_ptrs.reserve(num_motion_steps);
813 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
814 MTLMotionKeyframeData *k = [MTLMotionKeyframeData
data];
816 k.offset =
step * num_points *
sizeof(MTLAxisAlignedBoundingBox);
817 aabb_ptrs.push_back(k);
820 MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor *geomDescMotion =
821 [MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor
descriptor];
822 geomDescMotion.boundingBoxBuffers = [NSArray arrayWithObjects:aabb_ptrs.data()
823 count:aabb_ptrs.size()];
/* The count is per keyframe, i.e. the number of points. */
824 geomDescMotion.boundingBoxCount = num_points;
825 geomDescMotion.boundingBoxStride =
sizeof(aabb_data[0]);
/* Point clouds use intersection function table offset 2. */
826 geomDescMotion.intersectionFunctionTableOffset = 2;
831 geomDescMotion.allowDuplicateIntersectionFunctionInvocation =
false;
832 geomDescMotion.opaque =
true;
833 geomDesc = geomDescMotion;
836 MTLAccelerationStructureBoundingBoxGeometryDescriptor *geomDescNoMotion =
837 [MTLAccelerationStructureBoundingBoxGeometryDescriptor
descriptor];
838 geomDescNoMotion.boundingBoxBuffer = aabbBuf;
839 geomDescNoMotion.boundingBoxBufferOffset = 0;
840 geomDescNoMotion.boundingBoxCount = int(num_aabbs);
841 geomDescNoMotion.boundingBoxStride =
sizeof(aabb_data[0]);
842 geomDescNoMotion.intersectionFunctionTableOffset = 2;
847 geomDescNoMotion.allowDuplicateIntersectionFunctionInvocation =
false;
848 geomDescNoMotion.opaque =
true;
849 geomDesc = geomDescNoMotion;
/* Wrap the geometry in a primitive acceleration structure descriptor. Motion builds cover the
 * time range [0, 1] with one keyframe per motion step. */
852 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
853 [MTLPrimitiveAccelerationStructureDescriptor
descriptor];
854 accelDesc.geometryDescriptors = @[ geomDesc ];
856 if (num_motion_steps > 1) {
857 accelDesc.motionStartTime = 0.0f;
858 accelDesc.motionEndTime = 1.0f;
861 accelDesc.motionKeyframeCount = num_motion_steps;
863 BVH_status(
"Building motion pointcloud BLAS | %7d points | %s | %7d motion keyframes",
866 (
int)num_motion_steps);
869 BVH_status(
"Building pointcloud BLAS | %7d points | %s",
873 if (extended_limits) {
874 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
/* Without fast-trace, request a refittable structure that prefers a fast build. With
 * fast-trace, request fast intersection where macOS 26.0 is available. */
877 if (!use_fast_trace_bvh) {
878 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
879 MTLAccelerationStructureUsagePreferFastBuild);
881 else if (@available(macos 26.0, *)) {
882 accelDesc.usage |= MTLAccelerationStructureUsagePreferFastIntersection;
/* Query the required sizes, then allocate the destination structure, a private scratch buffer
 * and an 8-byte shared buffer (presumably the target of the compacted-size query below). */
885 MTLAccelerationStructureSizes accelSizes = [mtl_device
886 accelerationStructureSizesWithDescriptor:accelDesc];
887 id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
888 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
889 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
890 options:MTLResourceStorageModePrivate];
891 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
892 options:MTLResourceStorageModeShared];
893 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
894 id<MTLAccelerationStructureCommandEncoder> accelEnc =
895 [accelCommands accelerationStructureCommandEncoder];
/* Refit variant: update from the existing accel_struct into the new destination. */
897 [accelEnc refitAccelerationStructure:accel_struct
899 destination:accel_uncompressed
900 scratchBuffer:scratchBuf
901 scratchBufferOffset:0];
/* Full build variant. */
904 [accelEnc buildAccelerationStructure:accel_uncompressed
906 scratchBuffer:scratchBuf
907 scratchBufferOffset:0];
/* Fast-trace builds also query the compacted size so the structure can be compacted later. */
909 if (use_fast_trace_bvh) {
910 [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
913 sizeDataType:MTLDataTypeULong];
915 [accelEnc endEncoding];
/* Size handed to the build throttler: the box buffer, the scratch buffer and twice the size of
 * the uncompressed structure. */
919 size_t wired_size = aabbBuf.allocatedSize + scratchBuf.allocatedSize +
920 accel_uncompressed.allocatedSize * 2;
/* Runs when the build command buffer completes; temporary resources are released here. */
922 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
924 [scratchBuf release];
927 if (use_fast_trace_bvh) {
/* Compact on a global dispatch queue: copy into a structure of compressed_size, install it when
 * that copy completes, release the uncompressed one and return the throttler budget. */
931 dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
932 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
933 id<MTLAccelerationStructureCommandEncoder> accelEnc =
934 [accelCommands accelerationStructureCommandEncoder];
935 id<MTLAccelerationStructure> accel = [mtl_device
936 newAccelerationStructureWithSize:compressed_size];
937 [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
938 toAccelerationStructure:accel];
939 [accelEnc endEncoding];
940 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
941 set_accel_struct(accel);
942 [accel_uncompressed release];
945 g_bvh_build_throttler.release(wired_size);
947 [accelCommands commit];
/* No compaction: install the uncompressed structure directly and return the budget. */
952 set_accel_struct(accel_uncompressed);
955 g_bvh_build_throttler.release(wired_size);
/* Take the throttler budget before committing the build to the queue. */
962 g_bvh_build_throttler.acquire(wired_size);
963 [accelCommands commit];
/* Entry point for bottom-level builds. Expects exactly one object and one geometry, then
 * forwards to the mesh, hair or point cloud builder and returns that builder's result.
 * NOTE(review): the conditions selecting the builder are not visible in this excerpt. */
969bool BVHMetal::build_BLAS(
Progress &progress,
970 id<MTLDevice> mtl_device,
971 id<MTLCommandQueue> queue,
/* A BLAS is built for a single geometry; this is only checked in builds with asserts enabled. */
974 assert(objects.size() == 1 && geometry.size() == 1);
981 return build_BLAS_mesh(progress, mtl_device, queue, geom,
refit);
983 return build_BLAS_hair(progress, mtl_device, queue, geom,
refit);
985 return build_BLAS_pointcloud(progress, mtl_device, queue, geom,
refit);
992# if defined(MAC_OS_VERSION_15_0)
/* Converts a DecomposedTransform into Metal's MTLComponentTransform by picking individual
 * components out of src.x, src.y, src.z and src.w. The field mapping below is exactly what the
 * code reads; the pivot is always zero. */
995static MTLComponentTransform decomposed_to_component_transform(
const DecomposedTransform &src)
997 MTLComponentTransform tfm;
/* Scale comes from the w components of src.y, src.z and src.w. */
998 tfm.scale = MTLPackedFloat3Make(src.
y.
w, src.
z.
w, src.
w.
w);
/* Shear comes from src.z.x, src.z.y and src.w.x. */
999 tfm.shear = MTLPackedFloat3Make(src.
z.
x, src.
z.
y, src.
w.
x);
1000 tfm.pivot = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
/* Rotation quaternion is taken component-wise from src.x. */
1001 tfm.rotation = MTLPackedFloatQuaternionMake(src.
x.
x, src.
x.
y, src.
x.
z, src.
x.
w);
/* Translation is the xyz part of src.y. */
1002 tfm.translation = MTLPackedFloat3Make(src.
y.
x, src.
y.
y, src.
y.
z);
/* Builds an identity MTLComponentTransform: unit scale, zero shear, pivot and translation, and
 * the quaternion (0, 0, 0, 1) as rotation. */
1007static MTLComponentTransform component_transform_make_unit()
1009 MTLComponentTransform tfm;
1010 tfm.scale = MTLPackedFloat3Make(1.0f, 1.0f, 1.0f);
1011 tfm.shear = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
1012 tfm.pivot = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
1013 tfm.rotation = MTLPackedFloatQuaternionMake(0.0f, 0.0f, 0.0f, 1.0f);
1014 tfm.translation = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
1020bool BVHMetal::build_TLAS(
Progress &progress,
1021 id<MTLDevice> mtl_device,
1022 id<MTLCommandQueue> queue,
1026 g_bvh_build_throttler.wait_for_all();
1028 if (@available(macos 12.0, *)) {
1030 auto make_null_BLAS = [
this](id<MTLDevice> mtl_device,
1031 id<MTLCommandQueue> queue) -> id<MTLAccelerationStructure> {
1032 id<MTLBuffer> nullBuf = [mtl_device newBufferWithLength:
sizeof(
float3)
1033 options:MTLResourceStorageModeShared];
1036 MTLAccelerationStructureTriangleGeometryDescriptor *geomDesc =
1037 [MTLAccelerationStructureTriangleGeometryDescriptor
descriptor];
1038 geomDesc.vertexBuffer = nullBuf;
1039 geomDesc.vertexBufferOffset = 0;
1040 geomDesc.vertexStride =
sizeof(
float3);
1041 geomDesc.indexBuffer = nullBuf;
1042 geomDesc.indexBufferOffset = 0;
1043 geomDesc.indexType = MTLIndexTypeUInt32;
1044 geomDesc.triangleCount = 0;
1045 geomDesc.intersectionFunctionTableOffset = 0;
1046 geomDesc.opaque =
true;
1047 geomDesc.allowDuplicateIntersectionFunctionInvocation =
false;
1049 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
1050 [MTLPrimitiveAccelerationStructureDescriptor
descriptor];
1051 accelDesc.geometryDescriptors = @[ geomDesc ];
1052 if (extended_limits) {
1053 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
1056 MTLAccelerationStructureSizes accelSizes = [mtl_device
1057 accelerationStructureSizesWithDescriptor:accelDesc];
1058 id<MTLAccelerationStructure> accel_struct = [mtl_device
1059 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
1060 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
1061 options:MTLResourceStorageModePrivate];
1062 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
1063 options:MTLResourceStorageModeShared];
1064 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
1065 id<MTLAccelerationStructureCommandEncoder> accelEnc =
1066 [accelCommands accelerationStructureCommandEncoder];
1067 [accelEnc buildAccelerationStructure:accel_struct
1069 scratchBuffer:scratchBuf
1070 scratchBufferOffset:0];
1071 [accelEnc endEncoding];
1072 [accelCommands commit];
1073 [accelCommands waitUntilCompleted];
1076 [scratchBuf release];
1080 return accel_struct;
1084 uint32_t num_motion_transforms = 0;
1086 for (
Object *ob : objects) {
1089 if (ob->use_motion()) {
1090 num_motion_transforms +=
max((
size_t)1, ob->get_motion().size());
1091 num_motion_instances++;
1094 num_motion_transforms++;
1098 const bool use_instance_motion = motion_blur && num_motion_instances;
1101 NSMutableArray *all_blas = [NSMutableArray
array];
1102 unordered_map<const BVHMetal *, int> instance_mapping;
1105 auto get_blas_index = [&](
const BVHMetal *blas) {
1106 auto it = instance_mapping.find(blas);
1107 if (it != instance_mapping.end()) {
1110 int blas_index = (int)[all_blas
count];
1111 instance_mapping[blas] = blas_index;
1112 if (@available(macos 12.0, *)) {
1113 [all_blas addObject:(blas ? blas->accel_struct : null_BLAS)];
1118 size_t instance_size;
1119 if (use_instance_motion) {
1120 instance_size =
sizeof(MTLAccelerationStructureMotionInstanceDescriptor);
1123 instance_size =
sizeof(MTLAccelerationStructureUserIDInstanceDescriptor);
1127 id<MTLBuffer> instanceBuf = [mtl_device newBufferWithLength:num_instances * instance_size
1128 options:MTLResourceStorageModeShared];
1129 id<MTLBuffer> motion_transforms_buf = nil;
1130 MTLPackedFloat4x3 *matrix_motion_transforms =
nullptr;
1131# if defined(MAC_OS_VERSION_15_0)
1132 MTLComponentTransform *decomposed_motion_transforms =
nullptr;
1134 if (use_instance_motion && num_motion_transforms) {
1135# if defined(MAC_OS_VERSION_15_0)
1137 if (@available(macos 15.0, *)) {
1138 motion_transforms_buf = [mtl_device
1139 newBufferWithLength:num_motion_transforms *
sizeof(MTLComponentTransform)
1140 options:MTLResourceStorageModeShared];
1141 decomposed_motion_transforms = (MTLComponentTransform *)motion_transforms_buf.contents;
1147 motion_transforms_buf = [mtl_device
1148 newBufferWithLength:num_motion_transforms *
sizeof(MTLPackedFloat4x3)
1149 options:MTLResourceStorageModeShared];
1150 matrix_motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents;
1155 uint32_t motion_transform_index = 0;
1158 blas_array.reserve(num_instances);
1160 for (
Object *ob : objects) {
1162 const Geometry *geom = ob->get_geometry();
1163 const BVHMetal *blas =
static_cast<const BVHMetal *
>(geom->
bvh.get());
1164 if (!blas || !blas->accel_struct || !ob->is_traceable()) {
1172 null_BLAS = make_null_BLAS(mtl_device, queue);
1174 blas_array.push_back(null_BLAS);
1177 blas_array.push_back(blas->accel_struct);
1180 uint32_t accel_struct_index = get_blas_index(blas);
1192 int currIndex = instance_index++;
1212 if (use_instance_motion) {
1213 MTLAccelerationStructureMotionInstanceDescriptor *instances =
1214 (MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
1215 MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[currIndex];
1217 desc.accelerationStructureIndex = accel_struct_index;
1218 desc.userID = primitive_offset;
1220 desc.motionStartTime = 0.0f;
1221 desc.motionEndTime = 1.0f;
1222 desc.motionTransformsStartIndex = motion_transform_index;
1223 desc.motionStartBorderMode = MTLMotionBorderModeVanish;
1224 desc.motionEndBorderMode = MTLMotionBorderModeVanish;
1225 desc.intersectionFunctionTableOffset = 0;
1229 decomp.data(), ob->get_motion().data(), ob->get_motion().size());
1231 int key_count = ob->get_motion().size();
1233 desc.motionTransformsCount = key_count;
1235# if defined(MAC_OS_VERSION_15_0)
1237 for (
int i = 0;
i < key_count;
i++) {
1238 decomposed_motion_transforms[motion_transform_index++] =
1239 decomposed_to_component_transform(decomp[
i]);
1245 Transform *keys = ob->get_motion().data();
1246 for (
int i = 0;
i < key_count;
i++) {
1247 float *t = (
float *)&matrix_motion_transforms[motion_transform_index++];
1249 const auto *src = (
const float *)&keys[
i];
1250 for (
int i = 0;
i < 12;
i++) {
1251 t[
i] = src[(
i / 3) + 4 * (
i % 3)];
1257 desc.motionTransformsCount = 1;
1259# if defined(MAC_OS_VERSION_15_0)
1261 if (ob->get_geometry()->is_instanced()) {
1264 decomposed_motion_transforms[motion_transform_index++] =
1265 decomposed_to_component_transform(decomp);
1268 decomposed_motion_transforms[motion_transform_index++] =
1269 component_transform_make_unit();
1275 float *t = (
float *)&matrix_motion_transforms[motion_transform_index++];
1276 if (ob->get_geometry()->is_instanced()) {
1278 const auto *src = (
const float *)&ob->get_tfm();
1279 for (
int i = 0;
i < 12;
i++) {
1280 t[
i] = src[(
i / 3) + 4 * (
i % 3)];
1285 t[0] = t[4] = t[8] = 1.0f;
1291 MTLAccelerationStructureUserIDInstanceDescriptor *instances =
1292 (MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
1293 MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[currIndex];
1295 desc.accelerationStructureIndex = accel_struct_index;
1296 desc.userID = primitive_offset;
1298 desc.intersectionFunctionTableOffset = 0;
1299 desc.options = MTLAccelerationStructureInstanceOptionOpaque;
1301 float *t = (
float *)&desc.transformationMatrix;
1302 if (ob->get_geometry()->is_instanced()) {
1304 const auto *src = (
const float *)&ob->get_tfm();
1305 for (
int i = 0;
i < 12;
i++) {
1306 t[
i] = src[(
i / 3) + 4 * (
i % 3)];
1311 t[0] = t[4] = t[8] = 1.0f;
1316 if (use_instance_motion) {
1318 "Building motion TLAS | %7d instances | %7d motion instances | %7d motion "
1321 (
int)num_motion_instances,
1322 (
int)num_motion_transforms);
1325 BVH_status(
"Building TLAS | %7d instances", (
int)num_instances);
1328 MTLInstanceAccelerationStructureDescriptor *accelDesc =
1329 [MTLInstanceAccelerationStructureDescriptor
descriptor];
1330 accelDesc.instanceCount = num_instances;
1331 accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeUserID;
1332 accelDesc.instanceDescriptorBuffer = instanceBuf;
1333 accelDesc.instanceDescriptorBufferOffset = 0;
1334 accelDesc.instanceDescriptorStride = instance_size;
1335 accelDesc.instancedAccelerationStructures = all_blas;
1337 if (use_instance_motion) {
1338 accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeMotion;
1339 accelDesc.motionTransformBuffer = motion_transforms_buf;
1340 accelDesc.motionTransformCount = num_motion_transforms;
1341# if defined(MAC_OS_VERSION_15_0)
1342 if (@available(macos 15.0, *)) {
1343 accelDesc.motionTransformStride = 0;
1344 accelDesc.motionTransformType = use_pcmi ? MTLTransformTypeComponent :
1345 MTLTransformTypePackedFloat4x3;
1350 if (extended_limits) {
1351 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
1353 if (!use_fast_trace_bvh) {
1354 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
1355 MTLAccelerationStructureUsagePreferFastBuild);
1357 else if (@available(macos 26.0, *)) {
1358 accelDesc.usage |= MTLAccelerationStructureUsagePreferFastIntersection;
1361 MTLAccelerationStructureSizes accelSizes = [mtl_device
1362 accelerationStructureSizesWithDescriptor:accelDesc];
1363 id<MTLAccelerationStructure> accel = [mtl_device
1364 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
1365 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
1366 options:MTLResourceStorageModePrivate];
1367 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
1368 id<MTLAccelerationStructureCommandEncoder> accelEnc =
1369 [accelCommands accelerationStructureCommandEncoder];
1371 [accelEnc refitAccelerationStructure:accel_struct
1374 scratchBuffer:scratchBuf
1375 scratchBufferOffset:0];
1378 [accelEnc buildAccelerationStructure:accel
1380 scratchBuffer:scratchBuf
1381 scratchBufferOffset:0];
1383 [accelEnc endEncoding];
1384 [accelCommands commit];
1385 [accelCommands waitUntilCompleted];
1387 if (motion_transforms_buf) {
1388 [motion_transforms_buf release];
1390 [instanceBuf release];
1391 [scratchBuf release];
1394 set_accel_struct(accel);
1396 unique_blas_array.clear();
1397 unique_blas_array.reserve(all_blas.count);
1398 [all_blas enumerateObjectsUsingBlock:^(id<MTLAccelerationStructure> blas, NSUInteger, BOOL *) {
1399 unique_blas_array.push_back(blas);
/* Entry point for building (or refitting) the Metal acceleration structure of this BVH.
 *
 * Takes the progress reporter, the Metal device and the command queue, and forwards them
 * together with `refit` to either build_BLAS() or build_TLAS(), returning that call's result.
 *
 * NOTE(review): this listing is missing several original lines (the tail of the signature,
 * the conditions guarding the asserts, the branch that selects BLAS vs. TLAS, and the closing
 * braces). Comments below only describe what is visible; confirm the rest against the full
 * file. */
1407bool BVHMetal::build(
Progress &progress,
1408 id<MTLDevice> mtl_device,
1409 id<MTLCommandQueue> queue,
/* All visible work below is gated on macOS 12.0 or newer being available at runtime. */
1412 if (@available(macos 12.0, *)) {
/* Debug assertion for a refit request on a static BVH. The condition guarding it is not
 * visible in this listing. */
1417 assert(!
"Can't refit static Metal BVH");
/* A refit needs an existing acceleration structure; assert when there is none. */
1420 else if (!accel_struct) {
1421 assert(!
"Can't refit non-existing Metal BVH");
/* Drop the currently held acceleration structure. Presumably only done when a full rebuild
 * (not a refit) is going to happen -- the guarding condition is not visible, TODO confirm. */
1427 set_accel_struct(nil);
/* Ask whether BLAS refitting is supported on this OS version. The body of this branch is
 * not visible here; presumably it disables `refit` -- TODO confirm. */
1431 if (!support_refit_blas()) {
/* Bottom-level path: build the acceleration structure via build_BLAS(). */
1437 return build_BLAS(progress, mtl_device, queue,
refit);
/* Top-level path: build the instance acceleration structure via build_TLAS(). */
1439 return build_TLAS(progress, mtl_device, queue,
refit);
BMesh const char void * data
unsigned long long int uint64_t
static btDbvtVolume bounds(btDbvtNode **leaves, int count)
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
Attribute * find(ustring name) const
bool is_pointcloud() const
Curve get_curve(const size_t i) const
size_t curve_segment_offset
size_t num_curves() const
CurveShapeType curve_shape
CCL_NAMESPACE_BEGIN struct Options options
#define CCL_NAMESPACE_END
#define assert(assertion)
VecBase< float, D > step(VecOp< float, D >, VecOp< float, D >) RET
float length(VecOp< float, D >) RET
@ ATTR_STD_MOTION_VERTEX_POSITION
ccl_device_inline float2 mask(const MaskType mask, const float2 a)
size_t num_triangles() const
void bounds_grow(const float3 *points, const float *radius, BoundBox &bounds) const
Point get_point(const int i) const
size_t num_points() const
std::unique_lock< std::mutex > thread_scoped_lock