14# include "device/metal/bvh.h"
/* BVH_status(...): formats its printf-style arguments with string_printf(), stores the result as
 * the sub-status of a `progress` object (which must be in scope where the macro is expanded) and
 * echoes the same text through metal_printf().
 * NOTE(review): this listing omits some lines of the macro (e.g. any enclosing braces). */
19# define BVH_status(...) \
21 string str = string_printf(__VA_ARGS__); \
22 progress.set_substatus(str); \
23 metal_printf("%s\n", str.c_str()); \
27# ifdef BVH_THROTTLE_DIAGNOSTICS
/* Diagnostics enabled: forward to printf() with a "BVHMetalBuildThrottler::" prefix glued onto
 * the format string. */
28# define bvh_throttle_printf(...) printf("BVHMetalBuildThrottler::" __VA_ARGS__)
/* Empty variant: expands to nothing. Presumably this is the branch used when diagnostics are
 * disabled; the directive separating the two definitions is not visible here. */
30# define bvh_throttle_printf(...)
/* Bookkeeping used to limit how much memory concurrently running BVH builds may wire.
 * A single global instance, g_bvh_build_throttler, is declared together with the struct.
 *
 * NOTE(review): this listing omits many lines (braces, loops, and any locking). The comments
 * below describe only what is visible; confirm against the full source. */
35struct BVHMetalBuildThrottler {
/* Running total of bytes handed out by acquire() and not yet given back by release(). */
37 size_t wired_memory = 0;
/* Budget that wired_memory is compared against in acquire(); set in the constructor. */
38 size_t safe_wired_limit = 0;
/* Number of acquire() calls that have not yet been matched by a release(). */
39 int requests_in_flight = 0;
/* Derives the budget from the system default Metal device: one quarter of its
 * recommendedMaxWorkingSetSize. */
41 BVHMetalBuildThrottler()
45 id<MTLDevice> mtlDevice = MTLCreateSystemDefaultDevice();
48 safe_wired_limit = [mtlDevice recommendedMaxWorkingSetSize] / 4;
49 bvh_throttle_printf(
"safe_wired_limit = %zu\n", safe_wired_limit);
/* Registers a request to wire `bytes_to_be_wired` bytes.
 * The request is accepted when nothing is currently wired (so a single request larger than the
 * budget is still admitted) or when it fits under safe_wired_limit. Otherwise a "throttling"
 * diagnostic is emitted and the thread sleeps for 10 ms; presumably this happens inside a retry
 * loop whose lines are not visible here. */
53 void acquire(
size_t bytes_to_be_wired)
55 bool throttled =
false;
62 if (wired_memory == 0 || wired_memory + bytes_to_be_wired <= safe_wired_limit) {
63 wired_memory += bytes_to_be_wired;
64 requests_in_flight += 1;
65 bvh_throttle_printf(
"acquire -- success (requests_in_flight = %d, wired_memory = %zu)\n",
73 "acquire -- throttling (requests_in_flight = %d, wired_memory = %zu, "
74 "bytes_to_be_wired = %zu)\n",
82 std::this_thread::sleep_for(std::chrono::milliseconds(10));
/* Gives back `bytes_just_unwired` bytes and retires one in-flight request. */
87 void release(
size_t bytes_just_unwired)
90 wired_memory -= bytes_just_unwired;
91 requests_in_flight -= 1;
92 bvh_throttle_printf(
"release (requests_in_flight = %d, wired_memory = %zu)\n",
/* NOTE(review): the header of the method these two lines belong to is missing from this listing.
 * They test for wired_memory reaching zero and sleep for 10 ms, which looks like a polling wait
 * (a wait_for_all() member is called on the global instance elsewhere in this file). */
103 if (wired_memory == 0) {
107 std::this_thread::sleep_for(std::chrono::milliseconds(10));
110} g_bvh_build_throttler;
/* Constructor: forwards the build parameters, geometry and objects to the BVH base class,
 * stores the owning device, and (on macOS 12.0 or newer) resets the acceleration structure
 * by calling set_accel_struct(nil).
 * NOTE(review): part of the parameter list is missing from this listing, and `¶ms_` below
 * looks like a mis-encoded `&params_`; confirm against the original source. */
112BVHMetal::BVHMetal(
const BVHParams ¶ms_,
116 :
BVH(params_, geometry_, objects_), device(device)
122 if (@available(macos 12.0, *)) {
123 set_accel_struct(nil);
/* Replaces the acceleration structure held by this BVH and keeps the device memory statistics
 * in step with it.
 *
 * The visible lines subtract the old structure's allocatedSize from device->stats and send it
 * `release` (presumably guarded by a check that one is currently held; that line is not
 * visible), then, when `new_accel_struct` is non-nil, store it and add its allocatedSize to the
 * statistics. Passing nil therefore only drops the current structure.
 *
 * NOTE(review): no `retain` is visible here, so this appears to take over the caller's
 * reference; confirm against the full source. */
130API_AVAILABLE(macos(11.0))
131void BVHMetal::set_accel_struct(
id<MTLAccelerationStructure> new_accel_struct)
133 if (@available(macos 12.0, *)) {
/* Account for and release the structure being replaced. */
135 device->stats.mem_free(accel_struct.allocatedSize);
136 [accel_struct release];
/* Adopt the new structure and account for its memory. */
140 if (new_accel_struct) {
141 accel_struct = new_accel_struct;
142 device->stats.mem_alloc(accel_struct.allocatedSize);
/* Encodes and commits the Metal command buffer that builds (or refits) the bottom-level
 * acceleration structure for a triangle mesh.
 *
 * Visible parameters: `progress` (used by BVH_status), `mtl_device` (buffer and structure
 * allocation), `queue` (command buffers). `geom` and `refit` are also used/forwarded by callers.
 *
 * NOTE(review): this listing omits many lines of the function (remaining parameters, braces,
 * `else` branches, several message-send arguments and the return statements). Comments below
 * describe only what is visible. */
147bool BVHMetal::build_BLAS_mesh(
Progress &progress,
148 id<MTLDevice> mtl_device,
149 id<MTLCommandQueue> queue,
153 if (@available(macos 12.0, *)) {
/* View the geometry as a mesh; a mesh without triangles is special-cased (the body of that
 * check is not visible here). */
155 Mesh *
const mesh =
static_cast<Mesh *const
>(geom);
156 if (mesh->num_triangles() == 0) {
162 "Building mesh BLAS | %7d tris | %s", (
int)mesh->num_triangles(), geom->
name.c_str());
168 const array<int> &tris = mesh->get_triangles();
170 const size_t num_indices = tris.
size();
/* One motion step by default; the mesh's own step count is used only when motion blur is
 * enabled for this BVH, enabled on the mesh, and motion keys are available. */
172 size_t num_motion_steps = 1;
174 if (motion_blur && mesh->get_use_motion_blur() && motion_keys) {
175 num_motion_steps = mesh->get_motion_steps();
/* Shared storage on unified-memory devices; Managed storage is the other assignment
 * (presumably the `else` branch, which is not visible). */
178 MTLResourceOptions storage_mode;
179 if (mtl_device.hasUnifiedMemory) {
180 storage_mode = MTLResourceStorageModeShared;
183 storage_mode = MTLResourceStorageModeManaged;
/* Upload indices and vertex positions. With a single step the vertex array is uploaded
 * directly; otherwise one buffer sized for num_verts * num_motion_steps is allocated and filled
 * one step at a time. */
187 id<MTLBuffer> posBuf = nil;
188 id<MTLBuffer> indexBuf = [mtl_device newBufferWithBytes:tris.
data()
192 if (num_motion_steps == 1) {
193 posBuf = [mtl_device newBufferWithBytes:
verts.data()
199 newBufferWithLength:num_verts * num_motion_steps *
sizeof(
verts.data()[0])
/* The middle step is treated as the "center" step; non-center steps take a different source
 * (presumably the motion-key attribute; those lines are not visible). */
202 size_t center_step = (num_motion_steps - 1) / 2;
203 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
207 if (step != center_step) {
210 memcpy(dest_data + num_verts * step,
verts, num_verts *
sizeof(
float3));
/* Managed buffers: report the whole buffer as modified by the CPU. */
212 if (storage_mode == MTLResourceStorageModeManaged) {
213 [posBuf didModifyRange:NSMakeRange(0, posBuf.length)];
/* Geometry descriptor: the motion variant gets one MTLMotionKeyframeData per step, the
 * non-motion variant points straight at posBuf. Both use 32-bit indices, num_indices / 3
 * triangles, intersection function table offset 0 and are marked opaque. */
218 MTLAccelerationStructureGeometryDescriptor *geomDesc;
219 if (num_motion_steps > 1) {
220 std::vector<MTLMotionKeyframeData *> vertex_ptrs;
221 vertex_ptrs.reserve(num_motion_steps);
222 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
223 MTLMotionKeyframeData *k = [MTLMotionKeyframeData
data];
226 vertex_ptrs.push_back(k);
229 MTLAccelerationStructureMotionTriangleGeometryDescriptor *geomDescMotion =
230 [MTLAccelerationStructureMotionTriangleGeometryDescriptor
descriptor];
231 geomDescMotion.vertexBuffers = [NSArray arrayWithObjects:vertex_ptrs.data()
232 count:vertex_ptrs.size()];
233 geomDescMotion.vertexStride =
sizeof(
verts.data()[0]);
234 geomDescMotion.indexBuffer = indexBuf;
235 geomDescMotion.indexBufferOffset = 0;
236 geomDescMotion.indexType = MTLIndexTypeUInt32;
237 geomDescMotion.triangleCount = num_indices / 3;
238 geomDescMotion.intersectionFunctionTableOffset = 0;
239 geomDescMotion.opaque =
true;
241 geomDesc = geomDescMotion;
244 MTLAccelerationStructureTriangleGeometryDescriptor *geomDescNoMotion =
245 [MTLAccelerationStructureTriangleGeometryDescriptor
descriptor];
246 geomDescNoMotion.vertexBuffer = posBuf;
247 geomDescNoMotion.vertexBufferOffset = 0;
248 geomDescNoMotion.vertexStride =
sizeof(
verts.data()[0]);
249 geomDescNoMotion.indexBuffer = indexBuf;
250 geomDescNoMotion.indexBufferOffset = 0;
251 geomDescNoMotion.indexType = MTLIndexTypeUInt32;
252 geomDescNoMotion.triangleCount = num_indices / 3;
253 geomDescNoMotion.intersectionFunctionTableOffset = 0;
254 geomDescNoMotion.opaque =
true;
256 geomDesc = geomDescNoMotion;
262 geomDesc.allowDuplicateIntersectionFunctionInvocation =
false;
/* Primitive acceleration structure descriptor. With motion, keyframes span time 0..1 and both
 * borders use the Clamp mode. */
264 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
265 [MTLPrimitiveAccelerationStructureDescriptor
descriptor];
266 accelDesc.geometryDescriptors = @[ geomDesc ];
267 if (num_motion_steps > 1) {
268 accelDesc.motionStartTime = 0.0f;
269 accelDesc.motionEndTime = 1.0f;
270 accelDesc.motionStartBorderMode = MTLMotionBorderModeClamp;
271 accelDesc.motionEndBorderMode = MTLMotionBorderModeClamp;
272 accelDesc.motionKeyframeCount = num_motion_steps;
274 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
/* When the fast-trace BVH is not requested, ask for a refittable, fast-to-build structure. */
276 if (!use_fast_trace_bvh) {
277 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
278 MTLAccelerationStructureUsagePreferFastBuild);
/* Query the required sizes, then allocate the destination structure, a private scratch buffer
 * and an 8-byte shared buffer (named sizeBuf). */
281 MTLAccelerationStructureSizes accelSizes = [mtl_device
282 accelerationStructureSizesWithDescriptor:accelDesc];
283 id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
284 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
285 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
286 options:MTLResourceStorageModePrivate];
287 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
288 options:MTLResourceStorageModeShared];
289 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
290 id<MTLAccelerationStructureCommandEncoder> accelEnc =
291 [accelCommands accelerationStructureCommandEncoder];
/* Either refit the existing accel_struct into the new destination or build from scratch.
 * The condition choosing between the two (presumably `refit`) is not visible. */
293 [accelEnc refitAccelerationStructure:accel_struct
295 destination:accel_uncompressed
296 scratchBuffer:scratchBuf
297 scratchBufferOffset:0];
300 [accelEnc buildAccelerationStructure:accel_uncompressed
302 scratchBuffer:scratchBuf
303 scratchBufferOffset:0];
/* Fast-trace path: also record the compacted size (presumably into sizeBuf; the argument
 * lines are missing). */
305 if (use_fast_trace_bvh) {
306 [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
309 sizeDataType:MTLDataTypeULong];
311 [accelEnc endEncoding];
/* Amount reported to the build throttler: all input buffers, the scratch buffer and twice the
 * uncompressed structure's allocated size. */
315 size_t wired_size = posBuf.allocatedSize + indexBuf.allocatedSize + scratchBuf.allocatedSize +
316 accel_uncompressed.allocatedSize * 2;
/* Completion handler: releases temporaries and installs the result. */
318 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
320 [scratchBuf release];
324 if (use_fast_trace_bvh) {
/* Compaction is submitted from a global dispatch queue: allocate a structure of
 * `compressed_size`, copy-and-compact into it, and on completion install it, release the
 * uncompressed structure and return the wired budget to the throttler. */
328 dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
329 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
330 id<MTLAccelerationStructureCommandEncoder> accelEnc =
331 [accelCommands accelerationStructureCommandEncoder];
332 id<MTLAccelerationStructure> accel = [mtl_device
333 newAccelerationStructureWithSize:compressed_size];
334 [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
335 toAccelerationStructure:accel];
336 [accelEnc endEncoding];
337 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
338 set_accel_struct(accel);
339 [accel_uncompressed release];
342 g_bvh_build_throttler.release(wired_size);
344 [accelCommands commit];
/* Without compaction the uncompressed structure is installed directly. */
349 set_accel_struct(accel_uncompressed);
352 g_bvh_build_throttler.release(wired_size);
/* Reserve the wired budget (this call may wait) before submitting the build. */
359 g_bvh_build_throttler.acquire(wired_size);
360 [accelCommands commit];
/* Encodes and commits the Metal command buffer that builds (or refits) the bottom-level
 * acceleration structure for hair curves, using Metal curve geometry descriptors. The body is
 * compiled only when MAC_OS_VERSION_14_0 is defined and runs only on macOS 14.0 or newer.
 *
 * NOTE(review): this listing omits many lines of the function (remaining parameters, braces,
 * `else` branches, several declarations and message-send arguments, and the return statements).
 * Comments below describe only what is visible. */
367bool BVHMetal::build_BLAS_hair(
Progress &progress,
368 id<MTLDevice> mtl_device,
369 id<MTLCommandQueue> queue,
373# if defined(MAC_OS_VERSION_14_0)
374 if (@available(macos 14.0, *)) {
/* View the geometry as hair; hair without curves is special-cased (body not visible). */
376 Hair *hair =
static_cast<Hair *
>(geom);
377 if (hair->num_curves() == 0) {
383 "Building hair BLAS | %7d curves | %s", (
int)hair->num_curves(), geom->
name.c_str());
/* One motion step unless motion blur is enabled here and on the hair and motion keys exist. */
388 size_t num_motion_steps = 1;
390 if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
391 num_motion_steps = hair->get_motion_steps();
/* Shared storage on unified-memory devices; Managed is the other assignment (presumably the
 * `else` branch, not visible). */
394 MTLResourceOptions storage_mode;
395 if (mtl_device.hasUnifiedMemory) {
396 storage_mode = MTLResourceStorageModeShared;
399 storage_mode = MTLResourceStorageModeManaged;
402 id<MTLBuffer> cpBuffer = nil;
403 id<MTLBuffer> radiusBuffer = nil;
404 id<MTLBuffer> idxBuffer = nil;
406 MTLAccelerationStructureGeometryDescriptor *geomDesc;
/* ---- Motion variant (the condition selecting it is not visible). ---- */
408 MTLAccelerationStructureMotionCurveGeometryDescriptor *geomDescCrv =
409 [MTLAccelerationStructureMotionCurveGeometryDescriptor
descriptor];
411 uint64_t numKeys = hair->num_keys();
412 uint64_t numCurves = hair->num_curves();
413 const array<float> &radiuses = hair->get_curve_radius();
/* CPU-side staging arrays for control points, segment indices and radii. */
416 std::vector<float3> cpData;
417 std::vector<int> idxData;
418 std::vector<float> radiusData;
419 cpData.reserve(numKeys);
420 radiusData.reserve(numKeys);
/* step_offsets[step] records how many control points were already emitted when that step
 * started; it is turned into per-keyframe byte offsets further down. */
422 std::vector<int> step_offsets;
423 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
/* The center step reads the hair's own keys; other steps read from the motion_keys attribute,
 * whose storage skips the center step (hence attr_offset). */
426 const float3 *keys = hair->get_curve_keys().data();
427 size_t center_step = (num_motion_steps - 1) / 2;
428 if (step != center_step) {
429 size_t attr_offset = (
step > center_step) ? step - 1 :
step;
431 keys = motion_keys->
data_float3() + attr_offset * numKeys;
434 step_offsets.push_back(cpData.size());
/* Per curve: the first key is emitted once before the segment loop (and again at s == 0), one
 * index and one key are emitted per segment, and the last key is emitted twice afterwards.
 * Presumably this pads the end points for the 4-control-point Catmull-Rom segments configured
 * below; confirm. */
436 for (
int c = 0; c < numCurves; ++c) {
439 int firstKey = curve.first_key;
441 cpData.push_back(keys[firstKey]);
442 radiusData.push_back(radiuses[firstKey]);
443 for (
int s = 0; s < segCount; ++s) {
445 idxData.push_back(idxBase + s);
447 cpData.push_back(keys[firstKey + s]);
448 radiusData.push_back(radiuses[firstKey + s]);
450 cpData.push_back(keys[firstKey + curve.num_keys - 1]);
451 cpData.push_back(keys[firstKey + curve.num_keys - 1]);
452 radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
453 radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
/* Upload the staged arrays. */
458 idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
462 cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
466 radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
/* One keyframe pair per motion step; offsets are the step's starting element scaled by the
 * element size of the respective buffer. */
470 std::vector<MTLMotionKeyframeData *> cp_ptrs;
471 std::vector<MTLMotionKeyframeData *> radius_ptrs;
472 cp_ptrs.reserve(num_motion_steps);
473 radius_ptrs.reserve(num_motion_steps);
475 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
476 MTLMotionKeyframeData *k = [MTLMotionKeyframeData
data];
478 k.offset = step_offsets[
step] *
sizeof(
float3);
479 cp_ptrs.push_back(k);
481 k = [MTLMotionKeyframeData
data];
482 k.buffer = radiusBuffer;
483 k.offset = step_offsets[
step] *
sizeof(
float);
484 radius_ptrs.push_back(k);
/* Managed buffers: report the whole of each buffer as modified by the CPU. */
487 if (storage_mode == MTLResourceStorageModeManaged) {
488 [cpBuffer didModifyRange:NSMakeRange(0, cpBuffer.length)];
489 [idxBuffer didModifyRange:NSMakeRange(0, idxBuffer.length)];
490 [radiusBuffer didModifyRange:NSMakeRange(0, radiusBuffer.length)];
/* Describe the curve geometry: float3 control points, float radii, 4 control points per
 * segment, Catmull-Rom basis, disk end caps, 32-bit indices, intersection function table
 * offset 1, opaque. Ribbon-shaped hair maps to the Flat curve type. */
493 geomDescCrv.controlPointBuffers = [NSArray arrayWithObjects:cp_ptrs.data()
494 count:cp_ptrs.size()];
495 geomDescCrv.radiusBuffers = [NSArray arrayWithObjects:radius_ptrs.data()
496 count:radius_ptrs.size()];
498 geomDescCrv.controlPointCount = cpData.size();
499 geomDescCrv.controlPointStride =
sizeof(
float3);
500 geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
501 geomDescCrv.radiusStride =
sizeof(
float);
502 geomDescCrv.radiusFormat = MTLAttributeFormatFloat;
503 geomDescCrv.segmentCount = idxData.size();
504 geomDescCrv.segmentControlPointCount = 4;
505 geomDescCrv.curveType = (hair->curve_shape ==
CURVE_RIBBON) ? MTLCurveTypeFlat :
507 geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
508 geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
509 geomDescCrv.indexType = MTLIndexTypeUInt32;
510 geomDescCrv.indexBuffer = idxBuffer;
511 geomDescCrv.intersectionFunctionTableOffset = 1;
516 geomDescCrv.allowDuplicateIntersectionFunctionInvocation =
false;
517 geomDescCrv.opaque =
true;
518 geomDesc = geomDescCrv;
/* ---- Non-motion variant: same staging scheme with a single set of keys. ---- */
521 MTLAccelerationStructureCurveGeometryDescriptor *geomDescCrv =
522 [MTLAccelerationStructureCurveGeometryDescriptor
descriptor];
524 uint64_t numKeys = hair->num_keys();
525 uint64_t numCurves = hair->num_curves();
526 const array<float> &radiuses = hair->get_curve_radius();
529 std::vector<float3> cpData;
530 std::vector<int> idxData;
531 std::vector<float> radiusData;
532 cpData.reserve(numKeys);
533 radiusData.reserve(numKeys);
/* NOTE(review): plain `auto` deduces a value type, so this presumably copies the key array
 * (the motion variant above uses a pointer from .data() instead); confirm whether a reference
 * was intended. */
534 auto keys = hair->get_curve_keys();
535 for (
int c = 0; c < numCurves; ++c) {
537 int segCount = curve.num_segments();
538 int firstKey = curve.first_key;
539 radiusData.push_back(radiuses[firstKey]);
541 cpData.push_back(keys[firstKey]);
542 for (
int s = 0; s < segCount; ++s) {
543 idxData.push_back(idxBase + s);
544 cpData.push_back(keys[firstKey + s]);
545 radiusData.push_back(radiuses[firstKey + s]);
547 cpData.push_back(keys[firstKey + curve.num_keys - 1]);
548 cpData.push_back(keys[firstKey + curve.num_keys - 1]);
549 radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
550 radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
554 idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
558 cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
562 radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
566 if (storage_mode == MTLResourceStorageModeManaged) {
567 [cpBuffer didModifyRange:NSMakeRange(0, cpBuffer.length)];
568 [idxBuffer didModifyRange:NSMakeRange(0, idxBuffer.length)];
569 [radiusBuffer didModifyRange:NSMakeRange(0, radiusBuffer.length)];
571 geomDescCrv.controlPointBuffer = cpBuffer;
572 geomDescCrv.radiusBuffer = radiusBuffer;
573 geomDescCrv.controlPointCount = cpData.size();
574 geomDescCrv.controlPointStride =
sizeof(
float3);
575 geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
576 geomDescCrv.controlPointBufferOffset = 0;
577 geomDescCrv.segmentCount = idxData.size();
578 geomDescCrv.segmentControlPointCount = 4;
579 geomDescCrv.curveType = (hair->curve_shape ==
CURVE_RIBBON) ? MTLCurveTypeFlat :
581 geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
582 geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
583 geomDescCrv.indexType = MTLIndexTypeUInt32;
584 geomDescCrv.indexBuffer = idxBuffer;
585 geomDescCrv.intersectionFunctionTableOffset = 1;
590 geomDescCrv.allowDuplicateIntersectionFunctionInvocation =
false;
591 geomDescCrv.opaque =
true;
592 geomDesc = geomDescCrv;
/* Primitive acceleration structure descriptor. The motion settings below use the Vanish border
 * mode (build_BLAS_mesh uses Clamp); the condition guarding them is not visible. */
595 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
596 [MTLPrimitiveAccelerationStructureDescriptor
descriptor];
597 accelDesc.geometryDescriptors = @[ geomDesc ];
600 accelDesc.motionStartTime = 0.0f;
601 accelDesc.motionEndTime = 1.0f;
602 accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
603 accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
604 accelDesc.motionKeyframeCount = num_motion_steps;
/* When the fast-trace BVH is not requested, ask for a refittable, fast-to-build structure. */
607 if (!use_fast_trace_bvh) {
608 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
609 MTLAccelerationStructureUsagePreferFastBuild);
611 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
/* Query sizes; allocate the destination structure, a private scratch buffer and an 8-byte
 * shared buffer (named sizeBuf). */
613 MTLAccelerationStructureSizes accelSizes = [mtl_device
614 accelerationStructureSizesWithDescriptor:accelDesc];
615 id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
616 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
617 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
618 options:MTLResourceStorageModePrivate];
619 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
620 options:MTLResourceStorageModeShared];
621 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
622 id<MTLAccelerationStructureCommandEncoder> accelEnc =
623 [accelCommands accelerationStructureCommandEncoder];
/* Refit the existing structure or build from scratch (selecting condition not visible). */
625 [accelEnc refitAccelerationStructure:accel_struct
627 destination:accel_uncompressed
628 scratchBuffer:scratchBuf
629 scratchBufferOffset:0];
632 [accelEnc buildAccelerationStructure:accel_uncompressed
634 scratchBuffer:scratchBuf
635 scratchBufferOffset:0];
/* Fast-trace path: also record the compacted size (presumably into sizeBuf; argument lines
 * are missing). */
637 if (use_fast_trace_bvh) {
638 [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
641 sizeDataType:MTLDataTypeULong];
643 [accelEnc endEncoding];
/* Amount reported to the build throttler: all input buffers, the scratch buffer and twice the
 * uncompressed structure's allocated size. */
647 size_t wired_size = cpBuffer.allocatedSize + radiusBuffer.allocatedSize +
648 idxBuffer.allocatedSize + scratchBuf.allocatedSize +
649 accel_uncompressed.allocatedSize * 2;
/* Completion handler: releases temporaries and installs the result. */
651 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
653 [scratchBuf release];
655 [radiusBuffer release];
658 if (use_fast_trace_bvh) {
/* Compaction is submitted from a global dispatch queue; on completion the compacted structure
 * is installed, the uncompressed one released and the wired budget returned. */
661 dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
662 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
663 id<MTLAccelerationStructureCommandEncoder> accelEnc =
664 [accelCommands accelerationStructureCommandEncoder];
665 id<MTLAccelerationStructure> accel = [mtl_device
666 newAccelerationStructureWithSize:compressed_size];
667 [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
668 toAccelerationStructure:accel];
669 [accelEnc endEncoding];
670 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
671 set_accel_struct(accel);
672 [accel_uncompressed release];
675 g_bvh_build_throttler.release(wired_size);
677 [accelCommands commit];
/* Without compaction the uncompressed structure is installed directly. */
682 set_accel_struct(accel_uncompressed);
685 g_bvh_build_throttler.release(wired_size);
/* Reserve the wired budget (this call may wait) before submitting the build. */
692 g_bvh_build_throttler.acquire(wired_size);
693 [accelCommands commit];
/* Encodes and commits the Metal command buffer that builds (or refits) the bottom-level
 * acceleration structure for a point cloud. Each point is represented by an axis-aligned
 * bounding box, one box per point per motion step.
 *
 * NOTE(review): this listing omits many lines of the function (remaining parameters, braces,
 * `else` branches, several declarations and message-send arguments, and the return statements).
 * Comments below describe only what is visible. */
707bool BVHMetal::build_BLAS_pointcloud(
Progress &progress,
708 id<MTLDevice> mtl_device,
709 id<MTLCommandQueue> queue,
713 if (@available(macos 12.0, *)) {
721 BVH_status(
"Building pointcloud BLAS | %7d points | %s",
726 const size_t num_points = pointcloud->get_points().size();
727 const float3 *points = pointcloud->get_points().data();
728 const float *radius = pointcloud->get_radius().data();
/* One motion step unless motion blur is enabled here and on the point cloud and motion keys
 * exist. */
732 size_t num_motion_steps = 1;
734 if (motion_blur && pointcloud->get_use_motion_blur() && motion_keys) {
735 num_motion_steps = pointcloud->get_motion_steps();
738 const size_t num_aabbs = num_motion_steps * num_points;
/* Shared storage on unified-memory devices; Managed is the other assignment (presumably the
 * `else` branch, not visible). */
740 MTLResourceOptions storage_mode;
741 if (mtl_device.hasUnifiedMemory) {
742 storage_mode = MTLResourceStorageModeShared;
745 storage_mode = MTLResourceStorageModeManaged;
/* One buffer holds the boxes of every step, laid out step after step
 * (index = step * num_points + j). */
749 id<MTLBuffer> aabbBuf = [mtl_device
750 newBufferWithLength:num_aabbs *
sizeof(MTLAxisAlignedBoundingBox)
752 MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents];
/* The center step grows its bounds from the point cloud's own points and radii; other steps
 * use `motion_points` (its definition is not visible; attr_offset skips the center step). */
755 size_t center_step = (num_motion_steps - 1) / 2;
756 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
757 if (step == center_step) {
759 for (
size_t j = 0; j < num_points; ++j) {
762 point.bounds_grow(points, radius,
bounds);
764 const size_t index =
step * num_points + j;
765 aabb_data[index].min = (MTLPackedFloat3 &)
bounds.min;
766 aabb_data[index].max = (MTLPackedFloat3 &)
bounds.max;
770 size_t attr_offset = (
step > center_step) ? step - 1 :
step;
773 for (
size_t j = 0; j < num_points; ++j) {
776 point.bounds_grow(motion_points[j],
bounds);
778 const size_t index =
step * num_points + j;
779 aabb_data[index].min = (MTLPackedFloat3 &)
bounds.min;
780 aabb_data[index].max = (MTLPackedFloat3 &)
bounds.max;
/* Managed buffers: report the whole buffer as modified by the CPU. */
785 if (storage_mode == MTLResourceStorageModeManaged) {
786 [aabbBuf didModifyRange:NSMakeRange(0, aabbBuf.length)];
/* Prints up to the first 400 boxes. NOTE(review): this looks like debugging output that is
 * presumably compiled out by a preprocessor guard not visible in this listing; confirm. */
790 for (
size_t i=0; i<num_aabbs && i < 400; i++) {
791 MTLAxisAlignedBoundingBox& bb = aabb_data[i];
792 printf(
" %d: %.1f,%.1f,%.1f -- %.1f,%.1f,%.1f\n",
int(i), bb.min.x, bb.min.y, bb.min.z, bb.max.x, bb.max.y, bb.max.z);
/* Geometry descriptor. Motion variant: one keyframe per step, each offset by
 * step * num_points boxes, with num_points boxes per keyframe. */
796 MTLAccelerationStructureGeometryDescriptor *geomDesc;
798 std::vector<MTLMotionKeyframeData *> aabb_ptrs;
799 aabb_ptrs.reserve(num_motion_steps);
800 for (
size_t step = 0;
step < num_motion_steps; ++
step) {
801 MTLMotionKeyframeData *k = [MTLMotionKeyframeData
data];
803 k.offset =
step * num_points *
sizeof(MTLAxisAlignedBoundingBox);
804 aabb_ptrs.push_back(k);
807 MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor *geomDescMotion =
808 [MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor
descriptor];
809 geomDescMotion.boundingBoxBuffers = [NSArray arrayWithObjects:aabb_ptrs.data()
810 count:aabb_ptrs.size()];
811 geomDescMotion.boundingBoxCount = num_points;
812 geomDescMotion.boundingBoxStride =
sizeof(aabb_data[0]);
813 geomDescMotion.intersectionFunctionTableOffset = 2;
818 geomDescMotion.allowDuplicateIntersectionFunctionInvocation =
false;
819 geomDescMotion.opaque =
true;
820 geomDesc = geomDescMotion;
/* Non-motion variant: a single buffer holding num_aabbs boxes. Both variants use intersection
 * function table offset 2 and are marked opaque. */
823 MTLAccelerationStructureBoundingBoxGeometryDescriptor *geomDescNoMotion =
824 [MTLAccelerationStructureBoundingBoxGeometryDescriptor
descriptor];
825 geomDescNoMotion.boundingBoxBuffer = aabbBuf;
826 geomDescNoMotion.boundingBoxBufferOffset = 0;
827 geomDescNoMotion.boundingBoxCount =
int(num_aabbs);
828 geomDescNoMotion.boundingBoxStride =
sizeof(aabb_data[0]);
829 geomDescNoMotion.intersectionFunctionTableOffset = 2;
834 geomDescNoMotion.allowDuplicateIntersectionFunctionInvocation =
false;
835 geomDescNoMotion.opaque =
true;
836 geomDesc = geomDescNoMotion;
/* Primitive acceleration structure descriptor; motion keyframes span time 0..1 (the guarding
 * condition and the border-mode lines are not visible). */
839 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
840 [MTLPrimitiveAccelerationStructureDescriptor
descriptor];
841 accelDesc.geometryDescriptors = @[ geomDesc ];
844 accelDesc.motionStartTime = 0.0f;
845 accelDesc.motionEndTime = 1.0f;
848 accelDesc.motionKeyframeCount = num_motion_steps;
850 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
/* When the fast-trace BVH is not requested, ask for a refittable, fast-to-build structure. */
852 if (!use_fast_trace_bvh) {
853 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
854 MTLAccelerationStructureUsagePreferFastBuild);
/* Query sizes; allocate the destination structure, a private scratch buffer and an 8-byte
 * shared buffer (named sizeBuf). */
857 MTLAccelerationStructureSizes accelSizes = [mtl_device
858 accelerationStructureSizesWithDescriptor:accelDesc];
859 id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
860 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
861 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
862 options:MTLResourceStorageModePrivate];
863 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
864 options:MTLResourceStorageModeShared];
865 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
866 id<MTLAccelerationStructureCommandEncoder> accelEnc =
867 [accelCommands accelerationStructureCommandEncoder];
/* Refit the existing structure or build from scratch (selecting condition not visible). */
869 [accelEnc refitAccelerationStructure:accel_struct
871 destination:accel_uncompressed
872 scratchBuffer:scratchBuf
873 scratchBufferOffset:0];
876 [accelEnc buildAccelerationStructure:accel_uncompressed
878 scratchBuffer:scratchBuf
879 scratchBufferOffset:0];
/* Fast-trace path: also record the compacted size (presumably into sizeBuf; argument lines
 * are missing). */
881 if (use_fast_trace_bvh) {
882 [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
885 sizeDataType:MTLDataTypeULong];
887 [accelEnc endEncoding];
/* Amount reported to the build throttler: the box buffer, the scratch buffer and twice the
 * uncompressed structure's allocated size. */
891 size_t wired_size = aabbBuf.allocatedSize + scratchBuf.allocatedSize +
892 accel_uncompressed.allocatedSize * 2;
/* Completion handler: releases temporaries and installs the result. */
894 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
896 [scratchBuf release];
899 if (use_fast_trace_bvh) {
/* Compaction is submitted from a global dispatch queue; on completion the compacted structure
 * is installed, the uncompressed one released and the wired budget returned. */
903 dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
904 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
905 id<MTLAccelerationStructureCommandEncoder> accelEnc =
906 [accelCommands accelerationStructureCommandEncoder];
907 id<MTLAccelerationStructure> accel = [mtl_device
908 newAccelerationStructureWithSize:compressed_size];
909 [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
910 toAccelerationStructure:accel];
911 [accelEnc endEncoding];
912 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> ) {
913 set_accel_struct(accel);
914 [accel_uncompressed release];
917 g_bvh_build_throttler.release(wired_size);
919 [accelCommands commit];
/* Without compaction the uncompressed structure is installed directly. */
924 set_accel_struct(accel_uncompressed);
927 g_bvh_build_throttler.release(wired_size);
/* Reserve the wired budget (this call may wait) before submitting the build. */
934 g_bvh_build_throttler.acquire(wired_size);
935 [accelCommands commit];
/* Builds the bottom-level acceleration structure for this BVH by forwarding to the mesh, hair
 * or point-cloud builder and returning that builder's result. The assertion requires exactly one
 * object and one geometry.
 * NOTE(review): the lines that select which builder runs (presumably a switch on the geometry
 * type) and the rest of the parameter list are missing from this listing. */
941bool BVHMetal::build_BLAS(
Progress &progress,
942 id<MTLDevice> mtl_device,
943 id<MTLCommandQueue> queue,
946 assert(objects.size() == 1 && geometry.size() == 1);
953 return build_BLAS_mesh(progress, mtl_device, queue, geom,
refit);
955 return build_BLAS_hair(progress, mtl_device, queue, geom,
refit);
957 return build_BLAS_pointcloud(progress, mtl_device, queue, geom,
refit);
/* Builds (or refits) the top-level acceleration structure: one instance descriptor per object,
 * each referring to a bottom-level structure by index, followed by a synchronous build on
 * `queue`. Afterwards the result is installed with set_accel_struct() and the list of distinct
 * bottom-level structures is copied into unique_blas_array.
 *
 * NOTE(review): this listing omits many lines of the function (remaining parameters, braces,
 * `else` branches, several declarations and message-send arguments, and the return statements).
 * Comments below describe only what is visible. */
964bool BVHMetal::build_TLAS(
Progress &progress,
965 id<MTLDevice> mtl_device,
966 id<MTLCommandQueue> queue,
/* Calls wait_for_all() on the build throttler first (presumably so that every pending
 * bottom-level build has finished; its definition is not fully visible). */
970 g_bvh_build_throttler.wait_for_all();
972 if (@available(macos 12.0, *)) {
/* Helper that synchronously builds a triangle acceleration structure with zero triangles. It
 * is used below as a stand-in for objects that have no usable bottom-level structure. */
974 auto make_null_BLAS = [](id<MTLDevice> mtl_device,
975 id<MTLCommandQueue> queue) -> id<MTLAccelerationStructure> {
976 MTLResourceOptions storage_mode = MTLResourceStorageModeManaged;
977 if (mtl_device.hasUnifiedMemory) {
978 storage_mode = MTLResourceStorageModeShared;
/* A single float3-sized buffer serves as both vertex and index buffer; with triangleCount = 0
 * no triangle is described by it. */
981 id<MTLBuffer> nullBuf = [mtl_device newBufferWithLength:
sizeof(
float3)
options:storage_mode];
984 MTLAccelerationStructureTriangleGeometryDescriptor *geomDesc =
985 [MTLAccelerationStructureTriangleGeometryDescriptor
descriptor];
986 geomDesc.vertexBuffer = nullBuf;
987 geomDesc.vertexBufferOffset = 0;
988 geomDesc.vertexStride =
sizeof(
float3);
989 geomDesc.indexBuffer = nullBuf;
990 geomDesc.indexBufferOffset = 0;
991 geomDesc.indexType = MTLIndexTypeUInt32;
992 geomDesc.triangleCount = 0;
993 geomDesc.intersectionFunctionTableOffset = 0;
994 geomDesc.opaque =
true;
995 geomDesc.allowDuplicateIntersectionFunctionInvocation =
false;
997 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
998 [MTLPrimitiveAccelerationStructureDescriptor
descriptor];
999 accelDesc.geometryDescriptors = @[ geomDesc ];
1000 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
1002 MTLAccelerationStructureSizes accelSizes = [mtl_device
1003 accelerationStructureSizesWithDescriptor:accelDesc];
1004 id<MTLAccelerationStructure> accel_struct = [mtl_device
1005 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
1006 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
1007 options:MTLResourceStorageModePrivate];
1008 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
1009 options:MTLResourceStorageModeShared];
1010 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
1011 id<MTLAccelerationStructureCommandEncoder> accelEnc =
1012 [accelCommands accelerationStructureCommandEncoder];
1013 [accelEnc buildAccelerationStructure:accel_struct
1015 scratchBuffer:scratchBuf
1016 scratchBufferOffset:0];
1017 [accelEnc endEncoding];
/* The helper blocks until the GPU has finished before releasing temporaries and returning. */
1018 [accelCommands commit];
1019 [accelCommands waitUntilCompleted];
1022 [scratchBuf release];
1026 return accel_struct;
/* Count the motion transforms needed: an object using motion contributes at least one and
 * otherwise as many as it has motion keys; the plain increment is presumably the branch for
 * objects without motion (the `else` line is not visible). */
1030 uint32_t num_motion_transforms = 0;
1031 for (
Object *ob : objects) {
1034 if (ob->use_motion()) {
1035 num_motion_transforms +=
max((
size_t)1, ob->get_motion().size());
1038 num_motion_transforms++;
1042 if (num_instances == 0) {
1047 BVH_status(
"Building TLAS | %7d instances", (
int)num_instances);
/* all_blas collects each distinct bottom-level structure once; instance_mapping remembers the
 * index assigned to each BVHMetal so repeated geometry reuses the same entry. A null `blas`
 * key maps to the shared null structure. */
1052 NSMutableArray *all_blas = [NSMutableArray
array];
1053 unordered_map<BVHMetal const *, int> instance_mapping;
1056 auto get_blas_index = [&](BVHMetal
const *blas) {
1057 auto it = instance_mapping.find(blas);
1058 if (it != instance_mapping.end()) {
1062 int blas_index = (
int)[all_blas
count];
1063 instance_mapping[blas] = blas_index;
1064 if (@available(macos 12.0, *)) {
1065 [all_blas addObject:(blas ? blas->accel_struct : null_BLAS)];
/* Shared storage on unified-memory devices; Managed is the other assignment (presumably the
 * `else` branch, not visible). */
1071 MTLResourceOptions storage_mode;
1072 if (mtl_device.hasUnifiedMemory) {
1073 storage_mode = MTLResourceStorageModeShared;
1076 storage_mode = MTLResourceStorageModeManaged;
/* The instance stride depends on the descriptor flavour: motion instance descriptors or
 * user-ID instance descriptors (the selecting condition is not visible). */
1079 size_t instance_size;
1081 instance_size =
sizeof(MTLAccelerationStructureMotionInstanceDescriptor);
1084 instance_size =
sizeof(MTLAccelerationStructureUserIDInstanceDescriptor);
1088 id<MTLBuffer> instanceBuf = [mtl_device newBufferWithLength:num_instances * instance_size
/* The motion transform buffer exists only with motion blur and at least one transform. */
1090 id<MTLBuffer> motion_transforms_buf = nil;
1091 MTLPackedFloat4x3 *motion_transforms =
nullptr;
1092 if (motion_blur && num_motion_transforms) {
1093 motion_transforms_buf = [mtl_device
1094 newBufferWithLength:num_motion_transforms *
sizeof(MTLPackedFloat4x3)
1096 motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents;
1100 uint32_t motion_transform_index = 0;
1103 blas_array.reserve(num_instances);
/* Fill one instance descriptor per object. */
1105 for (
Object *ob : objects) {
1107 Geometry const *geom = ob->get_geometry();
1108 BVHMetal
const *blas =
static_cast<BVHMetal
const *
>(geom->
bvh);
/* Objects without a bottom-level structure, or that are not traceable, fall back to the null
 * structure (created via make_null_BLAS). */
1109 if (!blas || !blas->accel_struct || !ob->is_traceable()) {
1117 null_BLAS = make_null_BLAS(mtl_device, queue);
1119 blas_array.push_back(null_BLAS);
1122 blas_array.push_back(blas->accel_struct);
1125 uint32_t accel_struct_index = get_blas_index(blas);
1131 uint32_t mask = ob->visibility_for_tracing();
1140 int currIndex = instance_index++;
/* primitive_offset is stored as the instance's userID below; for hair it comes from
 * curve_segment_offset and for meshes from prim_offset. */
1145 primitive_offset =
uint32_t(hair->curve_segment_offset);
1150 primitive_offset =
uint32_t(mesh->prim_offset);
/* Motion instance descriptor: time range 0..1, Vanish border modes, and a run of transforms
 * starting at motion_transform_index. */
1161 MTLAccelerationStructureMotionInstanceDescriptor *instances =
1162 (MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
1163 MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[currIndex];
1165 desc.accelerationStructureIndex = accel_struct_index;
1166 desc.userID = primitive_offset;
1168 desc.motionStartTime = 0.0f;
1169 desc.motionEndTime = 1.0f;
1170 desc.motionTransformsStartIndex = motion_transform_index;
1171 desc.motionStartBorderMode = MTLMotionBorderModeVanish;
1172 desc.motionEndBorderMode = MTLMotionBorderModeVanish;
1173 desc.intersectionFunctionTableOffset = 0;
1175 int key_count = ob->get_motion().size();
1177 desc.motionTransformsCount = key_count;
/* Copy each motion key, transposing while copying: destination element 3 * c + r receives
 * source element 4 * r + c, i.e. rows of four floats become columns of three.
 * NOTE(review): the inner loop index `i` shadows the outer key index `i`. */
1179 Transform *keys = ob->get_motion().data();
1180 for (
int i = 0; i < key_count; i++) {
1181 float *t = (
float *)&motion_transforms[motion_transform_index++];
1183 auto src = (
float const *)&keys[i];
1184 for (
int i = 0; i < 12; i++) {
1185 t[i] = src[(i / 3) + 4 * (i % 3)];
/* Objects without motion keys get a single transform: the object transform when the geometry
 * is instanced, otherwise ones on the diagonal (presumably an identity matrix after zeroing;
 * the zeroing line is not visible). */
1190 desc.motionTransformsCount = 1;
1192 float *t = (
float *)&motion_transforms[motion_transform_index++];
1193 if (ob->get_geometry()->is_instanced()) {
1195 auto src = (
float const *)&ob->get_tfm();
1196 for (
int i = 0; i < 12; i++) {
1197 t[i] = src[(i / 3) + 4 * (i % 3)];
1202 t[0] = t[4] = t[8] = 1.0f;
/* User-ID instance descriptor (no motion): the transform is written straight into the
 * descriptor using the same transposition. */
1207 MTLAccelerationStructureUserIDInstanceDescriptor *instances =
1208 (MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
1209 MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[currIndex];
1211 desc.accelerationStructureIndex = accel_struct_index;
1212 desc.userID = primitive_offset;
1214 desc.intersectionFunctionTableOffset = 0;
1215 desc.options = MTLAccelerationStructureInstanceOptionOpaque;
1217 float *t = (
float *)&desc.transformationMatrix;
1218 if (ob->get_geometry()->is_instanced()) {
1220 auto src = (
float const *)&ob->get_tfm();
1221 for (
int i = 0; i < 12; i++) {
1222 t[i] = src[(i / 3) + 4 * (i % 3)];
1227 t[0] = t[4] = t[8] = 1.0f;
/* Managed buffers: report the CPU-written ranges. The assertion checks that exactly the
 * pre-counted number of motion transforms was written. */
1232 if (storage_mode == MTLResourceStorageModeManaged) {
1233 [instanceBuf didModifyRange:NSMakeRange(0, instanceBuf.length)];
1234 if (motion_transforms_buf) {
1235 [motion_transforms_buf didModifyRange:NSMakeRange(0, motion_transforms_buf.length)];
1236 assert(num_motion_transforms == motion_transform_index);
/* Instance acceleration structure descriptor. The descriptor type starts as UserID and is
 * switched to Motion, with the transform buffer attached, by the later assignments (their
 * guarding condition is not visible). */
1240 MTLInstanceAccelerationStructureDescriptor *accelDesc =
1241 [MTLInstanceAccelerationStructureDescriptor
descriptor];
1242 accelDesc.instanceCount = num_instances;
1243 accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeUserID;
1244 accelDesc.instanceDescriptorBuffer = instanceBuf;
1245 accelDesc.instanceDescriptorBufferOffset = 0;
1246 accelDesc.instanceDescriptorStride = instance_size;
1247 accelDesc.instancedAccelerationStructures = all_blas;
1250 accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeMotion;
1251 accelDesc.motionTransformBuffer = motion_transforms_buf;
1252 accelDesc.motionTransformCount = num_motion_transforms;
1255 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
1256 if (!use_fast_trace_bvh) {
1257 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
1258 MTLAccelerationStructureUsagePreferFastBuild);
/* Allocate the structure and scratch space, then refit or build (selecting condition not
 * visible) and wait for the GPU to finish. */
1261 MTLAccelerationStructureSizes accelSizes = [mtl_device
1262 accelerationStructureSizesWithDescriptor:accelDesc];
1263 id<MTLAccelerationStructure> accel = [mtl_device
1264 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
1265 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
1266 options:MTLResourceStorageModePrivate];
1267 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
1268 id<MTLAccelerationStructureCommandEncoder> accelEnc =
1269 [accelCommands accelerationStructureCommandEncoder];
1271 [accelEnc refitAccelerationStructure:accel_struct
1274 scratchBuffer:scratchBuf
1275 scratchBufferOffset:0];
1278 [accelEnc buildAccelerationStructure:accel
1280 scratchBuffer:scratchBuf
1281 scratchBufferOffset:0];
1283 [accelEnc endEncoding];
1284 [accelCommands commit];
1285 [accelCommands waitUntilCompleted];
/* The build has completed, so the temporary buffers can be released. */
1287 if (motion_transforms_buf) {
1288 [motion_transforms_buf release];
1290 [instanceBuf release];
1291 [scratchBuf release];
/* Install the new top-level structure and publish the distinct bottom-level structures. */
1294 set_accel_struct(accel);
1296 unique_blas_array.clear();
1297 unique_blas_array.reserve(all_blas.count);
1298 [all_blas enumerateObjectsUsingBlock:^(id<MTLAccelerationStructure> blas, NSUInteger,
BOOL *) {
1299 unique_blas_array.push_back(blas);
/* Entry point for building this BVH on macOS 12.0 or newer: returns the result of either
 * build_BLAS() or build_TLAS(), forwarding `progress`, `mtl_device`, `queue` and `refit`.
 * NOTE(review): the conditions that choose between the two builders, and the one guarding the
 * set_accel_struct(nil) reset, are missing from this listing; presumably the choice depends on
 * whether this is a top-level BVH. Confirm against the full source. */
1307bool BVHMetal::build(
Progress &progress,
1308 id<MTLDevice> mtl_device,
1309 id<MTLCommandQueue> queue,
1312 if (@available(macos 12.0, *)) {
/* Drops the currently held acceleration structure. */
1323 set_accel_struct(nil);
1329 return build_BLAS(progress, mtl_device, queue,
refit);
1332 return build_TLAS(progress, mtl_device, queue,
refit);
static btDbvtVolume bounds(btDbvtNode **leaves, int count)
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE btScalar length() const
Return the length of the vector.
Attribute * find(ustring name) const
CCL_NAMESPACE_BEGIN struct Options options
#define CCL_NAMESPACE_END
draw_view in_light_buf[] float
draw_view push_constant(Type::INT, "radiance_src") .push_constant(Type capture_info_buf storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]") .push_constant(Type draw_view int
@ ATTR_STD_MOTION_VERTEX_POSITION
ccl_device_inline float4 mask(const int4 mask, const float4 a)
T step(const T &edge, const T &value)
VecBase< float, 4 > float4
unsigned __int64 uint64_t
Point get_point(int i) const
size_t num_points() const
std::unique_lock< std::mutex > thread_scoped_lock
CCL_NAMESPACE_BEGIN typedef std::mutex thread_mutex