Blender V5.0
bvh.mm
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_METAL
6
# include <algorithm>
# include <chrono>
# include <mutex>
# include <thread>
# include <vector>
11
12# include "scene/hair.h"
13# include "scene/mesh.h"
14# include "scene/object.h"
15# include "scene/pointcloud.h"
16
17# include "util/progress.h"
18
19# include "device/metal/bvh.h"
20# include "device/metal/util.h"
21
23
/* Report BVH build progress: sets the substatus on the `progress` variable that
 * must exist in the calling scope, and echoes the same message to the Metal
 * debug log. */
# define BVH_status(...) \
  { \
    string str = string_printf(__VA_ARGS__); \
    progress.set_substatus(str); \
    metal_printf("%s", str.c_str()); \
  }

/* Uncomment to get verbose logging from the BVH build throttler below. */
// # define BVH_THROTTLE_DIAGNOSTICS
# ifdef BVH_THROTTLE_DIAGNOSTICS
# define bvh_throttle_printf(...) printf("BVHMetalBuildThrottler::" __VA_ARGS__)
# else
# define bvh_throttle_printf(...)
# endif

/* This flag didn't exist until Xcode 26.0, so we ensure that it is defined for
 * forward-compatibility.
 */
# ifndef MAC_OS_VERSION_26_0
# define MTLAccelerationStructureUsagePreferFastIntersection \
    MTLAccelerationStructureUsage(1 << 4)
# endif
45
46/* Limit the number of concurrent BVH builds so that we don't approach unsafe GPU working set
47 * sizes. */
struct BVHMetalBuildThrottler {
  /* Guards all counters below. `acquire` is called from render threads while
   * `release` runs on Metal command-buffer completion handlers / GCD queues,
   * so unsynchronized access would be a data race. */
  std::mutex mutex;

  /* Bytes currently wired for in-flight BVH builds. */
  size_t wired_memory = 0;
  /* Threshold above which additional builds are throttled. */
  size_t safe_wired_limit = 0;
  /* Number of builds currently in flight (diagnostics only). */
  int requests_in_flight = 0;

  BVHMetalBuildThrottler()
  {
    /* The default device will always be the one that supports MetalRT if the machine supports it.
     */
    id<MTLDevice> mtlDevice = MTLCreateSystemDefaultDevice();

    /* Set a conservative limit, but which will still only throttle in extreme cases. */
    safe_wired_limit = [mtlDevice recommendedMaxWorkingSetSize] / 4;
    bvh_throttle_printf("safe_wired_limit = %zu\n", safe_wired_limit);
  }

  /* Block until we're safely able to wire the requested resources. */
  void acquire(const size_t bytes_to_be_wired)
  {
    bool throttled = false;
    while (true) {
      {
        std::lock_guard<std::mutex> lock(mutex);

        /* Always allow a BVH build to proceed if no other is in flight, otherwise
         * only proceed if we're within safe limits. */
        if (wired_memory == 0 || wired_memory + bytes_to_be_wired <= safe_wired_limit) {
          wired_memory += bytes_to_be_wired;
          requests_in_flight += 1;
          bvh_throttle_printf("acquire -- success (requests_in_flight = %d, wired_memory = %zu)\n",
                              requests_in_flight,
                              wired_memory);
          return;
        }

        /* Log the throttle transition only once per acquire call. */
        if (!throttled) {
          bvh_throttle_printf(
              "acquire -- throttling (requests_in_flight = %d, wired_memory = %zu, "
              "bytes_to_be_wired = %zu)\n",
              requests_in_flight,
              wired_memory,
              bytes_to_be_wired);
        }
        throttled = true;
      }

      /* Sleep outside the lock so releases can make progress. */
      std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
  }

  /* Notify of resources that have stopped being wired. */
  void release(const size_t bytes_just_unwired)
  {
    std::lock_guard<std::mutex> lock(mutex);
    wired_memory -= bytes_just_unwired;
    requests_in_flight -= 1;
    bvh_throttle_printf("release (requests_in_flight = %d, wired_memory = %zu)\n",
                        requests_in_flight,
                        wired_memory);
  }

  /* Wait for all outstanding work to finish. */
  void wait_for_all()
  {
    while (true) {
      {
        std::lock_guard<std::mutex> lock(mutex);
        if (wired_memory == 0) {
          return;
        }
      }
      std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
  }
} g_bvh_build_throttler;
124
125/* macOS 15.2 and 15.3 has a bug in the dynamic BVH refitting which leads to missing geometry
126 * during render. The issue is fixed in the macOS 15.4, until then disable refitting even for
127 * the viewport.
128 * Note that dynamic BVH is still used on the scene level to speed up updates of instances and
129 * such. #132782. */
static bool support_refit_blas()
{
  /* Refitting is known-broken only on macOS 15.2 and 15.3 (see comment above). */
  if (@available(macos 15.2, *)) {
    if (@available(macos 15.4, *)) {
      /* Fixed again from 15.4 onwards. */
      return true;
    }
    /* Running 15.2 or 15.3: disable BLAS refitting. */
    return false;
  }
  /* Releases before 15.2 are unaffected. */
  return true;
}
140
/* Construct a BVH wrapper for the given geometry/objects on `device`. The
 * actual Metal acceleration structure is created later by the build_BLAS_*
 * methods. */
BVHMetal::BVHMetal(const BVHParams &params_,
                   const vector<Geometry *> &geometry_,
                   const vector<Object *> &objects_,
                   Device *device)
    : BVH(params_, geometry_, objects_), device(device)
{
}
148
/* Release the owned acceleration structure (updating device memory stats via
 * set_accel_struct) and the null BLAS placeholder, if any. */
BVHMetal::~BVHMetal()
{
  if (@available(macos 12.0, *)) {
    /* Passing nil releases the current structure and frees its stats. */
    set_accel_struct(nil);
    if (null_BLAS) {
      [null_BLAS release];
    }
  }
}
158
/* Replace the currently-held acceleration structure with `new_accel_struct`,
 * keeping the device memory statistics in sync. Passing nil simply releases
 * the current structure. */
API_AVAILABLE(macos(11.0))
void BVHMetal::set_accel_struct(id<MTLAccelerationStructure> new_accel_struct)
{
  if (@available(macos 12.0, *)) {
    /* Un-count and release the previous structure, if any. */
    if (accel_struct) {
      device->stats.mem_free(accel_struct.allocatedSize);
      [accel_struct release];
      accel_struct = nil;
    }

    /* Take ownership of the new structure and account for its memory. */
    if (new_accel_struct) {
      accel_struct = new_accel_struct;
      device->stats.mem_alloc(accel_struct.allocatedSize);
    }
  }
}
175
/* Build (or refit) the bottom-level acceleration structure for the triangle
 * primitives of `geom`.
 *
 * Uploads vertex/index data into shared MTLBuffers, encodes an acceleration
 * structure build (or refit of `accel_struct`), optionally followed by a
 * compaction pass for static BVHs, and commits it asynchronously. Completion
 * handlers free the temporary buffers, install the result via
 * set_accel_struct() and release the throttler budget.
 *
 * Returns true if a build was submitted, false if the mesh has no triangles
 * or MetalRT is unavailable on this OS. */
bool BVHMetal::build_BLAS_mesh(Progress &progress,
                               id<MTLDevice> mtl_device,
                               id<MTLCommandQueue> queue,
                               Geometry *const geom,
                               bool refit)
{
  if (@available(macos 12.0, *)) {
    /* Build BLAS for triangle primitives */
    Mesh *const mesh = static_cast<Mesh *const>(geom);
    if (mesh->num_triangles() == 0) {
      return false;
    }

    /* Static BVHs (and OS versions with broken refitting) get a compacted
     * fast-trace BVH instead of a refittable one. */
    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC) || !support_refit_blas();

    const array<float3> &verts = mesh->get_verts();
    const array<int> &tris = mesh->get_triangles();
    const size_t num_verts = verts.size();
    const size_t num_indices = tris.size();

    size_t num_motion_steps = 1;
    /* Look up the motion vertex positions; `motion_keys` was referenced below
     * without a declaration (line lost in this copy) — restored to match the
     * pointcloud BLAS build. */
    Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
    if (motion_blur && mesh->get_use_motion_blur() && motion_keys) {
      num_motion_steps = mesh->get_motion_steps();
    }

    /* Upload the mesh data to the GPU */
    id<MTLBuffer> posBuf = nil;
    id<MTLBuffer> indexBuf = [mtl_device newBufferWithBytes:tris.data()
                                                     length:num_indices * sizeof(tris.data()[0])
                                                    options:MTLResourceStorageModeShared];

    if (num_motion_steps == 1) {
      posBuf = [mtl_device newBufferWithBytes:verts.data()
                                       length:num_verts * sizeof(verts.data()[0])
                                      options:MTLResourceStorageModeShared];
    }
    else {
      /* Pack all motion steps into one buffer, one step's vertices after
       * another. */
      posBuf = [mtl_device
          newBufferWithLength:num_verts * num_motion_steps * sizeof(verts.data()[0])
                      options:MTLResourceStorageModeShared];
      float3 *dest_data = (float3 *)[posBuf contents];
      size_t center_step = (num_motion_steps - 1) / 2;
      for (size_t step = 0; step < num_motion_steps; ++step) {
        const float3 *verts = mesh->get_verts().data();

        /* The center step for motion vertices is not stored in the attribute. */
        if (step != center_step) {
          verts = motion_keys->data_float3() + (step > center_step ? step - 1 : step) * num_verts;
        }
        std::copy_n(verts, num_verts, dest_data + num_verts * step);
      }
    }

    /* Create an acceleration structure. */
    MTLAccelerationStructureGeometryDescriptor *geomDesc;
    if (num_motion_steps > 1) {
      /* One keyframe descriptor per motion step, all pointing into posBuf. */
      std::vector<MTLMotionKeyframeData *> vertex_ptrs;
      vertex_ptrs.reserve(num_motion_steps);
      for (size_t step = 0; step < num_motion_steps; ++step) {
        MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
        k.buffer = posBuf;
        k.offset = num_verts * step * sizeof(float3);
        vertex_ptrs.push_back(k);
      }

      MTLAccelerationStructureMotionTriangleGeometryDescriptor *geomDescMotion =
          [MTLAccelerationStructureMotionTriangleGeometryDescriptor descriptor];
      geomDescMotion.vertexBuffers = [NSArray arrayWithObjects:vertex_ptrs.data()
                                                         count:vertex_ptrs.size()];
      geomDescMotion.vertexStride = sizeof(verts.data()[0]);
      geomDescMotion.indexBuffer = indexBuf;
      geomDescMotion.indexBufferOffset = 0;
      geomDescMotion.indexType = MTLIndexTypeUInt32;
      geomDescMotion.triangleCount = num_indices / 3;
      geomDescMotion.intersectionFunctionTableOffset = 0;
      geomDescMotion.opaque = true;

      geomDesc = geomDescMotion;

      BVH_status("Building motion mesh BLAS | %7d tris | %s | %7d motion keyframes",
                 (int)mesh->num_triangles(),
                 geom->name.c_str(),
                 (int)num_motion_steps);
    }
    else {
      MTLAccelerationStructureTriangleGeometryDescriptor *geomDescNoMotion =
          [MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
      geomDescNoMotion.vertexBuffer = posBuf;
      geomDescNoMotion.vertexBufferOffset = 0;
      geomDescNoMotion.vertexStride = sizeof(verts.data()[0]);
      geomDescNoMotion.indexBuffer = indexBuf;
      geomDescNoMotion.indexBufferOffset = 0;
      geomDescNoMotion.indexType = MTLIndexTypeUInt32;
      geomDescNoMotion.triangleCount = num_indices / 3;
      geomDescNoMotion.intersectionFunctionTableOffset = 0;
      geomDescNoMotion.opaque = true;

      geomDesc = geomDescNoMotion;

      BVH_status(
          "Building mesh BLAS | %7d tris | %s", (int)mesh->num_triangles(), geom->name.c_str());
    }

    /* Force a single any-hit call, so shadow record-all behavior works correctly */
    /* (Match optix behavior: unsigned int build_flags =
     * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
    geomDesc.allowDuplicateIntersectionFunctionInvocation = false;

    MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
        [MTLPrimitiveAccelerationStructureDescriptor descriptor];
    accelDesc.geometryDescriptors = @[ geomDesc ];
    if (num_motion_steps > 1) {
      accelDesc.motionStartTime = 0.0f;
      accelDesc.motionEndTime = 1.0f;
      accelDesc.motionStartBorderMode = MTLMotionBorderModeClamp;
      accelDesc.motionEndBorderMode = MTLMotionBorderModeClamp;
      accelDesc.motionKeyframeCount = num_motion_steps;
    }
    if (extended_limits) {
      accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
    }

    if (!use_fast_trace_bvh) {
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }
    else if (@available(macos 26.0, *)) {
      accelDesc.usage |= MTLAccelerationStructureUsagePreferFastIntersection;
    }

    /* Allocate the destination structure, scratch space, and a small shared
     * buffer to read back the compacted size. */
    MTLAccelerationStructureSizes accelSizes = [mtl_device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                       options:MTLResourceStorageModePrivate];
    id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
                                                    options:MTLResourceStorageModeShared];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    if (refit) {
      [accelEnc refitAccelerationStructure:accel_struct
                                descriptor:accelDesc
                               destination:accel_uncompressed
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel_uncompressed
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    if (use_fast_trace_bvh) {
      [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
                                               toBuffer:sizeBuf
                                                 offset:0
                                           sizeDataType:MTLDataTypeULong];
    }
    [accelEnc endEncoding];

    /* Estimated size of resources that will be wired for the GPU accelerated build.
     * Acceleration-struct size is doubled to account for possible compaction step. */
    size_t wired_size = posBuf.allocatedSize + indexBuf.allocatedSize + scratchBuf.allocatedSize +
                        accel_uncompressed.allocatedSize * 2;

    [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
      /* free temp resources */
      [scratchBuf release];
      [indexBuf release];
      [posBuf release];

      if (use_fast_trace_bvh) {
        /* Compact the accel structure */
        uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;

        /* Run the compaction pass off the completion-handler thread. */
        dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
          id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
          id<MTLAccelerationStructureCommandEncoder> accelEnc =
              [accelCommands accelerationStructureCommandEncoder];
          id<MTLAccelerationStructure> accel = [mtl_device
              newAccelerationStructureWithSize:compressed_size];
          [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
                                toAccelerationStructure:accel];
          [accelEnc endEncoding];
          [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
            set_accel_struct(accel);
            [accel_uncompressed release];

            /* Signal that we've finished doing GPU acceleration struct build. */
            g_bvh_build_throttler.release(wired_size);
          }];
          [accelCommands commit];
        });
      }
      else {
        /* set our acceleration structure to the uncompressed structure */
        set_accel_struct(accel_uncompressed);

        /* Signal that we've finished doing GPU acceleration struct build. */
        g_bvh_build_throttler.release(wired_size);
      }

      [sizeBuf release];
    }];

    /* Wait until it's safe to proceed with GPU acceleration struct build. */
    g_bvh_build_throttler.acquire(wired_size);
    [accelCommands commit];

    return true;
  }
  return false;
}
392
/* Build (or refit) the bottom-level acceleration structure for the curve
 * primitives of `geom`, using the macOS 14 curve-geometry API.
 *
 * Flattens the hair into control-point / radius / segment-index arrays
 * (duplicating end points for Catmull-Rom curves, which need 4 control points
 * per segment), uploads them to MTLBuffers and submits an asynchronous build,
 * optionally followed by compaction for static BVHs.
 *
 * Returns true if a build was submitted, false if the hair has no curves or
 * the curve API is unavailable. */
bool BVHMetal::build_BLAS_hair(Progress &progress,
                               id<MTLDevice> mtl_device,
                               id<MTLCommandQueue> queue,
                               Geometry *const geom,
                               bool refit)
{
# if defined(MAC_OS_VERSION_14_0)
  if (@available(macos 14.0, *)) {
    /* Build BLAS for hair curves */
    Hair *hair = static_cast<Hair *>(geom);
    if (hair->num_curves() == 0) {
      return false;
    }

    /* Static BVHs (and OS versions with broken refitting) get a compacted
     * fast-trace BVH instead of a refittable one. */
    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC) || !support_refit_blas();

    size_t num_motion_steps = 1;
    /* Look up the motion vertex positions; `motion_keys` was referenced below
     * without a declaration (line lost in this copy) — restored to match the
     * pointcloud BLAS build. */
    Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
    if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
      num_motion_steps = hair->get_motion_steps();
    }

    id<MTLBuffer> cpBuffer = nil;
    id<MTLBuffer> radiusBuffer = nil;
    id<MTLBuffer> idxBuffer = nil;

    MTLAccelerationStructureGeometryDescriptor *geomDesc;
    if (num_motion_steps > 1) {
      MTLAccelerationStructureMotionCurveGeometryDescriptor *geomDescCrv =
          [MTLAccelerationStructureMotionCurveGeometryDescriptor descriptor];

      uint64_t numKeys = hair->num_keys();
      uint64_t numCurves = hair->num_curves();
      const array<float> &radiuses = hair->get_curve_radius();

      /* Gather the curve geometry. */
      std::vector<float3> cpData;
      std::vector<int> idxData;
      std::vector<float> radiusData;
      cpData.reserve(numKeys);
      radiusData.reserve(numKeys);

      /* Offset of each motion step's first control point within cpData. */
      std::vector<int> step_offsets;
      for (size_t step = 0; step < num_motion_steps; ++step) {

        /* The center step for motion vertices is not stored in the attribute. */
        const float3 *keys = hair->get_curve_keys().data();
        size_t center_step = (num_motion_steps - 1) / 2;
        if (step != center_step) {
          size_t attr_offset = (step > center_step) ? step - 1 : step;
          /* Technically this is a float4 array, but sizeof(float3) == sizeof(float4). */
          keys = motion_keys->data_float3() + attr_offset * numKeys;
        }

        step_offsets.push_back(cpData.size());

        for (int c = 0; c < numCurves; ++c) {
          const Hair::Curve curve = hair->get_curve(c);
          int segCount = curve.num_segments();
          int firstKey = curve.first_key;
          uint64_t idxBase = cpData.size();
          /* Catmull-Rom curves need the first/last key duplicated as phantom
           * control points; linear curves do not. */
          if (hair->curve_shape != CURVE_THICK_LINEAR) {
            cpData.push_back(keys[firstKey]);
            radiusData.push_back(radiuses[firstKey]);
          }
          for (int s = 0; s < segCount; ++s) {
            /* Segment indices are shared by all steps; emit them once. */
            if (step == 0) {
              idxData.push_back(idxBase + s);
            }
            cpData.push_back(keys[firstKey + s]);
            radiusData.push_back(radiuses[firstKey + s]);
          }
          cpData.push_back(keys[firstKey + curve.num_keys - 1]);
          radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
          if (hair->curve_shape != CURVE_THICK_LINEAR) {
            cpData.push_back(keys[firstKey + curve.num_keys - 1]);
            radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
          }
        }
      }

      /* Allocate and populate MTLBuffers for geometry. */
      idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
                                          length:idxData.size() * sizeof(int)
                                         options:MTLResourceStorageModeShared];

      cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
                                         length:cpData.size() * sizeof(float3)
                                        options:MTLResourceStorageModeShared];

      radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
                                             length:radiusData.size() * sizeof(float)
                                            options:MTLResourceStorageModeShared];

      /* One keyframe descriptor per motion step into the packed buffers. */
      std::vector<MTLMotionKeyframeData *> cp_ptrs;
      std::vector<MTLMotionKeyframeData *> radius_ptrs;
      cp_ptrs.reserve(num_motion_steps);
      radius_ptrs.reserve(num_motion_steps);

      for (size_t step = 0; step < num_motion_steps; ++step) {
        MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
        k.buffer = cpBuffer;
        k.offset = step_offsets[step] * sizeof(float3);
        cp_ptrs.push_back(k);

        k = [MTLMotionKeyframeData data];
        k.buffer = radiusBuffer;
        k.offset = step_offsets[step] * sizeof(float);
        radius_ptrs.push_back(k);
      }

      geomDescCrv.controlPointBuffers = [NSArray arrayWithObjects:cp_ptrs.data()
                                                            count:cp_ptrs.size()];
      geomDescCrv.radiusBuffers = [NSArray arrayWithObjects:radius_ptrs.data()
                                                       count:radius_ptrs.size()];

      /* controlPointCount should specify the *per-step* control point count. */
      geomDescCrv.controlPointCount = cpData.size() / num_motion_steps;
      geomDescCrv.controlPointStride = sizeof(float3);
      geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
      geomDescCrv.radiusStride = sizeof(float);
      geomDescCrv.radiusFormat = MTLAttributeFormatFloat;
      geomDescCrv.segmentCount = idxData.size();
      geomDescCrv.segmentControlPointCount = (hair->curve_shape == CURVE_THICK_LINEAR) ? 2 : 4;
      geomDescCrv.curveType = (hair->curve_shape == CURVE_RIBBON) ? MTLCurveTypeFlat :
                                                                    MTLCurveTypeRound;
      if (hair->curve_shape == CURVE_THICK_LINEAR) {
        geomDescCrv.curveBasis = MTLCurveBasisLinear;
        geomDescCrv.curveEndCaps = MTLCurveEndCapsSphere;
      }
      else {
        geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
        geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
      }
      geomDescCrv.indexType = MTLIndexTypeUInt32;
      geomDescCrv.indexBuffer = idxBuffer;
      geomDescCrv.intersectionFunctionTableOffset = 1;

      /* Force a single any-hit call, so shadow record-all behavior works correctly */
      /* (Match optix behavior: unsigned int build_flags =
       * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
      geomDescCrv.allowDuplicateIntersectionFunctionInvocation = false;
      geomDescCrv.opaque = true;
      geomDesc = geomDescCrv;
    }
    else {
      MTLAccelerationStructureCurveGeometryDescriptor *geomDescCrv =
          [MTLAccelerationStructureCurveGeometryDescriptor descriptor];

      uint64_t numKeys = hair->num_keys();
      uint64_t numCurves = hair->num_curves();
      const array<float> &radiuses = hair->get_curve_radius();

      /* Gather the curve geometry. */
      std::vector<float3> cpData;
      std::vector<int> idxData;
      std::vector<float> radiusData;
      cpData.reserve(numKeys);
      radiusData.reserve(numKeys);
      auto keys = hair->get_curve_keys();
      for (int c = 0; c < numCurves; ++c) {
        const Hair::Curve curve = hair->get_curve(c);
        int segCount = curve.num_segments();
        int firstKey = curve.first_key;
        uint64_t idxBase = cpData.size();
        /* Catmull-Rom curves need the first/last key duplicated as phantom
         * control points; linear curves do not. */
        if (hair->curve_shape != CURVE_THICK_LINEAR) {
          cpData.push_back(keys[firstKey]);
          radiusData.push_back(radiuses[firstKey]);
        }
        for (int s = 0; s < segCount; ++s) {
          idxData.push_back(idxBase + s);
          cpData.push_back(keys[firstKey + s]);
          radiusData.push_back(radiuses[firstKey + s]);
        }
        cpData.push_back(keys[firstKey + curve.num_keys - 1]);
        radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
        if (hair->curve_shape != CURVE_THICK_LINEAR) {
          cpData.push_back(keys[firstKey + curve.num_keys - 1]);
          radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
        }
      }

      /* Allocate and populate MTLBuffers for geometry. */
      idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
                                          length:idxData.size() * sizeof(int)
                                         options:MTLResourceStorageModeShared];

      cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
                                         length:cpData.size() * sizeof(float3)
                                        options:MTLResourceStorageModeShared];

      radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
                                             length:radiusData.size() * sizeof(float)
                                            options:MTLResourceStorageModeShared];

      geomDescCrv.controlPointBuffer = cpBuffer;
      geomDescCrv.radiusBuffer = radiusBuffer;
      geomDescCrv.controlPointCount = cpData.size();
      geomDescCrv.controlPointStride = sizeof(float3);
      geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
      geomDescCrv.controlPointBufferOffset = 0;
      geomDescCrv.segmentCount = idxData.size();
      geomDescCrv.segmentControlPointCount = (hair->curve_shape == CURVE_THICK_LINEAR) ? 2 : 4;
      geomDescCrv.curveType = (hair->curve_shape == CURVE_RIBBON) ? MTLCurveTypeFlat :
                                                                    MTLCurveTypeRound;
      if (hair->curve_shape == CURVE_THICK_LINEAR) {
        geomDescCrv.curveBasis = MTLCurveBasisLinear;
        geomDescCrv.curveEndCaps = MTLCurveEndCapsSphere;
      }
      else {
        geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
        geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
      }
      geomDescCrv.indexType = MTLIndexTypeUInt32;
      geomDescCrv.indexBuffer = idxBuffer;
      geomDescCrv.intersectionFunctionTableOffset = 1;

      /* Force a single any-hit call, so shadow record-all behavior works correctly */
      /* (Match optix behavior: unsigned int build_flags =
       * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
      geomDescCrv.allowDuplicateIntersectionFunctionInvocation = false;
      geomDescCrv.opaque = true;
      geomDesc = geomDescCrv;
    }

    MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
        [MTLPrimitiveAccelerationStructureDescriptor descriptor];
    accelDesc.geometryDescriptors = @[ geomDesc ];

    if (num_motion_steps > 1) {
      accelDesc.motionStartTime = 0.0f;
      accelDesc.motionEndTime = 1.0f;
      accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
      accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
      accelDesc.motionKeyframeCount = num_motion_steps;

      BVH_status("Building motion hair BLAS | %7d curves | %s | %7d motion keyframes",
                 (int)hair->num_curves(),
                 geom->name.c_str(),
                 (int)num_motion_steps);
    }
    else {
      BVH_status(
          "Building hair BLAS | %7d curves | %s", (int)hair->num_curves(), geom->name.c_str());
    }

    if (extended_limits) {
      accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
    }

    if (!use_fast_trace_bvh) {
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }
    else if (@available(macos 26.0, *)) {
      accelDesc.usage |= MTLAccelerationStructureUsagePreferFastIntersection;
    }

    /* Allocate the destination structure, scratch space, and a small shared
     * buffer to read back the compacted size. */
    MTLAccelerationStructureSizes accelSizes = [mtl_device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                       options:MTLResourceStorageModePrivate];
    id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
                                                    options:MTLResourceStorageModeShared];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    if (refit) {
      [accelEnc refitAccelerationStructure:accel_struct
                                descriptor:accelDesc
                               destination:accel_uncompressed
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel_uncompressed
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    if (use_fast_trace_bvh) {
      [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
                                               toBuffer:sizeBuf
                                                 offset:0
                                           sizeDataType:MTLDataTypeULong];
    }
    [accelEnc endEncoding];

    /* Estimated size of resources that will be wired for the GPU accelerated build.
     * Acceleration-struct size is doubled to account for possible compaction step. */
    size_t wired_size = cpBuffer.allocatedSize + radiusBuffer.allocatedSize +
                        idxBuffer.allocatedSize + scratchBuf.allocatedSize +
                        accel_uncompressed.allocatedSize * 2;

    [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
      /* free temp resources */
      [scratchBuf release];
      [cpBuffer release];
      [radiusBuffer release];
      [idxBuffer release];

      if (use_fast_trace_bvh) {
        /* Compact the accel structure */
        uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;

        /* Run the compaction pass off the completion-handler thread. */
        dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
          id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
          id<MTLAccelerationStructureCommandEncoder> accelEnc =
              [accelCommands accelerationStructureCommandEncoder];
          id<MTLAccelerationStructure> accel = [mtl_device
              newAccelerationStructureWithSize:compressed_size];
          [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
                                toAccelerationStructure:accel];
          [accelEnc endEncoding];
          [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
            set_accel_struct(accel);
            [accel_uncompressed release];

            /* Signal that we've finished doing GPU acceleration struct build. */
            g_bvh_build_throttler.release(wired_size);
          }];
          [accelCommands commit];
        });
      }
      else {
        /* set our acceleration structure to the uncompressed structure */
        set_accel_struct(accel_uncompressed);

        /* Signal that we've finished doing GPU acceleration struct build. */
        g_bvh_build_throttler.release(wired_size);
      }

      [sizeBuf release];
    }];

    /* Wait until it's safe to proceed with GPU acceleration struct build. */
    g_bvh_build_throttler.acquire(wired_size);
    [accelCommands commit];

    return true;
  }
# else  /* MAC_OS_VERSION_14_0 */
  (void)progress;
  (void)mtl_device;
  (void)queue;
  (void)geom;
  (void)(refit);
# endif /* MAC_OS_VERSION_14_0 */
  return false;
}
744
745bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
746 id<MTLDevice> mtl_device,
747 id<MTLCommandQueue> queue,
748 Geometry *const geom,
749 bool refit)
750{
751 if (@available(macos 12.0, *)) {
752 /* Build BLAS for point cloud */
753 PointCloud *pointcloud = static_cast<PointCloud *>(geom);
754 if (pointcloud->num_points() == 0) {
755 return false;
756 }
757
758 const size_t num_points = pointcloud->get_points().size();
759 const float3 *points = pointcloud->get_points().data();
760 const float *radius = pointcloud->get_radius().data();
761
762 const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC) || !support_refit_blas();
763
764 size_t num_motion_steps = 1;
765 Attribute *motion_keys = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
766 if (motion_blur && pointcloud->get_use_motion_blur() && motion_keys) {
767 num_motion_steps = pointcloud->get_motion_steps();
768 }
769
770 const size_t num_aabbs = num_motion_steps * num_points;
771
772 /* Allocate a GPU buffer for the AABB data and populate it */
773 id<MTLBuffer> aabbBuf = [mtl_device
774 newBufferWithLength:num_aabbs * sizeof(MTLAxisAlignedBoundingBox)
775 options:MTLResourceStorageModeShared];
776 MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents];
777
778 /* Get AABBs for each motion step */
779 size_t center_step = (num_motion_steps - 1) / 2;
780 for (size_t step = 0; step < num_motion_steps; ++step) {
781 if (step == center_step) {
782 /* The center step for motion vertices is not stored in the attribute */
783 for (size_t j = 0; j < num_points; ++j) {
784 const PointCloud::Point point = pointcloud->get_point(j);
786 point.bounds_grow(points, radius, bounds);
787
788 const size_t index = step * num_points + j;
789 aabb_data[index].min = (MTLPackedFloat3 &)bounds.min;
790 aabb_data[index].max = (MTLPackedFloat3 &)bounds.max;
791 }
792 }
793 else {
794 size_t attr_offset = (step > center_step) ? step - 1 : step;
795 float4 *motion_points = motion_keys->data_float4() + attr_offset * num_points;
796
797 for (size_t j = 0; j < num_points; ++j) {
798 const PointCloud::Point point = pointcloud->get_point(j);
800 point.bounds_grow(motion_points[j], bounds);
801
802 const size_t index = step * num_points + j;
803 aabb_data[index].min = (MTLPackedFloat3 &)bounds.min;
804 aabb_data[index].max = (MTLPackedFloat3 &)bounds.max;
805 }
806 }
807 }
808
809 MTLAccelerationStructureGeometryDescriptor *geomDesc;
810 if (num_motion_steps > 1) {
811 std::vector<MTLMotionKeyframeData *> aabb_ptrs;
812 aabb_ptrs.reserve(num_motion_steps);
813 for (size_t step = 0; step < num_motion_steps; ++step) {
814 MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
815 k.buffer = aabbBuf;
816 k.offset = step * num_points * sizeof(MTLAxisAlignedBoundingBox);
817 aabb_ptrs.push_back(k);
818 }
819
820 MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor *geomDescMotion =
821 [MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor descriptor];
822 geomDescMotion.boundingBoxBuffers = [NSArray arrayWithObjects:aabb_ptrs.data()
823 count:aabb_ptrs.size()];
824 geomDescMotion.boundingBoxCount = num_points;
825 geomDescMotion.boundingBoxStride = sizeof(aabb_data[0]);
826 geomDescMotion.intersectionFunctionTableOffset = 2;
827
828 /* Force a single any-hit call, so shadow record-all behavior works correctly */
829 /* (Match optix behavior: unsigned int build_flags =
830 * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
831 geomDescMotion.allowDuplicateIntersectionFunctionInvocation = false;
832 geomDescMotion.opaque = true;
833 geomDesc = geomDescMotion;
834 }
835 else {
836 MTLAccelerationStructureBoundingBoxGeometryDescriptor *geomDescNoMotion =
837 [MTLAccelerationStructureBoundingBoxGeometryDescriptor descriptor];
838 geomDescNoMotion.boundingBoxBuffer = aabbBuf;
839 geomDescNoMotion.boundingBoxBufferOffset = 0;
840 geomDescNoMotion.boundingBoxCount = int(num_aabbs);
841 geomDescNoMotion.boundingBoxStride = sizeof(aabb_data[0]);
842 geomDescNoMotion.intersectionFunctionTableOffset = 2;
843
844 /* Force a single any-hit call, so shadow record-all behavior works correctly */
845 /* (Match optix behavior: unsigned int build_flags =
846 * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
847 geomDescNoMotion.allowDuplicateIntersectionFunctionInvocation = false;
848 geomDescNoMotion.opaque = true;
849 geomDesc = geomDescNoMotion;
850 }
851
852 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
853 [MTLPrimitiveAccelerationStructureDescriptor descriptor];
854 accelDesc.geometryDescriptors = @[ geomDesc ];
855
856 if (num_motion_steps > 1) {
857 accelDesc.motionStartTime = 0.0f;
858 accelDesc.motionEndTime = 1.0f;
859 // accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
860 // accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
861 accelDesc.motionKeyframeCount = num_motion_steps;
862
863 BVH_status("Building motion pointcloud BLAS | %7d points | %s | %7d motion keyframes",
864 (int)pointcloud->num_points(),
865 geom->name.c_str(),
866 (int)num_motion_steps);
867 }
868 else {
869 BVH_status("Building pointcloud BLAS | %7d points | %s",
870 (int)pointcloud->num_points(),
871 geom->name.c_str());
872 }
873 if (extended_limits) {
874 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
875 }
876
877 if (!use_fast_trace_bvh) {
878 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
879 MTLAccelerationStructureUsagePreferFastBuild);
880 }
881 else if (@available(macos 26.0, *)) {
882 accelDesc.usage |= MTLAccelerationStructureUsagePreferFastIntersection;
883 }
884
885 MTLAccelerationStructureSizes accelSizes = [mtl_device
886 accelerationStructureSizesWithDescriptor:accelDesc];
887 id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
888 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
889 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
890 options:MTLResourceStorageModePrivate];
891 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
892 options:MTLResourceStorageModeShared];
893 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
894 id<MTLAccelerationStructureCommandEncoder> accelEnc =
895 [accelCommands accelerationStructureCommandEncoder];
896 if (refit) {
897 [accelEnc refitAccelerationStructure:accel_struct
898 descriptor:accelDesc
899 destination:accel_uncompressed
900 scratchBuffer:scratchBuf
901 scratchBufferOffset:0];
902 }
903 else {
904 [accelEnc buildAccelerationStructure:accel_uncompressed
905 descriptor:accelDesc
906 scratchBuffer:scratchBuf
907 scratchBufferOffset:0];
908 }
909 if (use_fast_trace_bvh) {
910 [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
911 toBuffer:sizeBuf
912 offset:0
913 sizeDataType:MTLDataTypeULong];
914 }
915 [accelEnc endEncoding];
916
917 /* Estimated size of resources that will be wired for the GPU accelerated build.
918 * Acceleration-struct size is doubled to account for possible compaction step. */
919 size_t wired_size = aabbBuf.allocatedSize + scratchBuf.allocatedSize +
920 accel_uncompressed.allocatedSize * 2;
921
922 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
923 /* free temp resources */
924 [scratchBuf release];
925 [aabbBuf release];
926
927 if (use_fast_trace_bvh) {
928 /* Compact the accel structure */
929 uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;
930
931 dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
932 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
933 id<MTLAccelerationStructureCommandEncoder> accelEnc =
934 [accelCommands accelerationStructureCommandEncoder];
935 id<MTLAccelerationStructure> accel = [mtl_device
936 newAccelerationStructureWithSize:compressed_size];
937 [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
938 toAccelerationStructure:accel];
939 [accelEnc endEncoding];
940 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
941 set_accel_struct(accel);
942 [accel_uncompressed release];
943
944 /* Signal that we've finished doing GPU acceleration struct build. */
945 g_bvh_build_throttler.release(wired_size);
946 }];
947 [accelCommands commit];
948 });
949 }
950 else {
951 /* set our acceleration structure to the uncompressed structure */
952 set_accel_struct(accel_uncompressed);
953
954 /* Signal that we've finished doing GPU acceleration struct build. */
955 g_bvh_build_throttler.release(wired_size);
956 }
957
958 [sizeBuf release];
959 }];
960
961 /* Wait until it's safe to proceed with GPU acceleration struct build. */
962 g_bvh_build_throttler.acquire(wired_size);
963 [accelCommands commit];
964 return true;
965 }
966 return false;
967}
968
/* Build the bottom-level acceleration structure (BLAS) for the single geometry
 * owned by this BVH, dispatching to the appropriate per-type builder.
 *
 * \param progress: Build progress/status reporting sink.
 * \param mtl_device: Metal device used for GPU-accelerated BVH builds.
 * \param queue: Command queue the build commands are encoded onto.
 * \param refit: When true, refit an existing structure instead of rebuilding.
 * \return true if a builder ran and succeeded, false for unsupported types.
 */
bool BVHMetal::build_BLAS(Progress &progress,
                          id<MTLDevice> mtl_device,
                          id<MTLCommandQueue> queue,
                          bool refit)
{
  /* A BLAS wraps exactly one geometry of one object. */
  assert(objects.size() == 1 && geometry.size() == 1);

  /* Build bottom level acceleration structures (BLAS) */
  Geometry *const geom = geometry[0];
  switch (geom->geometry_type) {
    case Geometry::VOLUME:
    case Geometry::MESH:
      return build_BLAS_mesh(progress, mtl_device, queue, geom, refit);
    case Geometry::HAIR:
      return build_BLAS_hair(progress, mtl_device, queue, geom, refit);
    /* Fix: the POINTCLOUD case label was missing here, which made the call below
     * unreachable (dead code after the HAIR return) and sent point clouds to the
     * `default` branch, silently returning false. */
    case Geometry::POINTCLOUD:
      return build_BLAS_pointcloud(progress, mtl_device, queue, geom, refit);
    default:
      return false;
  }
  return false;
}
991
992# if defined(MAC_OS_VERSION_15_0)
993
/* Convert a Cycles DecomposedTransform into Metal's component-wise transform
 * representation (translation / rotation / pivot / shear / scale), as consumed
 * by MTLTransformTypeComponent motion instances. */
static MTLComponentTransform decomposed_to_component_transform(const DecomposedTransform &src)
{
  MTLComponentTransform result;
  result.translation = MTLPackedFloat3Make(src.y.x, src.y.y, src.y.z);
  /* Rotation quaternion is stored in src.x as (x, y, z, w). */
  result.rotation = MTLPackedFloatQuaternionMake(src.x.x, src.x.y, src.x.z, src.x.w);
  result.pivot = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
  result.shear = MTLPackedFloat3Make(src.z.x, src.z.y, src.w.x);
  /* Scale components are packed into the w lanes of y/z/w. */
  result.scale = MTLPackedFloat3Make(src.y.w, src.z.w, src.w.w);
  return result;
}
1005
/* Build an identity MTLComponentTransform: unit scale, identity rotation
 * quaternion, and zero shear/pivot/translation. */
static MTLComponentTransform component_transform_make_unit()
{
  MTLComponentTransform identity;
  identity.translation = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
  identity.rotation = MTLPackedFloatQuaternionMake(0.0f, 0.0f, 0.0f, 1.0f);
  identity.pivot = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
  identity.shear = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
  identity.scale = MTLPackedFloat3Make(1.0f, 1.0f, 1.0f);
  return identity;
}
1017
1018# endif
1019
/* Build the top-level acceleration structure (TLAS) over all object instances.
 *
 * Populates an instance-descriptor buffer (motion or user-ID flavour), collects
 * the unique BLAS set, encodes the GPU build/refit, and caches the result.
 *
 * \param progress: Build progress/status reporting sink.
 * \param mtl_device: Metal device used for the GPU build.
 * \param queue: Command queue the build commands are encoded onto.
 * \param refit: When true, refit the existing TLAS in place.
 * \return true on success, false when MetalRT (macOS 12+) is unavailable.
 */
bool BVHMetal::build_TLAS(Progress &progress,
                          id<MTLDevice> mtl_device,
                          id<MTLCommandQueue> queue,
                          bool refit)
{
  /* Wait for all BLAS builds to finish. */
  g_bvh_build_throttler.wait_for_all();

  if (@available(macos 12.0, *)) {
    /* Defined inside available check, for return type to be available. */
    auto make_null_BLAS = [this](id<MTLDevice> mtl_device,
                                 id<MTLCommandQueue> queue) -> id<MTLAccelerationStructure> {
      id<MTLBuffer> nullBuf = [mtl_device newBufferWithLength:sizeof(float3)
                                                      options:MTLResourceStorageModeShared];

      /* Create an acceleration structure. */
      MTLAccelerationStructureTriangleGeometryDescriptor *geomDesc =
          [MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
      geomDesc.vertexBuffer = nullBuf;
      geomDesc.vertexBufferOffset = 0;
      geomDesc.vertexStride = sizeof(float3);
      geomDesc.indexBuffer = nullBuf;
      geomDesc.indexBufferOffset = 0;
      geomDesc.indexType = MTLIndexTypeUInt32;
      geomDesc.triangleCount = 0;
      geomDesc.intersectionFunctionTableOffset = 0;
      geomDesc.opaque = true;
      geomDesc.allowDuplicateIntersectionFunctionInvocation = false;

      MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
          [MTLPrimitiveAccelerationStructureDescriptor descriptor];
      accelDesc.geometryDescriptors = @[ geomDesc ];
      if (extended_limits) {
        accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
      }

      MTLAccelerationStructureSizes accelSizes = [mtl_device
          accelerationStructureSizesWithDescriptor:accelDesc];
      id<MTLAccelerationStructure> accel_struct = [mtl_device
          newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
      id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                         options:MTLResourceStorageModePrivate];
      id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
                                                      options:MTLResourceStorageModeShared];
      id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
      id<MTLAccelerationStructureCommandEncoder> accelEnc =
          [accelCommands accelerationStructureCommandEncoder];
      [accelEnc buildAccelerationStructure:accel_struct
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
      [accelEnc endEncoding];
      [accelCommands commit];
      [accelCommands waitUntilCompleted];

      /* free temp resources */
      [scratchBuf release];
      [nullBuf release];
      [sizeBuf release];

      return accel_struct;
    };

    /* Count instances and the motion transforms they will need. */
    uint32_t num_instances = 0;
    uint32_t num_motion_transforms = 0;
    uint32_t num_motion_instances = 0;
    for (Object *ob : objects) {
      num_instances++;

      if (ob->use_motion()) {
        num_motion_transforms += max((size_t)1, ob->get_motion().size());
        num_motion_instances++;
      }
      else {
        /* Static instances still consume one transform slot in the motion layout. */
        num_motion_transforms++;
      }
    }

    const bool use_instance_motion = motion_blur && num_motion_instances;
    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC) || !support_refit_blas();

    NSMutableArray *all_blas = [NSMutableArray array];
    unordered_map<const BVHMetal *, int> instance_mapping;

    /* Lambda function to build/retrieve the BLAS index mapping */
    auto get_blas_index = [&](const BVHMetal *blas) {
      auto it = instance_mapping.find(blas);
      if (it != instance_mapping.end()) {
        return it->second;
      }
      int blas_index = (int)[all_blas count];
      instance_mapping[blas] = blas_index;
      if (@available(macos 12.0, *)) {
        /* nullptr maps to the shared degenerate BLAS. */
        [all_blas addObject:(blas ? blas->accel_struct : null_BLAS)];
      }
      return blas_index;
    };

    size_t instance_size;
    if (use_instance_motion) {
      instance_size = sizeof(MTLAccelerationStructureMotionInstanceDescriptor);
    }
    else {
      instance_size = sizeof(MTLAccelerationStructureUserIDInstanceDescriptor);
    }

    /* Allocate a GPU buffer for the instance data and populate it */
    id<MTLBuffer> instanceBuf = [mtl_device newBufferWithLength:num_instances * instance_size
                                                        options:MTLResourceStorageModeShared];
    id<MTLBuffer> motion_transforms_buf = nil;
    MTLPackedFloat4x3 *matrix_motion_transforms = nullptr;
# if defined(MAC_OS_VERSION_15_0)
    MTLComponentTransform *decomposed_motion_transforms = nullptr;
# endif
    if (use_instance_motion && num_motion_transforms) {
# if defined(MAC_OS_VERSION_15_0)
      if (use_pcmi) {
        if (@available(macos 15.0, *)) {
          motion_transforms_buf = [mtl_device
              newBufferWithLength:num_motion_transforms * sizeof(MTLComponentTransform)
                          options:MTLResourceStorageModeShared];
          decomposed_motion_transforms = (MTLComponentTransform *)motion_transforms_buf.contents;
        }
      }
      else
# endif
      {
        motion_transforms_buf = [mtl_device
            newBufferWithLength:num_motion_transforms * sizeof(MTLPackedFloat4x3)
                        options:MTLResourceStorageModeShared];
        matrix_motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents;
      }
    }

    uint32_t instance_index = 0;
    uint32_t motion_transform_index = 0;

    blas_array.clear();
    blas_array.reserve(num_instances);

    for (Object *ob : objects) {
      /* Skip non-traceable objects */
      const Geometry *geom = ob->get_geometry();
      const BVHMetal *blas = static_cast<const BVHMetal *>(geom->bvh.get());
      if (!blas || !blas->accel_struct || !ob->is_traceable()) {
        /* Place a degenerate instance, to ensure [[instance_id]] equals ob->get_device_index()
         * in our intersection functions */
        blas = nullptr;

        /* Workaround for issue in macOS <= 14.1: Insert degenerate BLAS instead of zero-filling
         * the descriptor. */
        if (!null_BLAS) {
          null_BLAS = make_null_BLAS(mtl_device, queue);
        }
        blas_array.push_back(null_BLAS);
      }
      else {
        blas_array.push_back(blas->accel_struct);
      }

      uint32_t accel_struct_index = get_blas_index(blas);

      /* The MetalRT visibility mask can only contain 8 bits by default. */
      uint32_t mask = ob->visibility_for_tracing() & 0xFF;

      /* Have to have at least one bit in the mask, or else instance would always be culled. */
      if (0 == mask) {
        mask = 0xFF;
      }

      /* Set user instance ID to object index */
      uint32_t primitive_offset = 0;
      int currIndex = instance_index++;

      if (geom->is_hair()) {
        /* Build BLAS for curve primitives. */
        Hair *const hair = static_cast<Hair *const>(const_cast<Geometry *>(geom));
        primitive_offset = uint32_t(hair->curve_segment_offset);
      }
      else if (geom->is_mesh() || geom->is_volume()) {
        /* Build BLAS for triangle primitives. */
        Mesh *const mesh = static_cast<Mesh *const>(const_cast<Geometry *>(geom));
        primitive_offset = uint32_t(mesh->prim_offset);
      }
      else if (geom->is_pointcloud()) {
        /* Build BLAS for points primitives. */
        PointCloud *const pointcloud = static_cast<PointCloud *const>(
            const_cast<Geometry *>(geom));
        primitive_offset = uint32_t(pointcloud->prim_offset);
      }

      /* Bake into the appropriate descriptor */
      if (use_instance_motion) {
        MTLAccelerationStructureMotionInstanceDescriptor *instances =
            (MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
        MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[currIndex];

        desc.accelerationStructureIndex = accel_struct_index;
        desc.userID = primitive_offset;
        desc.mask = mask;
        desc.motionStartTime = 0.0f;
        desc.motionEndTime = 1.0f;
        desc.motionTransformsStartIndex = motion_transform_index;
        desc.motionStartBorderMode = MTLMotionBorderModeVanish;
        desc.motionEndBorderMode = MTLMotionBorderModeVanish;
        desc.intersectionFunctionTableOffset = 0;

        array<DecomposedTransform> decomp(ob->get_motion().size());
        /* Fix: the call to transform_motion_decompose() was dropped here, leaving
         * its argument list orphaned; restore the decomposition of the object's
         * motion keys. */
        transform_motion_decompose(
            decomp.data(), ob->get_motion().data(), ob->get_motion().size());

        int key_count = ob->get_motion().size();
        if (key_count) {
          desc.motionTransformsCount = key_count;

# if defined(MAC_OS_VERSION_15_0)
          if (use_pcmi) {
            for (int i = 0; i < key_count; i++) {
              decomposed_motion_transforms[motion_transform_index++] =
                  decomposed_to_component_transform(decomp[i]);
            }
          }
          else
# endif
          {
            Transform *keys = ob->get_motion().data();
            for (int i = 0; i < key_count; i++) {
              float *t = (float *)&matrix_motion_transforms[motion_transform_index++];
              /* Transpose transform (inner index renamed to avoid shadowing the
               * key-loop index). */
              const auto *src = (const float *)&keys[i];
              for (int j = 0; j < 12; j++) {
                t[j] = src[(j / 3) + 4 * (j % 3)];
              }
            }
          }
        }
        else {
          desc.motionTransformsCount = 1;

# if defined(MAC_OS_VERSION_15_0)
          if (use_pcmi) {
            if (ob->get_geometry()->is_instanced()) {
              DecomposedTransform decomp;
              transform_motion_decompose(&decomp, &ob->get_tfm(), 1);
              decomposed_motion_transforms[motion_transform_index++] =
                  decomposed_to_component_transform(decomp);
            }
            else {
              decomposed_motion_transforms[motion_transform_index++] =
                  component_transform_make_unit();
            }
          }
          else
# endif
          {
            float *t = (float *)&matrix_motion_transforms[motion_transform_index++];
            if (ob->get_geometry()->is_instanced()) {
              /* Transpose transform */
              const auto *src = (const float *)&ob->get_tfm();
              for (int i = 0; i < 12; i++) {
                t[i] = src[(i / 3) + 4 * (i % 3)];
              }
            }
            else {
              /* Clear transform to identity matrix */
              t[0] = t[4] = t[8] = 1.0f;
            }
          }
        }
      }
      else {
        MTLAccelerationStructureUserIDInstanceDescriptor *instances =
            (MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
        MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[currIndex];

        desc.accelerationStructureIndex = accel_struct_index;
        desc.userID = primitive_offset;
        desc.mask = mask;
        desc.intersectionFunctionTableOffset = 0;
        desc.options = MTLAccelerationStructureInstanceOptionOpaque;

        float *t = (float *)&desc.transformationMatrix;
        if (ob->get_geometry()->is_instanced()) {
          /* Transpose transform */
          const auto *src = (const float *)&ob->get_tfm();
          for (int i = 0; i < 12; i++) {
            t[i] = src[(i / 3) + 4 * (i % 3)];
          }
        }
        else {
          /* Clear transform to identity matrix */
          t[0] = t[4] = t[8] = 1.0f;
        }
      }
    }

    if (use_instance_motion) {
      BVH_status(
          "Building motion TLAS | %7d instances | %7d motion instances | %7d motion "
          "transforms",
          (int)num_instances,
          (int)num_motion_instances,
          (int)num_motion_transforms);
    }
    else {
      BVH_status("Building TLAS | %7d instances", (int)num_instances);
    }

    MTLInstanceAccelerationStructureDescriptor *accelDesc =
        [MTLInstanceAccelerationStructureDescriptor descriptor];
    accelDesc.instanceCount = num_instances;
    accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeUserID;
    accelDesc.instanceDescriptorBuffer = instanceBuf;
    accelDesc.instanceDescriptorBufferOffset = 0;
    accelDesc.instanceDescriptorStride = instance_size;
    accelDesc.instancedAccelerationStructures = all_blas;

    if (use_instance_motion) {
      accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeMotion;
      accelDesc.motionTransformBuffer = motion_transforms_buf;
      accelDesc.motionTransformCount = num_motion_transforms;
# if defined(MAC_OS_VERSION_15_0)
      if (@available(macos 15.0, *)) {
        accelDesc.motionTransformStride = 0;
        accelDesc.motionTransformType = use_pcmi ? MTLTransformTypeComponent :
                                                   MTLTransformTypePackedFloat4x3;
      }
# endif
    }

    if (extended_limits) {
      accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
    }
    if (!use_fast_trace_bvh) {
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }
    else if (@available(macos 26.0, *)) {
      accelDesc.usage |= MTLAccelerationStructureUsagePreferFastIntersection;
    }

    MTLAccelerationStructureSizes accelSizes = [mtl_device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel = [mtl_device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                       options:MTLResourceStorageModePrivate];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    if (refit) {
      [accelEnc refitAccelerationStructure:accel_struct
                                descriptor:accelDesc
                               destination:accel
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    [accelEnc endEncoding];
    [accelCommands commit];
    [accelCommands waitUntilCompleted];

    if (motion_transforms_buf) {
      [motion_transforms_buf release];
    }
    [instanceBuf release];
    [scratchBuf release];

    /* Cache top and bottom-level acceleration structs */
    set_accel_struct(accel);

    unique_blas_array.clear();
    unique_blas_array.reserve(all_blas.count);
    [all_blas enumerateObjectsUsingBlock:^(id<MTLAccelerationStructure> blas, NSUInteger, BOOL *) {
      unique_blas_array.push_back(blas);
    }];

    return true;
  }
  return false;
}
1406
/* Entry point for building (or refitting) this BVH on the Metal device.
 *
 * Validates the refit request, resets the cached acceleration structure for
 * full rebuilds, and dispatches to the TLAS or BLAS builder depending on
 * whether this is a top-level BVH.
 *
 * \param progress: Build progress/status reporting sink.
 * \param mtl_device: Metal device used for the GPU build.
 * \param queue: Command queue the build commands are encoded onto.
 * \param refit: When true, attempt an in-place refit instead of a rebuild.
 * \return true on success.
 */
bool BVHMetal::build(Progress &progress,
                     id<MTLDevice> mtl_device,
                     id<MTLCommandQueue> queue,
                     bool refit)
{
  if (@available(macos 12.0, *)) {
    /* It isn't valid to refit a non-existent BVH, or one which wasn't constructed as dynamic.
     * In such cases, assert in development but try to recover in the wild. */
    if (refit && params.bvh_type != BVH_TYPE_DYNAMIC) {
      assert(!"Can't refit static Metal BVH");
      refit = false;
    }
    else if (refit && !accel_struct) {
      assert(!"Can't refit non-existing Metal BVH");
      refit = false;
    }

    /* Full rebuilds start from a clean slate. */
    if (!refit) {
      set_accel_struct(nil);
    }
  }

  /* Some devices don't support refitting at all. */
  if (!support_refit_blas()) {
    refit = false;
  }

  @autoreleasepool {
    return params.top_level ? build_TLAS(progress, mtl_device, queue, refit) :
                              build_BLAS(progress, mtl_device, queue, refit);
  }
}
1442
1444
1445#endif /* WITH_METAL */
volatile int lock
BMesh const char void * data
unsigned long long int uint64_t
static btDbvtVolume bounds(btDbvtNode **leaves, int count)
Definition btDbvt.cpp:299
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
Attribute * find(ustring name) const
Definition bvh/bvh.h:67
Type geometry_type
bool is_volume() const
bool is_pointcloud() const
bool is_hair() const
size_t prim_offset
unique_ptr< BVH > bvh
AttributeSet attributes
bool is_mesh() const
Definition hair.h:13
Curve get_curve(const size_t i) const
Definition hair.h:111
size_t curve_segment_offset
Definition hair.h:90
size_t num_curves() const
Definition hair.h:126
CurveShapeType curve_shape
Definition hair.h:91
size_t num_keys() const
Definition hair.h:121
size_t size() const
nullptr float
CCL_NAMESPACE_BEGIN struct Options options
#define CCL_NAMESPACE_END
static float verts[][3]
ThreadMutex mutex
#define assert(assertion)
VecBase< float, D > step(VecOp< float, D >, VecOp< float, D >) RET
float length(VecOp< float, D >) RET
uiWidgetBaseParameters params[MAX_WIDGET_BASE_BATCH]
int count
@ ATTR_STD_MOTION_VERTEX_POSITION
@ CURVE_RIBBON
@ CURVE_THICK_LINEAR
descriptor
ccl_device_inline float2 mask(const MaskType mask, const float2 a)
@ BVH_TYPE_DYNAMIC
Definition params.h:33
@ BVH_TYPE_STATIC
Definition params.h:40
float3 * data_float3()
float4 * data_float4()
int first_key
Definition hair.h:19
int num_segments() const
Definition hair.h:22
int num_keys
Definition hair.h:20
size_t num_triangles() const
Definition scene/mesh.h:77
ustring name
Definition graph/node.h:177
void bounds_grow(const float3 *points, const float *radius, BoundBox &bounds) const
Point get_point(const int i) const
size_t num_points() const
float y
Definition sky_math.h:225
float z
Definition sky_math.h:225
float x
Definition sky_math.h:225
float w
Definition sky_math.h:225
i
Definition text_draw.cc:230
max
Definition text_draw.cc:251
std::mutex thread_mutex
Definition thread.h:27
std::unique_lock< std::mutex > thread_scoped_lock
Definition thread.h:28
void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, const size_t size)