/* Blender V4.5 — bvh.mm (Cycles Metal device BVH builder). */
1/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_METAL
6
# include <algorithm>
# include <chrono>
# include <mutex>
# include <thread>
# include <vector>
11
12# include "scene/hair.h"
13# include "scene/mesh.h"
14# include "scene/object.h"
15# include "scene/pointcloud.h"
16
17# include "util/progress.h"
18
19# include "device/metal/bvh.h"
20# include "device/metal/util.h"
21
23
/* Set the progress substatus and echo the same message to the Metal debug log.
 * NOTE: relies on a `progress` variable being in scope at the point of use. */
# define BVH_status(...) \
  { \
    string str = string_printf(__VA_ARGS__); \
    progress.set_substatus(str); \
    metal_printf("%s\n", str.c_str()); \
  }

/* Uncomment to enable diagnostic logging from BVHMetalBuildThrottler below. */
// # define BVH_THROTTLE_DIAGNOSTICS
# ifdef BVH_THROTTLE_DIAGNOSTICS
# define bvh_throttle_printf(...) printf("BVHMetalBuildThrottler::" __VA_ARGS__)
# else
/* Compiles away to nothing when diagnostics are disabled. */
# define bvh_throttle_printf(...)
# endif
37
38/* Limit the number of concurrent BVH builds so that we don't approach unsafe GPU working set
39 * sizes. */
/* Limit the number of concurrent BVH builds so that we don't approach unsafe GPU working set
 * sizes. */
struct BVHMetalBuildThrottler {
  /* Guards all mutable state below. acquire() is called from build threads while release() is
   * called from Metal command-buffer completion handlers on GCD threads, so unsynchronized
   * updates of wired_memory / requests_in_flight would be a data race. */
  std::mutex mutex;
  /* Bytes currently wired for in-flight builds. */
  size_t wired_memory = 0;
  /* Threshold above which additional builds are throttled. */
  size_t safe_wired_limit = 0;
  /* Number of builds that have acquired and not yet released. */
  int requests_in_flight = 0;

  BVHMetalBuildThrottler()
  {
    /* The default device will always be the one that supports MetalRT if the machine supports it.
     */
    id<MTLDevice> mtlDevice = MTLCreateSystemDefaultDevice();

    /* Set a conservative limit, but which will still only throttle in extreme cases. */
    safe_wired_limit = [mtlDevice recommendedMaxWorkingSetSize] / 4;
    bvh_throttle_printf("safe_wired_limit = %zu\n", safe_wired_limit);
  }

  /* Block until we're safely able to wire the requested resources. */
  void acquire(const size_t bytes_to_be_wired)
  {
    bool throttled = false;
    while (true) {
      {
        std::lock_guard<std::mutex> lock(mutex);

        /* Always allow a BVH build to proceed if no other is in flight, otherwise
         * only proceed if we're within safe limits. */
        if (wired_memory == 0 || wired_memory + bytes_to_be_wired <= safe_wired_limit) {
          wired_memory += bytes_to_be_wired;
          requests_in_flight += 1;
          bvh_throttle_printf("acquire -- success (requests_in_flight = %d, wired_memory = %zu)\n",
                              requests_in_flight,
                              wired_memory);
          return;
        }

        if (!throttled) {
          bvh_throttle_printf(
              "acquire -- throttling (requests_in_flight = %d, wired_memory = %zu, "
              "bytes_to_be_wired = %zu)\n",
              requests_in_flight,
              wired_memory,
              bytes_to_be_wired);
        }
        throttled = true;
      }

      /* Poll outside the lock; contention here is rare so a short sleep is sufficient. */
      std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
  }

  /* Notify of resources that have stopped being wired. */
  void release(const size_t bytes_just_unwired)
  {
    std::lock_guard<std::mutex> lock(mutex);
    wired_memory -= bytes_just_unwired;
    requests_in_flight -= 1;
    bvh_throttle_printf("release (requests_in_flight = %d, wired_memory = %zu)\n",
                        requests_in_flight,
                        wired_memory);
  }

  /* Wait for all outstanding work to finish. */
  void wait_for_all()
  {
    while (true) {
      {
        std::lock_guard<std::mutex> lock(mutex);
        if (wired_memory == 0) {
          return;
        }
      }
      std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
  }
} g_bvh_build_throttler;
116
117/* macOS 15.2 and 15.3 has a bug in the dynamic BVH refitting which leads to missing geometry
118 * during render. The issue is fixed in the macOS 15.4, until then disable refitting even for
119 * the viewport.
120 * Note that dynamic BVH is still used on the scene level to speed up updates of instances and
121 * such. #132782. */
static bool support_refit_blas()
{
  /* Availability checks are cumulative: running on 15.4+ satisfies both, so the final
   * assignment wins and refitting is re-enabled. 15.2-15.3 only satisfy the first check
   * (refitting disabled); anything older than 15.2 keeps the default. */
  bool refit_supported = true;
  if (@available(macos 15.2, *)) {
    refit_supported = false;
  }
  if (@available(macos 15.4, *)) {
    refit_supported = true;
  }
  return refit_supported;
}
132
/* Construct a Metal BVH wrapper. No GPU work happens here; the acceleration structure is
 * created later by the build_BLAS* methods. */
BVHMetal::BVHMetal(const BVHParams &params_,
                   const vector<Geometry *> &geometry_,
                   const vector<Object *> &objects_,
                   Device *device)
    : BVH(params_, geometry_, objects_), device(device)
{
}
140
BVHMetal::~BVHMetal()
{
  /* MTLAccelerationStructure requires macOS 12.0+. */
  if (@available(macos 12.0, *)) {
    /* Passing nil releases the currently held structure and updates memory stats. */
    set_accel_struct(nil);
    if (null_BLAS) {
      [null_BLAS release];
    }
  }
}
150
/* Replace the currently held acceleration structure, keeping the device's memory statistics in
 * sync. Passing nil simply releases the existing structure. Takes ownership of the (already
 * retained) new_accel_struct under manual retain/release. */
API_AVAILABLE(macos(11.0))
void BVHMetal::set_accel_struct(id<MTLAccelerationStructure> new_accel_struct)
{
  if (@available(macos 12.0, *)) {
    /* Release the old structure first and account for its memory. */
    if (accel_struct) {
      device->stats.mem_free(accel_struct.allocatedSize);
      [accel_struct release];
      accel_struct = nil;
    }

    if (new_accel_struct) {
      accel_struct = new_accel_struct;
      device->stats.mem_alloc(accel_struct.allocatedSize);
    }
  }
}
167
/* Build (or refit) a bottom-level acceleration structure for a triangle mesh (also used for
 * volumes).
 *
 * Uploads vertex and index data to shared MTLBuffers (one vertex slab per motion step when
 * motion blur is active), encodes a GPU acceleration-structure build, and for static BVHs
 * compacts the result. The build is asynchronous: the command buffer's completion handler frees
 * temporary buffers, installs the finished structure via set_accel_struct(), and releases the
 * global build throttler. Returns false when there is nothing to build or the OS is too old. */
bool BVHMetal::build_BLAS_mesh(Progress &progress,
                               id<MTLDevice> mtl_device,
                               id<MTLCommandQueue> queue,
                               Geometry *const geom,
                               bool refit)
{
  if (@available(macos 12.0, *)) {
    /* Build BLAS for triangle primitives */
    Mesh *const mesh = static_cast<Mesh *const>(geom);
    if (mesh->num_triangles() == 0) {
      return false;
    }

    /* Static BVHs are compacted for fast tracing; also forced when refitting is unsupported. */
    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC) || !support_refit_blas();

    const array<float3> &verts = mesh->get_verts();
    const array<int> &tris = mesh->get_triangles();
    const size_t num_verts = verts.size();
    const size_t num_indices = tris.size();

    size_t num_motion_steps = 1;
    /* NOTE(review): `motion_keys` is not declared in the code visible here — presumably a
     * motion-vertex-attribute lookup was lost from this excerpt; confirm against the full file. */
    if (motion_blur && mesh->get_use_motion_blur() && motion_keys) {
      num_motion_steps = mesh->get_motion_steps();
    }

    /* Upload the mesh data to the GPU */
    id<MTLBuffer> posBuf = nil;
    id<MTLBuffer> indexBuf = [mtl_device newBufferWithBytes:tris.data()
                                                     length:num_indices * sizeof(tris.data()[0])
                                                    options:MTLResourceStorageModeShared];

    if (num_motion_steps == 1) {
      posBuf = [mtl_device newBufferWithBytes:verts.data()
                                       length:num_verts * sizeof(verts.data()[0])
                                      options:MTLResourceStorageModeShared];
    }
    else {
      /* Pack one copy of the vertex positions per motion step into a single buffer. */
      posBuf = [mtl_device
          newBufferWithLength:num_verts * num_motion_steps * sizeof(verts.data()[0])
                      options:MTLResourceStorageModeShared];
      float3 *dest_data = (float3 *)[posBuf contents];
      size_t center_step = (num_motion_steps - 1) / 2;
      for (size_t step = 0; step < num_motion_steps; ++step) {
        const float3 *verts = mesh->get_verts().data();

        /* The center step for motion vertices is not stored in the attribute. */
        if (step != center_step) {
          verts = motion_keys->data_float3() + (step > center_step ? step - 1 : step) * num_verts;
        }
        std::copy_n(verts, num_verts, dest_data + num_verts * step);
      }
    }

    /* Create an acceleration structure. */
    MTLAccelerationStructureGeometryDescriptor *geomDesc;
    if (num_motion_steps > 1) {
      /* One keyframe descriptor per motion step, each a view into posBuf. */
      std::vector<MTLMotionKeyframeData *> vertex_ptrs;
      vertex_ptrs.reserve(num_motion_steps);
      for (size_t step = 0; step < num_motion_steps; ++step) {
        MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
        k.buffer = posBuf;
        k.offset = num_verts * step * sizeof(float3);
        vertex_ptrs.push_back(k);
      }

      MTLAccelerationStructureMotionTriangleGeometryDescriptor *geomDescMotion =
          [MTLAccelerationStructureMotionTriangleGeometryDescriptor descriptor];
      geomDescMotion.vertexBuffers = [NSArray arrayWithObjects:vertex_ptrs.data()
                                                        count:vertex_ptrs.size()];
      geomDescMotion.vertexStride = sizeof(verts.data()[0]);
      geomDescMotion.indexBuffer = indexBuf;
      geomDescMotion.indexBufferOffset = 0;
      geomDescMotion.indexType = MTLIndexTypeUInt32;
      geomDescMotion.triangleCount = num_indices / 3;
      geomDescMotion.intersectionFunctionTableOffset = 0;
      geomDescMotion.opaque = true;

      geomDesc = geomDescMotion;

      BVH_status("Building motion mesh BLAS | %7d tris | %s | %7d motion keyframes",
                 (int)mesh->num_triangles(),
                 geom->name.c_str(),
                 (int)num_motion_steps);
    }
    else {
      MTLAccelerationStructureTriangleGeometryDescriptor *geomDescNoMotion =
          [MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
      geomDescNoMotion.vertexBuffer = posBuf;
      geomDescNoMotion.vertexBufferOffset = 0;
      geomDescNoMotion.vertexStride = sizeof(verts.data()[0]);
      geomDescNoMotion.indexBuffer = indexBuf;
      geomDescNoMotion.indexBufferOffset = 0;
      geomDescNoMotion.indexType = MTLIndexTypeUInt32;
      geomDescNoMotion.triangleCount = num_indices / 3;
      geomDescNoMotion.intersectionFunctionTableOffset = 0;
      geomDescNoMotion.opaque = true;

      geomDesc = geomDescNoMotion;

      BVH_status(
          "Building mesh BLAS | %7d tris | %s", (int)mesh->num_triangles(), geom->name.c_str());
    }

    /* Force a single any-hit call, so shadow record-all behavior works correctly */
    /* (Match optix behavior: unsigned int build_flags =
     * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
    geomDesc.allowDuplicateIntersectionFunctionInvocation = false;

    MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
        [MTLPrimitiveAccelerationStructureDescriptor descriptor];
    accelDesc.geometryDescriptors = @[ geomDesc ];
    if (num_motion_steps > 1) {
      accelDesc.motionStartTime = 0.0f;
      accelDesc.motionEndTime = 1.0f;
      accelDesc.motionStartBorderMode = MTLMotionBorderModeClamp;
      accelDesc.motionEndBorderMode = MTLMotionBorderModeClamp;
      accelDesc.motionKeyframeCount = num_motion_steps;
    }
    accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;

    /* Dynamic BVHs must be refittable and prefer fast (re)builds over trace speed. */
    if (!use_fast_trace_bvh) {
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }

    MTLAccelerationStructureSizes accelSizes = [mtl_device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                       options:MTLResourceStorageModePrivate];
    /* 8-byte buffer that receives the compacted size (written as MTLDataTypeULong). */
    id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
                                                    options:MTLResourceStorageModeShared];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    if (refit) {
      /* Refit the previously built structure in-place into accel_uncompressed. */
      [accelEnc refitAccelerationStructure:accel_struct
                                descriptor:accelDesc
                               destination:accel_uncompressed
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel_uncompressed
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    if (use_fast_trace_bvh) {
      [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
                                               toBuffer:sizeBuf
                                                 offset:0
                                           sizeDataType:MTLDataTypeULong];
    }
    [accelEnc endEncoding];

    /* Estimated size of resources that will be wired for the GPU accelerated build.
     * Acceleration-struct size is doubled to account for possible compaction step. */
    size_t wired_size = posBuf.allocatedSize + indexBuf.allocatedSize + scratchBuf.allocatedSize +
                        accel_uncompressed.allocatedSize * 2;

    [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
      /* free temp resources */
      [scratchBuf release];
      [indexBuf release];
      [posBuf release];

      if (use_fast_trace_bvh) {
        /* Compact the accel structure */
        uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;

        /* Encode the compaction pass on a background queue so the completion-handler thread is
         * not blocked. */
        dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
          id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
          id<MTLAccelerationStructureCommandEncoder> accelEnc =
              [accelCommands accelerationStructureCommandEncoder];
          id<MTLAccelerationStructure> accel = [mtl_device
              newAccelerationStructureWithSize:compressed_size];
          [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
                                toAccelerationStructure:accel];
          [accelEnc endEncoding];
          [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
            set_accel_struct(accel);
            [accel_uncompressed release];

            /* Signal that we've finished doing GPU acceleration struct build. */
            g_bvh_build_throttler.release(wired_size);
          }];
          [accelCommands commit];
        });
      }
      else {
        /* set our acceleration structure to the uncompressed structure */
        set_accel_struct(accel_uncompressed);

        /* Signal that we've finished doing GPU acceleration struct build. */
        g_bvh_build_throttler.release(wired_size);
      }

      [sizeBuf release];
    }];

    /* Wait until it's safe to proceed with GPU acceleration struct build. */
    g_bvh_build_throttler.acquire(wired_size);
    [accelCommands commit];

    return true;
  }
  return false;
}
379
/* Build (or refit) a bottom-level acceleration structure for hair curves.
 *
 * Converts Cycles curves into Metal curve geometry (Catmull-Rom basis, 4 control points per
 * segment, with duplicated end keys as padding), uploads control points / radii / segment
 * indices, and encodes an asynchronous GPU build with optional compaction for static BVHs.
 * Requires the macOS 14 SDK and runtime (curve geometry support); otherwise returns false. */
bool BVHMetal::build_BLAS_hair(Progress &progress,
                               id<MTLDevice> mtl_device,
                               id<MTLCommandQueue> queue,
                               Geometry *const geom,
                               bool refit)
{
# if defined(MAC_OS_VERSION_14_0)
  if (@available(macos 14.0, *)) {
    /* Build BLAS for hair curves */
    Hair *hair = static_cast<Hair *>(geom);
    if (hair->num_curves() == 0) {
      return false;
    }

    /* Static BVHs are compacted for fast tracing; also forced when refitting is unsupported. */
    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC) || !support_refit_blas();

    size_t num_motion_steps = 1;
    /* NOTE(review): `motion_keys` is not declared in the code visible here — presumably a
     * motion-attribute lookup was lost from this excerpt; confirm against the full file. */
    if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
      num_motion_steps = hair->get_motion_steps();
    }

    id<MTLBuffer> cpBuffer = nil;
    id<MTLBuffer> radiusBuffer = nil;
    id<MTLBuffer> idxBuffer = nil;

    MTLAccelerationStructureGeometryDescriptor *geomDesc;
    if (num_motion_steps > 1) {
      MTLAccelerationStructureMotionCurveGeometryDescriptor *geomDescCrv =
          [MTLAccelerationStructureMotionCurveGeometryDescriptor descriptor];

      uint64_t numKeys = hair->num_keys();
      uint64_t numCurves = hair->num_curves();
      const array<float> &radiuses = hair->get_curve_radius();

      /* Gather the curve geometry. */
      std::vector<float3> cpData;
      std::vector<int> idxData;
      std::vector<float> radiusData;
      cpData.reserve(numKeys);
      radiusData.reserve(numKeys);

      /* Per-step offsets into cpData/radiusData, used for the keyframe buffer offsets below. */
      std::vector<int> step_offsets;
      for (size_t step = 0; step < num_motion_steps; ++step) {

        /* The center step for motion vertices is not stored in the attribute. */
        const float3 *keys = hair->get_curve_keys().data();
        size_t center_step = (num_motion_steps - 1) / 2;
        if (step != center_step) {
          size_t attr_offset = (step > center_step) ? step - 1 : step;
          /* Technically this is a float4 array, but sizeof(float3) == sizeof(float4). */
          keys = motion_keys->data_float3() + attr_offset * numKeys;
        }

        step_offsets.push_back(cpData.size());

        for (int c = 0; c < numCurves; ++c) {
          const Hair::Curve curve = hair->get_curve(c);
          int segCount = curve.num_segments();
          int firstKey = curve.first_key;
          uint64_t idxBase = cpData.size();
          /* Duplicate the first and last keys so each segment has full 4-point support. */
          cpData.push_back(keys[firstKey]);
          radiusData.push_back(radiuses[firstKey]);
          for (int s = 0; s < segCount; ++s) {
            /* Segment indices are shared between steps, so only emit them once. */
            if (step == 0) {
              idxData.push_back(idxBase + s);
            }
            cpData.push_back(keys[firstKey + s]);
            radiusData.push_back(radiuses[firstKey + s]);
          }
          cpData.push_back(keys[firstKey + curve.num_keys - 1]);
          cpData.push_back(keys[firstKey + curve.num_keys - 1]);
          radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
          radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
        }
      }

      /* Allocate and populate MTLBuffers for geometry. */
      idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
                                          length:idxData.size() * sizeof(int)
                                         options:MTLResourceStorageModeShared];

      cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
                                         length:cpData.size() * sizeof(float3)
                                        options:MTLResourceStorageModeShared];

      radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
                                             length:radiusData.size() * sizeof(float)
                                            options:MTLResourceStorageModeShared];

      /* Keyframe descriptors: one view per motion step into the shared buffers. */
      std::vector<MTLMotionKeyframeData *> cp_ptrs;
      std::vector<MTLMotionKeyframeData *> radius_ptrs;
      cp_ptrs.reserve(num_motion_steps);
      radius_ptrs.reserve(num_motion_steps);

      for (size_t step = 0; step < num_motion_steps; ++step) {
        MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
        k.buffer = cpBuffer;
        k.offset = step_offsets[step] * sizeof(float3);
        cp_ptrs.push_back(k);

        k = [MTLMotionKeyframeData data];
        k.buffer = radiusBuffer;
        k.offset = step_offsets[step] * sizeof(float);
        radius_ptrs.push_back(k);
      }

      geomDescCrv.controlPointBuffers = [NSArray arrayWithObjects:cp_ptrs.data()
                                                            count:cp_ptrs.size()];
      geomDescCrv.radiusBuffers = [NSArray arrayWithObjects:radius_ptrs.data()
                                                      count:radius_ptrs.size()];

      /* controlPointCount should specify the *per-step* control point count. */
      geomDescCrv.controlPointCount = cpData.size() / num_motion_steps;
      geomDescCrv.controlPointStride = sizeof(float3);
      geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
      geomDescCrv.radiusStride = sizeof(float);
      geomDescCrv.radiusFormat = MTLAttributeFormatFloat;
      geomDescCrv.segmentCount = idxData.size();
      geomDescCrv.segmentControlPointCount = 4;
      geomDescCrv.curveType = (hair->curve_shape == CURVE_RIBBON) ? MTLCurveTypeFlat :
                                                                    MTLCurveTypeRound;
      geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
      geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
      geomDescCrv.indexType = MTLIndexTypeUInt32;
      geomDescCrv.indexBuffer = idxBuffer;
      /* Slot 1 of the intersection function table is the curve intersector. */
      geomDescCrv.intersectionFunctionTableOffset = 1;

      /* Force a single any-hit call, so shadow record-all behavior works correctly */
      /* (Match optix behavior: unsigned int build_flags =
       * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
      geomDescCrv.allowDuplicateIntersectionFunctionInvocation = false;
      geomDescCrv.opaque = true;
      geomDesc = geomDescCrv;
    }
    else {
      MTLAccelerationStructureCurveGeometryDescriptor *geomDescCrv =
          [MTLAccelerationStructureCurveGeometryDescriptor descriptor];

      uint64_t numKeys = hair->num_keys();
      uint64_t numCurves = hair->num_curves();
      const array<float> &radiuses = hair->get_curve_radius();

      /* Gather the curve geometry. */
      std::vector<float3> cpData;
      std::vector<int> idxData;
      std::vector<float> radiusData;
      cpData.reserve(numKeys);
      radiusData.reserve(numKeys);
      auto keys = hair->get_curve_keys();
      for (int c = 0; c < numCurves; ++c) {
        const Hair::Curve curve = hair->get_curve(c);
        int segCount = curve.num_segments();
        int firstKey = curve.first_key;
        /* Duplicate the first and last keys so each segment has full 4-point support. */
        radiusData.push_back(radiuses[firstKey]);
        uint64_t idxBase = cpData.size();
        cpData.push_back(keys[firstKey]);
        for (int s = 0; s < segCount; ++s) {
          idxData.push_back(idxBase + s);
          cpData.push_back(keys[firstKey + s]);
          radiusData.push_back(radiuses[firstKey + s]);
        }
        cpData.push_back(keys[firstKey + curve.num_keys - 1]);
        cpData.push_back(keys[firstKey + curve.num_keys - 1]);
        radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
        radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
      }

      /* Allocate and populate MTLBuffers for geometry. */
      idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
                                          length:idxData.size() * sizeof(int)
                                         options:MTLResourceStorageModeShared];

      cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
                                         length:cpData.size() * sizeof(float3)
                                        options:MTLResourceStorageModeShared];

      radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
                                             length:radiusData.size() * sizeof(float)
                                            options:MTLResourceStorageModeShared];

      geomDescCrv.controlPointBuffer = cpBuffer;
      geomDescCrv.radiusBuffer = radiusBuffer;
      geomDescCrv.controlPointCount = cpData.size();
      geomDescCrv.controlPointStride = sizeof(float3);
      geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
      geomDescCrv.controlPointBufferOffset = 0;
      geomDescCrv.segmentCount = idxData.size();
      geomDescCrv.segmentControlPointCount = 4;
      geomDescCrv.curveType = (hair->curve_shape == CURVE_RIBBON) ? MTLCurveTypeFlat :
                                                                    MTLCurveTypeRound;
      geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
      geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
      geomDescCrv.indexType = MTLIndexTypeUInt32;
      geomDescCrv.indexBuffer = idxBuffer;
      /* Slot 1 of the intersection function table is the curve intersector. */
      geomDescCrv.intersectionFunctionTableOffset = 1;

      /* Force a single any-hit call, so shadow record-all behavior works correctly */
      /* (Match optix behavior: unsigned int build_flags =
       * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
      geomDescCrv.allowDuplicateIntersectionFunctionInvocation = false;
      geomDescCrv.opaque = true;
      geomDesc = geomDescCrv;
    }

    MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
        [MTLPrimitiveAccelerationStructureDescriptor descriptor];
    accelDesc.geometryDescriptors = @[ geomDesc ];

    if (num_motion_steps > 1) {
      accelDesc.motionStartTime = 0.0f;
      accelDesc.motionEndTime = 1.0f;
      accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
      accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
      accelDesc.motionKeyframeCount = num_motion_steps;

      BVH_status("Building motion hair BLAS | %7d curves | %s | %7d motion keyframes",
                 (int)hair->num_curves(),
                 geom->name.c_str(),
                 (int)num_motion_steps);
    }
    else {
      BVH_status(
          "Building hair BLAS | %7d curves | %s", (int)hair->num_curves(), geom->name.c_str());
    }

    /* Dynamic BVHs must be refittable and prefer fast (re)builds over trace speed. */
    if (!use_fast_trace_bvh) {
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }
    accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;

    MTLAccelerationStructureSizes accelSizes = [mtl_device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                       options:MTLResourceStorageModePrivate];
    /* 8-byte buffer that receives the compacted size (written as MTLDataTypeULong). */
    id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
                                                    options:MTLResourceStorageModeShared];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    if (refit) {
      /* Refit the previously built structure into accel_uncompressed. */
      [accelEnc refitAccelerationStructure:accel_struct
                                descriptor:accelDesc
                               destination:accel_uncompressed
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel_uncompressed
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    if (use_fast_trace_bvh) {
      [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
                                               toBuffer:sizeBuf
                                                 offset:0
                                           sizeDataType:MTLDataTypeULong];
    }
    [accelEnc endEncoding];

    /* Estimated size of resources that will be wired for the GPU accelerated build.
     * Acceleration-struct size is doubled to account for possible compaction step. */
    size_t wired_size = cpBuffer.allocatedSize + radiusBuffer.allocatedSize +
                        idxBuffer.allocatedSize + scratchBuf.allocatedSize +
                        accel_uncompressed.allocatedSize * 2;

    [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
      /* free temp resources */
      [scratchBuf release];
      [cpBuffer release];
      [radiusBuffer release];
      [idxBuffer release];

      if (use_fast_trace_bvh) {
        /* Compact the accel structure. */
        uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;

        /* Encode the compaction pass on a background queue so the completion-handler thread is
         * not blocked. */
        dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
          id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
          id<MTLAccelerationStructureCommandEncoder> accelEnc =
              [accelCommands accelerationStructureCommandEncoder];
          id<MTLAccelerationStructure> accel = [mtl_device
              newAccelerationStructureWithSize:compressed_size];
          [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
                                toAccelerationStructure:accel];
          [accelEnc endEncoding];
          [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
            set_accel_struct(accel);
            [accel_uncompressed release];

            /* Signal that we've finished doing GPU acceleration struct build. */
            g_bvh_build_throttler.release(wired_size);
          }];
          [accelCommands commit];
        });
      }
      else {
        /* set our acceleration structure to the uncompressed structure */
        set_accel_struct(accel_uncompressed);

        /* Signal that we've finished doing GPU acceleration struct build. */
        g_bvh_build_throttler.release(wired_size);
      }

      [sizeBuf release];
    }];

    /* Wait until it's safe to proceed with GPU acceleration struct build. */
    g_bvh_build_throttler.acquire(wired_size);
    [accelCommands commit];

    return true;
  }
# else /* MAC_OS_VERSION_14_0 */
  (void)progress;
  (void)mtl_device;
  (void)queue;
  (void)geom;
  (void)(refit);
# endif /* MAC_OS_VERSION_14_0 */
  return false;
}
705
/* Build (or refit) a bottom-level acceleration structure for a point cloud.
 *
 * Points are represented as custom bounding-box geometry (one AABB per point per motion step)
 * intersected via a custom intersection function. The build is asynchronous, with optional
 * compaction for static BVHs, mirroring build_BLAS_mesh(). */
bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
                                     id<MTLDevice> mtl_device,
                                     id<MTLCommandQueue> queue,
                                     Geometry *const geom,
                                     bool refit)
{
  if (@available(macos 12.0, *)) {
    /* Build BLAS for point cloud */
    PointCloud *pointcloud = static_cast<PointCloud *>(geom);
    if (pointcloud->num_points() == 0) {
      return false;
    }

    const size_t num_points = pointcloud->get_points().size();
    const float3 *points = pointcloud->get_points().data();
    const float *radius = pointcloud->get_radius().data();

    /* Static BVHs are compacted for fast tracing; also forced when refitting is unsupported. */
    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC) || !support_refit_blas();

    size_t num_motion_steps = 1;
    Attribute *motion_keys = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
    if (motion_blur && pointcloud->get_use_motion_blur() && motion_keys) {
      num_motion_steps = pointcloud->get_motion_steps();
    }

    const size_t num_aabbs = num_motion_steps * num_points;

    /* Allocate a GPU buffer for the AABB data and populate it */
    id<MTLBuffer> aabbBuf = [mtl_device
        newBufferWithLength:num_aabbs * sizeof(MTLAxisAlignedBoundingBox)
                    options:MTLResourceStorageModeShared];
    MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents];

    /* Get AABBs for each motion step */
    size_t center_step = (num_motion_steps - 1) / 2;
    for (size_t step = 0; step < num_motion_steps; ++step) {
      if (step == center_step) {
        /* The center step for motion vertices is not stored in the attribute */
        for (size_t j = 0; j < num_points; ++j) {
          const PointCloud::Point point = pointcloud->get_point(j);
          /* NOTE(review): `bounds` is declared in a line missing from this excerpt (presumably
           * an empty BoundBox per point) — confirm against the full file. */
          point.bounds_grow(points, radius, bounds);

          const size_t index = step * num_points + j;
          aabb_data[index].min = (MTLPackedFloat3 &)bounds.min;
          aabb_data[index].max = (MTLPackedFloat3 &)bounds.max;
        }
      }
      else {
        size_t attr_offset = (step > center_step) ? step - 1 : step;
        float4 *motion_points = motion_keys->data_float4() + attr_offset * num_points;

        for (size_t j = 0; j < num_points; ++j) {
          const PointCloud::Point point = pointcloud->get_point(j);
          /* NOTE(review): `bounds` declaration missing from this excerpt here as well. */
          point.bounds_grow(motion_points[j], bounds);

          const size_t index = step * num_points + j;
          aabb_data[index].min = (MTLPackedFloat3 &)bounds.min;
          aabb_data[index].max = (MTLPackedFloat3 &)bounds.max;
        }
      }
    }

    MTLAccelerationStructureGeometryDescriptor *geomDesc;
    if (num_motion_steps > 1) {
      /* One keyframe descriptor per motion step, each a view into aabbBuf. */
      std::vector<MTLMotionKeyframeData *> aabb_ptrs;
      aabb_ptrs.reserve(num_motion_steps);
      for (size_t step = 0; step < num_motion_steps; ++step) {
        MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
        k.buffer = aabbBuf;
        k.offset = step * num_points * sizeof(MTLAxisAlignedBoundingBox);
        aabb_ptrs.push_back(k);
      }

      MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor *geomDescMotion =
          [MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor descriptor];
      geomDescMotion.boundingBoxBuffers = [NSArray arrayWithObjects:aabb_ptrs.data()
                                                              count:aabb_ptrs.size()];
      geomDescMotion.boundingBoxCount = num_points;
      geomDescMotion.boundingBoxStride = sizeof(aabb_data[0]);
      /* Slot 2 of the intersection function table is the point intersector. */
      geomDescMotion.intersectionFunctionTableOffset = 2;

      /* Force a single any-hit call, so shadow record-all behavior works correctly */
      /* (Match optix behavior: unsigned int build_flags =
       * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
      geomDescMotion.allowDuplicateIntersectionFunctionInvocation = false;
      geomDescMotion.opaque = true;
      geomDesc = geomDescMotion;
    }
    else {
      MTLAccelerationStructureBoundingBoxGeometryDescriptor *geomDescNoMotion =
          [MTLAccelerationStructureBoundingBoxGeometryDescriptor descriptor];
      geomDescNoMotion.boundingBoxBuffer = aabbBuf;
      geomDescNoMotion.boundingBoxBufferOffset = 0;
      geomDescNoMotion.boundingBoxCount = int(num_aabbs);
      geomDescNoMotion.boundingBoxStride = sizeof(aabb_data[0]);
      /* Slot 2 of the intersection function table is the point intersector. */
      geomDescNoMotion.intersectionFunctionTableOffset = 2;

      /* Force a single any-hit call, so shadow record-all behavior works correctly */
      /* (Match optix behavior: unsigned int build_flags =
       * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
      geomDescNoMotion.allowDuplicateIntersectionFunctionInvocation = false;
      geomDescNoMotion.opaque = true;
      geomDesc = geomDescNoMotion;
    }

    MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
        [MTLPrimitiveAccelerationStructureDescriptor descriptor];
    accelDesc.geometryDescriptors = @[ geomDesc ];

    if (num_motion_steps > 1) {
      accelDesc.motionStartTime = 0.0f;
      accelDesc.motionEndTime = 1.0f;
      // accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
      // accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
      accelDesc.motionKeyframeCount = num_motion_steps;

      BVH_status("Building motion pointcloud BLAS | %7d points | %s | %7d motion keyframes",
                 (int)pointcloud->num_points(),
                 geom->name.c_str(),
                 (int)num_motion_steps);
    }
    else {
      BVH_status("Building pointcloud BLAS | %7d points | %s",
                 (int)pointcloud->num_points(),
                 geom->name.c_str());
    }
    accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;

    /* Dynamic BVHs must be refittable and prefer fast (re)builds over trace speed. */
    if (!use_fast_trace_bvh) {
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }

    MTLAccelerationStructureSizes accelSizes = [mtl_device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                       options:MTLResourceStorageModePrivate];
    /* 8-byte buffer that receives the compacted size (written as MTLDataTypeULong). */
    id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
                                                    options:MTLResourceStorageModeShared];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    if (refit) {
      /* Refit the previously built structure into accel_uncompressed. */
      [accelEnc refitAccelerationStructure:accel_struct
                                descriptor:accelDesc
                               destination:accel_uncompressed
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel_uncompressed
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    if (use_fast_trace_bvh) {
      [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
                                               toBuffer:sizeBuf
                                                 offset:0
                                           sizeDataType:MTLDataTypeULong];
    }
    [accelEnc endEncoding];

    /* Estimated size of resources that will be wired for the GPU accelerated build.
     * Acceleration-struct size is doubled to account for possible compaction step. */
    size_t wired_size = aabbBuf.allocatedSize + scratchBuf.allocatedSize +
                        accel_uncompressed.allocatedSize * 2;

    [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
      /* free temp resources */
      [scratchBuf release];
      [aabbBuf release];

      if (use_fast_trace_bvh) {
        /* Compact the accel structure */
        uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;

        /* Encode the compaction pass on a background queue so the completion-handler thread is
         * not blocked. */
        dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
          id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
          id<MTLAccelerationStructureCommandEncoder> accelEnc =
              [accelCommands accelerationStructureCommandEncoder];
          id<MTLAccelerationStructure> accel = [mtl_device
              newAccelerationStructureWithSize:compressed_size];
          [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
                                toAccelerationStructure:accel];
          [accelEnc endEncoding];
          [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
            set_accel_struct(accel);
            [accel_uncompressed release];

            /* Signal that we've finished doing GPU acceleration struct build. */
            g_bvh_build_throttler.release(wired_size);
          }];
          [accelCommands commit];
        });
      }
      else {
        /* set our acceleration structure to the uncompressed structure */
        set_accel_struct(accel_uncompressed);

        /* Signal that we've finished doing GPU acceleration struct build. */
        g_bvh_build_throttler.release(wired_size);
      }

      [sizeBuf release];
    }];

    /* Wait until it's safe to proceed with GPU acceleration struct build. */
    g_bvh_build_throttler.acquire(wired_size);
    [accelCommands commit];
    return true;
  }
  return false;
}
924
/* Build the bottom-level acceleration structure (BLAS) for the single geometry
 * owned by this BVH, dispatching on the geometry's primitive type.
 *
 * \param progress    Progress reporting sink for build status updates.
 * \param mtl_device  Metal device used to allocate and build the structure.
 * \param queue       Command queue on which build commands are encoded.
 * \param refit       When true, refit the existing structure instead of a full rebuild.
 * \return true on success, false for unsupported geometry types.
 */
bool BVHMetal::build_BLAS(Progress &progress,
                          id<MTLDevice> mtl_device,
                          id<MTLCommandQueue> queue,
                          bool refit)
{
  /* A BLAS wraps exactly one geometry with one object. */
  assert(objects.size() == 1 && geometry.size() == 1);

  /* Build bottom level acceleration structures (BLAS) */
  Geometry *const geom = geometry[0];
  switch (geom->geometry_type) {
    case Geometry::VOLUME:
    case Geometry::MESH:
      return build_BLAS_mesh(progress, mtl_device, queue, geom, refit);
    case Geometry::HAIR:
      return build_BLAS_hair(progress, mtl_device, queue, geom, refit);
    case Geometry::POINTCLOUD:
      return build_BLAS_pointcloud(progress, mtl_device, queue, geom, refit);
    default:
      return false;
  }
  return false;
}
947
948# if defined(MAC_OS_VERSION_15_0)
949
950/* Return MTLComponentTransform from a DecomposedTransform. */
/* Convert a Cycles DecomposedTransform into Metal's MTLComponentTransform.
 * The pivot is always the origin; the remaining components are read from the
 * lanes of the DecomposedTransform's float4 members (rotation quaternion from
 * src.x, translation from src.y.xyz, scale/shear from the remaining lanes). */
static MTLComponentTransform decomposed_to_component_transform(const DecomposedTransform &src)
{
  MTLComponentTransform result;
  result.translation = MTLPackedFloat3Make(src.y.x, src.y.y, src.y.z);
  result.rotation = MTLPackedFloatQuaternionMake(src.x.x, src.x.y, src.x.z, src.x.w);
  result.scale = MTLPackedFloat3Make(src.y.w, src.z.w, src.w.w);
  result.shear = MTLPackedFloat3Make(src.z.x, src.z.y, src.w.x);
  result.pivot = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
  return result;
}
961
962/* Return unit MTLComponentTransform. */
/* Return the identity MTLComponentTransform: unit scale, identity rotation
 * quaternion, and zero shear, pivot and translation. */
static MTLComponentTransform component_transform_make_unit()
{
  MTLComponentTransform identity;
  identity.translation = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
  identity.rotation = MTLPackedFloatQuaternionMake(0.0f, 0.0f, 0.0f, 1.0f);
  identity.scale = MTLPackedFloat3Make(1.0f, 1.0f, 1.0f);
  identity.shear = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
  identity.pivot = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
  return identity;
}
973
974# endif
975
/* Build the top-level acceleration structure (TLAS) over all object instances.
 *
 * Waits for all in-flight BLAS builds first, then bakes one instance
 * descriptor per object (motion or static flavor), builds the instance
 * acceleration structure synchronously, and caches the result via
 * set_accel_struct().
 *
 * \param progress    Progress reporting sink for build status updates.
 * \param mtl_device  Metal device used to allocate and build the structure.
 * \param queue       Command queue on which build commands are encoded.
 * \param refit       When true, refit the existing structure instead of a full rebuild.
 * \return true on success, false when MetalRT is unavailable (pre-macOS 12).
 */
bool BVHMetal::build_TLAS(Progress &progress,
                          id<MTLDevice> mtl_device,
                          id<MTLCommandQueue> queue,
                          bool refit)
{
  /* Wait for all BLAS builds to finish. */
  g_bvh_build_throttler.wait_for_all();

  if (@available(macos 12.0, *)) {
    /* Defined inside available check, for return type to be available.
     * Builds a degenerate (zero-triangle) BLAS used as a placeholder for
     * non-traceable objects, so instance indices stay dense. */
    auto make_null_BLAS = [](id<MTLDevice> mtl_device,
                             id<MTLCommandQueue> queue) -> id<MTLAccelerationStructure> {
      id<MTLBuffer> nullBuf = [mtl_device newBufferWithLength:sizeof(float3)
                                                      options:MTLResourceStorageModeShared];

      /* Create an acceleration structure. */
      MTLAccelerationStructureTriangleGeometryDescriptor *geomDesc =
          [MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
      geomDesc.vertexBuffer = nullBuf;
      geomDesc.vertexBufferOffset = 0;
      geomDesc.vertexStride = sizeof(float3);
      geomDesc.indexBuffer = nullBuf;
      geomDesc.indexBufferOffset = 0;
      geomDesc.indexType = MTLIndexTypeUInt32;
      geomDesc.triangleCount = 0;
      geomDesc.intersectionFunctionTableOffset = 0;
      geomDesc.opaque = true;
      geomDesc.allowDuplicateIntersectionFunctionInvocation = false;

      MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
          [MTLPrimitiveAccelerationStructureDescriptor descriptor];
      accelDesc.geometryDescriptors = @[ geomDesc ];
      accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;

      MTLAccelerationStructureSizes accelSizes = [mtl_device
          accelerationStructureSizesWithDescriptor:accelDesc];
      id<MTLAccelerationStructure> accel_struct = [mtl_device
          newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
      id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                         options:MTLResourceStorageModePrivate];
      id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
                                                      options:MTLResourceStorageModeShared];
      id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
      id<MTLAccelerationStructureCommandEncoder> accelEnc =
          [accelCommands accelerationStructureCommandEncoder];
      [accelEnc buildAccelerationStructure:accel_struct
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
      [accelEnc endEncoding];
      [accelCommands commit];
      [accelCommands waitUntilCompleted];

      /* free temp resources */
      [scratchBuf release];
      [nullBuf release];
      [sizeBuf release];

      return accel_struct;
    };

    /* Count instances and motion transforms up front so buffers can be sized exactly. */
    uint32_t num_instances = 0;
    uint32_t num_motion_transforms = 0;
    uint32_t num_motion_instances = 0;
    for (Object *ob : objects) {
      num_instances++;

      if (ob->use_motion()) {
        /* Objects with motion but zero keys still consume one transform slot. */
        num_motion_transforms += max((size_t)1, ob->get_motion().size());
        num_motion_instances++;
      }
      else {
        num_motion_transforms++;
      }
    }

    const bool use_instance_motion = motion_blur && num_motion_instances;
    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC) || !support_refit_blas();

    NSMutableArray *all_blas = [NSMutableArray array];
    unordered_map<const BVHMetal *, int> instance_mapping;

    /* Lambda function to build/retrieve the BLAS index mapping */
    auto get_blas_index = [&](const BVHMetal *blas) {
      auto it = instance_mapping.find(blas);
      if (it != instance_mapping.end()) {
        return it->second;
      }
      int blas_index = (int)[all_blas count];
      instance_mapping[blas] = blas_index;
      if (@available(macos 12.0, *)) {
        [all_blas addObject:(blas ? blas->accel_struct : null_BLAS)];
      }
      return blas_index;
    };

    size_t instance_size;
    if (use_instance_motion) {
      instance_size = sizeof(MTLAccelerationStructureMotionInstanceDescriptor);
    }
    else {
      instance_size = sizeof(MTLAccelerationStructureUserIDInstanceDescriptor);
    }

    /* Allocate a GPU buffer for the instance data and populate it */
    id<MTLBuffer> instanceBuf = [mtl_device newBufferWithLength:num_instances * instance_size
                                                        options:MTLResourceStorageModeShared];
    id<MTLBuffer> motion_transforms_buf = nil;
    MTLPackedFloat4x3 *matrix_motion_transforms = nullptr;
# if defined(MAC_OS_VERSION_15_0)
    MTLComponentTransform *decomposed_motion_transforms = nullptr;
# endif
    if (use_instance_motion && num_motion_transforms) {
# if defined(MAC_OS_VERSION_15_0)
      if (use_pcmi) {
        if (@available(macos 15.0, *)) {
          motion_transforms_buf = [mtl_device
              newBufferWithLength:num_motion_transforms * sizeof(MTLComponentTransform)
                          options:MTLResourceStorageModeShared];
          decomposed_motion_transforms = (MTLComponentTransform *)motion_transforms_buf.contents;
        }
      }
      else
# endif
      {
        motion_transforms_buf = [mtl_device
            newBufferWithLength:num_motion_transforms * sizeof(MTLPackedFloat4x3)
                        options:MTLResourceStorageModeShared];
        matrix_motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents;
      }
    }

    uint32_t instance_index = 0;
    uint32_t motion_transform_index = 0;

    blas_array.clear();
    blas_array.reserve(num_instances);

    for (Object *ob : objects) {
      /* Skip non-traceable objects */
      const Geometry *geom = ob->get_geometry();
      const BVHMetal *blas = static_cast<const BVHMetal *>(geom->bvh.get());
      if (!blas || !blas->accel_struct || !ob->is_traceable()) {
        /* Place a degenerate instance, to ensure [[instance_id]] equals ob->get_device_index()
         * in our intersection functions */
        blas = nullptr;

        /* Workaround for issue in macOS <= 14.1: Insert degenerate BLAS instead of zero-filling
         * the descriptor. */
        if (!null_BLAS) {
          null_BLAS = make_null_BLAS(mtl_device, queue);
        }
        blas_array.push_back(null_BLAS);
      }
      else {
        blas_array.push_back(blas->accel_struct);
      }

      uint32_t accel_struct_index = get_blas_index(blas);

      /* Add some of the object visibility bits to the mask.
       * __prim_visibility contains the combined visibility bits of all instances, so is not
       * reliable if they differ between instances.
       */
      uint32_t mask = ob->visibility_for_tracing();

      /* Have to have at least one bit in the mask, or else instance would always be culled. */
      if (0 == mask) {
        mask = 0xFF;
      }

      /* Set user instance ID to object index */
      uint32_t primitive_offset = 0;
      int currIndex = instance_index++;

      if (geom->is_hair()) {
        /* Build BLAS for curve primitives. */
        Hair *const hair = static_cast<Hair *const>(const_cast<Geometry *>(geom));
        primitive_offset = uint32_t(hair->curve_segment_offset);
      }
      else if (geom->is_mesh() || geom->is_volume()) {
        /* Build BLAS for triangle primitives. */
        Mesh *const mesh = static_cast<Mesh *const>(const_cast<Geometry *>(geom));
        primitive_offset = uint32_t(mesh->prim_offset);
      }
      else if (geom->is_pointcloud()) {
        /* Build BLAS for points primitives. */
        PointCloud *const pointcloud = static_cast<PointCloud *const>(
            const_cast<Geometry *>(geom));
        primitive_offset = uint32_t(pointcloud->prim_offset);
      }

      /* Bake into the appropriate descriptor */
      if (use_instance_motion) {
        MTLAccelerationStructureMotionInstanceDescriptor *instances =
            (MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
        MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[currIndex];

        desc.accelerationStructureIndex = accel_struct_index;
        desc.userID = primitive_offset;
        desc.mask = mask;
        desc.motionStartTime = 0.0f;
        desc.motionEndTime = 1.0f;
        desc.motionTransformsStartIndex = motion_transform_index;
        desc.motionStartBorderMode = MTLMotionBorderModeVanish;
        desc.motionEndBorderMode = MTLMotionBorderModeVanish;
        desc.intersectionFunctionTableOffset = 0;

        array<DecomposedTransform> decomp(ob->get_motion().size());
        transform_motion_decompose(
            decomp.data(), ob->get_motion().data(), ob->get_motion().size());

        int key_count = ob->get_motion().size();
        if (key_count) {
          desc.motionTransformsCount = key_count;

# if defined(MAC_OS_VERSION_15_0)
          if (use_pcmi) {
            for (int i = 0; i < key_count; i++) {
              decomposed_motion_transforms[motion_transform_index++] =
                  decomposed_to_component_transform(decomp[i]);
            }
          }
          else
# endif
          {
            Transform *keys = ob->get_motion().data();
            for (int i = 0; i < key_count; i++) {
              float *t = (float *)&matrix_motion_transforms[motion_transform_index++];
              /* Transpose transform */
              const auto *src = (const float *)&keys[i];
              for (int e = 0; e < 12; e++) {
                t[e] = src[(e / 3) + 4 * (e % 3)];
              }
            }
          }
        }
        else {
          desc.motionTransformsCount = 1;

# if defined(MAC_OS_VERSION_15_0)
          if (use_pcmi) {
            if (ob->get_geometry()->is_instanced()) {
              DecomposedTransform decomp;
              transform_motion_decompose(&decomp, &ob->get_tfm(), 1);
              decomposed_motion_transforms[motion_transform_index++] =
                  decomposed_to_component_transform(decomp);
            }
            else {
              decomposed_motion_transforms[motion_transform_index++] =
                  component_transform_make_unit();
            }
          }
          else
# endif
          {
            float *t = (float *)&matrix_motion_transforms[motion_transform_index++];
            if (ob->get_geometry()->is_instanced()) {
              /* Transpose transform */
              const auto *src = (const float *)&ob->get_tfm();
              for (int e = 0; e < 12; e++) {
                t[e] = src[(e / 3) + 4 * (e % 3)];
              }
            }
            else {
              /* Clear transform to identity matrix */
              t[0] = t[4] = t[8] = 1.0f;
            }
          }
        }
      }
      else {
        MTLAccelerationStructureUserIDInstanceDescriptor *instances =
            (MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
        MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[currIndex];

        desc.accelerationStructureIndex = accel_struct_index;
        desc.userID = primitive_offset;
        desc.mask = mask;
        desc.intersectionFunctionTableOffset = 0;
        desc.options = MTLAccelerationStructureInstanceOptionOpaque;

        float *t = (float *)&desc.transformationMatrix;
        if (ob->get_geometry()->is_instanced()) {
          /* Transpose transform */
          const auto *src = (const float *)&ob->get_tfm();
          for (int e = 0; e < 12; e++) {
            t[e] = src[(e / 3) + 4 * (e % 3)];
          }
        }
        else {
          /* Clear transform to identity matrix */
          t[0] = t[4] = t[8] = 1.0f;
        }
      }
    }

    if (use_instance_motion) {
      BVH_status(
          "Building motion TLAS | %7d instances | %7d motion instances | %7d motion "
          "transforms",
          (int)num_instances,
          (int)num_motion_instances,
          (int)num_motion_transforms);
    }
    else {
      BVH_status("Building TLAS | %7d instances", (int)num_instances);
    }

    MTLInstanceAccelerationStructureDescriptor *accelDesc =
        [MTLInstanceAccelerationStructureDescriptor descriptor];
    accelDesc.instanceCount = num_instances;
    accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeUserID;
    accelDesc.instanceDescriptorBuffer = instanceBuf;
    accelDesc.instanceDescriptorBufferOffset = 0;
    accelDesc.instanceDescriptorStride = instance_size;
    accelDesc.instancedAccelerationStructures = all_blas;

    if (use_instance_motion) {
      accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeMotion;
      accelDesc.motionTransformBuffer = motion_transforms_buf;
      accelDesc.motionTransformCount = num_motion_transforms;
# if defined(MAC_OS_VERSION_15_0)
      if (@available(macos 15.0, *)) {
        accelDesc.motionTransformStride = 0;
        accelDesc.motionTransformType = use_pcmi ? MTLTransformTypeComponent :
                                                   MTLTransformTypePackedFloat4x3;
      }
# endif
    }

    accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
    if (!use_fast_trace_bvh) {
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }

    MTLAccelerationStructureSizes accelSizes = [mtl_device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel = [mtl_device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                       options:MTLResourceStorageModePrivate];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    if (refit) {
      [accelEnc refitAccelerationStructure:accel_struct
                                descriptor:accelDesc
                               destination:accel
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    [accelEnc endEncoding];
    [accelCommands commit];
    /* TLAS build is synchronous, unlike BLAS builds which are throttled/async. */
    [accelCommands waitUntilCompleted];

    if (motion_transforms_buf) {
      [motion_transforms_buf release];
    }
    [instanceBuf release];
    [scratchBuf release];

    /* Cache top and bottom-level acceleration structs */
    set_accel_struct(accel);

    unique_blas_array.clear();
    unique_blas_array.reserve(all_blas.count);
    [all_blas enumerateObjectsUsingBlock:^(id<MTLAccelerationStructure> blas, NSUInteger, BOOL *) {
      unique_blas_array.push_back(blas);
    }];

    return true;
  }
  return false;
}
1358
/* Entry point for building this BVH: validates the refit request, then
 * dispatches to the BLAS or TLAS builder depending on params.top_level. */
bool BVHMetal::build(Progress &progress,
                     id<MTLDevice> mtl_device,
                     id<MTLCommandQueue> queue,
                     bool refit)
{
  if (@available(macos 12.0, *)) {
    /* It isn't valid to refit a non-existent BVH, or one which wasn't constructed as dynamic.
     * In such cases, assert in development but try to recover in the wild. */
    if (refit && params.bvh_type != BVH_TYPE_DYNAMIC) {
      assert(!"Can't refit static Metal BVH");
      refit = false;
    }
    if (refit && !accel_struct) {
      assert(!"Can't refit non-existing Metal BVH");
      refit = false;
    }

    /* A full rebuild discards any previously cached acceleration structure. */
    if (!refit) {
      set_accel_struct(nil);
    }
  }

  if (!support_refit_blas()) {
    refit = false;
  }

  @autoreleasepool {
    return params.top_level ? build_TLAS(progress, mtl_device, queue, refit) :
                              build_BLAS(progress, mtl_device, queue, refit);
  }
}
1394
1396
1397#endif /* WITH_METAL */
float progress
Definition WM_types.hh:1019
volatile int lock
for(;discarded_id_iter !=nullptr;discarded_id_iter=static_cast< ID * >(discarded_id_iter->next))
Definition blendfile.cc:634
BMesh const char void * data
unsigned long long int uint64_t
static btDbvtVolume bounds(btDbvtNode **leaves, int count)
Definition btDbvt.cpp:299
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
Attribute * find(ustring name) const
Definition bvh/bvh.h:67
Type geometry_type
bool is_volume() const
bool is_pointcloud() const
bool is_hair() const
size_t prim_offset
unique_ptr< BVH > bvh
AttributeSet attributes
bool is_mesh() const
Definition hair.h:13
Curve get_curve(const size_t i) const
Definition hair.h:111
size_t curve_segment_offset
Definition hair.h:90
size_t num_curves() const
Definition hair.h:126
CurveShapeType curve_shape
Definition hair.h:91
size_t num_keys() const
Definition hair.h:121
size_t size() const
CCL_NAMESPACE_BEGIN struct Options options
#define CCL_NAMESPACE_END
static float verts[][3]
ThreadMutex mutex
VecBase< float, 4 > float4
#define assert(assertion)
VecBase< float, D > step(VecOp< float, D >, VecOp< float, D >) RET
float length(VecOp< float, D >) RET
uiWidgetBaseParameters params[MAX_WIDGET_BASE_BATCH]
int count
@ ATTR_STD_MOTION_VERTEX_POSITION
@ CURVE_RIBBON
descriptor
ccl_device_inline float2 mask(const MaskType mask, const float2 a)
@ BVH_TYPE_DYNAMIC
Definition params.h:33
@ BVH_TYPE_STATIC
Definition params.h:40
float3 * data_float3()
float4 * data_float4()
int first_key
Definition hair.h:19
int num_segments() const
Definition hair.h:22
int num_keys
Definition hair.h:20
size_t num_triangles() const
Definition scene/mesh.h:77
ustring name
Definition graph/node.h:177
void bounds_grow(const float3 *points, const float *radius, BoundBox &bounds) const
Point get_point(const int i) const
size_t num_points() const
i
Definition text_draw.cc:230
max
Definition text_draw.cc:251
std::mutex thread_mutex
Definition thread.h:27
std::unique_lock< std::mutex > thread_scoped_lock
Definition thread.h:28
void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, const size_t size)