Blender V4.3
bvh.mm
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_METAL
6
# include "scene/hair.h"
# include "scene/mesh.h"
# include "scene/object.h"
# include "scene/pointcloud.h"

# include "util/progress.h"

# include "device/metal/bvh.h"
# include "device/metal/util.h"

# include <chrono>
# include <mutex>
# include <thread>
16
18
19# define BVH_status(...) \
20 { \
21 string str = string_printf(__VA_ARGS__); \
22 progress.set_substatus(str); \
23 metal_printf("%s\n", str.c_str()); \
24 }
25
26// # define BVH_THROTTLE_DIAGNOSTICS
27# ifdef BVH_THROTTLE_DIAGNOSTICS
28# define bvh_throttle_printf(...) printf("BVHMetalBuildThrottler::" __VA_ARGS__)
29# else
30# define bvh_throttle_printf(...)
31# endif
32
33/* Limit the number of concurrent BVH builds so that we don't approach unsafe GPU working set
34 * sizes. */
35struct BVHMetalBuildThrottler {
37 size_t wired_memory = 0;
38 size_t safe_wired_limit = 0;
39 int requests_in_flight = 0;
40
41 BVHMetalBuildThrottler()
42 {
43 /* The default device will always be the one that supports MetalRT if the machine supports it.
44 */
45 id<MTLDevice> mtlDevice = MTLCreateSystemDefaultDevice();
46
47 /* Set a conservative limit, but which will still only throttle in extreme cases. */
48 safe_wired_limit = [mtlDevice recommendedMaxWorkingSetSize] / 4;
49 bvh_throttle_printf("safe_wired_limit = %zu\n", safe_wired_limit);
50 }
51
52 /* Block until we're safely able to wire the requested resources. */
53 void acquire(size_t bytes_to_be_wired)
54 {
55 bool throttled = false;
56 while (true) {
57 {
59
60 /* Always allow a BVH build to proceed if no other is in flight, otherwise
61 * only proceed if we're within safe limits. */
62 if (wired_memory == 0 || wired_memory + bytes_to_be_wired <= safe_wired_limit) {
63 wired_memory += bytes_to_be_wired;
64 requests_in_flight += 1;
65 bvh_throttle_printf("acquire -- success (requests_in_flight = %d, wired_memory = %zu)\n",
66 requests_in_flight,
67 wired_memory);
68 return;
69 }
70
71 if (!throttled) {
72 bvh_throttle_printf(
73 "acquire -- throttling (requests_in_flight = %d, wired_memory = %zu, "
74 "bytes_to_be_wired = %zu)\n",
75 requests_in_flight,
76 wired_memory,
77 bytes_to_be_wired);
78 }
79 throttled = true;
80 }
81
82 std::this_thread::sleep_for(std::chrono::milliseconds(10));
83 }
84 }
85
86 /* Notify of resources that have stopped being wired. */
87 void release(size_t bytes_just_unwired)
88 {
90 wired_memory -= bytes_just_unwired;
91 requests_in_flight -= 1;
92 bvh_throttle_printf("release (requests_in_flight = %d, wired_memory = %zu)\n",
93 requests_in_flight,
94 wired_memory);
95 }
96
97 /* Wait for all outstanding work to finish. */
98 void wait_for_all()
99 {
100 while (true) {
101 {
103 if (wired_memory == 0) {
104 return;
105 }
106 }
107 std::this_thread::sleep_for(std::chrono::milliseconds(10));
108 }
109 }
110} g_bvh_build_throttler;
111
112BVHMetal::BVHMetal(const BVHParams &params_,
113 const vector<Geometry *> &geometry_,
114 const vector<Object *> &objects_,
115 Device *device)
116 : BVH(params_, geometry_, objects_), device(device)
117{
118}
119
120BVHMetal::~BVHMetal()
121{
122 if (@available(macos 12.0, *)) {
123 set_accel_struct(nil);
124 if (null_BLAS) {
125 [null_BLAS release];
126 }
127 }
128}
129
130API_AVAILABLE(macos(11.0))
131void BVHMetal::set_accel_struct(id<MTLAccelerationStructure> new_accel_struct)
132{
133 if (@available(macos 12.0, *)) {
134 if (accel_struct) {
135 device->stats.mem_free(accel_struct.allocatedSize);
136 [accel_struct release];
137 accel_struct = nil;
138 }
139
140 if (new_accel_struct) {
141 accel_struct = new_accel_struct;
142 device->stats.mem_alloc(accel_struct.allocatedSize);
143 }
144 }
145}
146
147bool BVHMetal::build_BLAS_mesh(Progress &progress,
148 id<MTLDevice> mtl_device,
149 id<MTLCommandQueue> queue,
150 Geometry *const geom,
151 bool refit)
152{
153 if (@available(macos 12.0, *)) {
154 /* Build BLAS for triangle primitives */
155 Mesh *const mesh = static_cast<Mesh *const>(geom);
156 if (mesh->num_triangles() == 0) {
157 return false;
158 }
159
160 /*------------------------------------------------*/
161 BVH_status(
162 "Building mesh BLAS | %7d tris | %s", (int)mesh->num_triangles(), geom->name.c_str());
163 /*------------------------------------------------*/
164
165 const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);
166
167 const array<float3> &verts = mesh->get_verts();
168 const array<int> &tris = mesh->get_triangles();
169 const size_t num_verts = verts.size();
170 const size_t num_indices = tris.size();
171
172 size_t num_motion_steps = 1;
173 Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
174 if (motion_blur && mesh->get_use_motion_blur() && motion_keys) {
175 num_motion_steps = mesh->get_motion_steps();
176 }
177
178 MTLResourceOptions storage_mode;
179 if (mtl_device.hasUnifiedMemory) {
180 storage_mode = MTLResourceStorageModeShared;
181 }
182 else {
183 storage_mode = MTLResourceStorageModeManaged;
184 }
185
186 /* Upload the mesh data to the GPU */
187 id<MTLBuffer> posBuf = nil;
188 id<MTLBuffer> indexBuf = [mtl_device newBufferWithBytes:tris.data()
189 length:num_indices * sizeof(tris.data()[0])
190 options:storage_mode];
191
192 if (num_motion_steps == 1) {
193 posBuf = [mtl_device newBufferWithBytes:verts.data()
194 length:num_verts * sizeof(verts.data()[0])
195 options:storage_mode];
196 }
197 else {
198 posBuf = [mtl_device
199 newBufferWithLength:num_verts * num_motion_steps * sizeof(verts.data()[0])
200 options:storage_mode];
201 float3 *dest_data = (float3 *)[posBuf contents];
202 size_t center_step = (num_motion_steps - 1) / 2;
203 for (size_t step = 0; step < num_motion_steps; ++step) {
204 const float3 *verts = mesh->get_verts().data();
205
206 /* The center step for motion vertices is not stored in the attribute. */
207 if (step != center_step) {
208 verts = motion_keys->data_float3() + (step > center_step ? step - 1 : step) * num_verts;
209 }
210 memcpy(dest_data + num_verts * step, verts, num_verts * sizeof(float3));
211 }
212 if (storage_mode == MTLResourceStorageModeManaged) {
213 [posBuf didModifyRange:NSMakeRange(0, posBuf.length)];
214 }
215 }
216
217 /* Create an acceleration structure. */
218 MTLAccelerationStructureGeometryDescriptor *geomDesc;
219 if (num_motion_steps > 1) {
220 std::vector<MTLMotionKeyframeData *> vertex_ptrs;
221 vertex_ptrs.reserve(num_motion_steps);
222 for (size_t step = 0; step < num_motion_steps; ++step) {
223 MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
224 k.buffer = posBuf;
225 k.offset = num_verts * step * sizeof(float3);
226 vertex_ptrs.push_back(k);
227 }
228
229 MTLAccelerationStructureMotionTriangleGeometryDescriptor *geomDescMotion =
230 [MTLAccelerationStructureMotionTriangleGeometryDescriptor descriptor];
231 geomDescMotion.vertexBuffers = [NSArray arrayWithObjects:vertex_ptrs.data()
232 count:vertex_ptrs.size()];
233 geomDescMotion.vertexStride = sizeof(verts.data()[0]);
234 geomDescMotion.indexBuffer = indexBuf;
235 geomDescMotion.indexBufferOffset = 0;
236 geomDescMotion.indexType = MTLIndexTypeUInt32;
237 geomDescMotion.triangleCount = num_indices / 3;
238 geomDescMotion.intersectionFunctionTableOffset = 0;
239 geomDescMotion.opaque = true;
240
241 geomDesc = geomDescMotion;
242 }
243 else {
244 MTLAccelerationStructureTriangleGeometryDescriptor *geomDescNoMotion =
245 [MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
246 geomDescNoMotion.vertexBuffer = posBuf;
247 geomDescNoMotion.vertexBufferOffset = 0;
248 geomDescNoMotion.vertexStride = sizeof(verts.data()[0]);
249 geomDescNoMotion.indexBuffer = indexBuf;
250 geomDescNoMotion.indexBufferOffset = 0;
251 geomDescNoMotion.indexType = MTLIndexTypeUInt32;
252 geomDescNoMotion.triangleCount = num_indices / 3;
253 geomDescNoMotion.intersectionFunctionTableOffset = 0;
254 geomDescNoMotion.opaque = true;
255
256 geomDesc = geomDescNoMotion;
257 }
258
259 /* Force a single any-hit call, so shadow record-all behavior works correctly */
260 /* (Match optix behavior: unsigned int build_flags =
261 * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
262 geomDesc.allowDuplicateIntersectionFunctionInvocation = false;
263
264 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
265 [MTLPrimitiveAccelerationStructureDescriptor descriptor];
266 accelDesc.geometryDescriptors = @[ geomDesc ];
267 if (num_motion_steps > 1) {
268 accelDesc.motionStartTime = 0.0f;
269 accelDesc.motionEndTime = 1.0f;
270 accelDesc.motionStartBorderMode = MTLMotionBorderModeClamp;
271 accelDesc.motionEndBorderMode = MTLMotionBorderModeClamp;
272 accelDesc.motionKeyframeCount = num_motion_steps;
273 }
274 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
275
276 if (!use_fast_trace_bvh) {
277 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
278 MTLAccelerationStructureUsagePreferFastBuild);
279 }
280
281 MTLAccelerationStructureSizes accelSizes = [mtl_device
282 accelerationStructureSizesWithDescriptor:accelDesc];
283 id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
284 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
285 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
286 options:MTLResourceStorageModePrivate];
287 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
288 options:MTLResourceStorageModeShared];
289 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
290 id<MTLAccelerationStructureCommandEncoder> accelEnc =
291 [accelCommands accelerationStructureCommandEncoder];
292 if (refit) {
293 [accelEnc refitAccelerationStructure:accel_struct
294 descriptor:accelDesc
295 destination:accel_uncompressed
296 scratchBuffer:scratchBuf
297 scratchBufferOffset:0];
298 }
299 else {
300 [accelEnc buildAccelerationStructure:accel_uncompressed
301 descriptor:accelDesc
302 scratchBuffer:scratchBuf
303 scratchBufferOffset:0];
304 }
305 if (use_fast_trace_bvh) {
306 [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
307 toBuffer:sizeBuf
308 offset:0
309 sizeDataType:MTLDataTypeULong];
310 }
311 [accelEnc endEncoding];
312
313 /* Estimated size of resources that will be wired for the GPU accelerated build.
314 * Acceleration-struct size is doubled to account for possible compaction step. */
315 size_t wired_size = posBuf.allocatedSize + indexBuf.allocatedSize + scratchBuf.allocatedSize +
316 accel_uncompressed.allocatedSize * 2;
317
318 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
319 /* free temp resources */
320 [scratchBuf release];
321 [indexBuf release];
322 [posBuf release];
323
324 if (use_fast_trace_bvh) {
325 /* Compact the accel structure */
326 uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;
327
328 dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
329 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
330 id<MTLAccelerationStructureCommandEncoder> accelEnc =
331 [accelCommands accelerationStructureCommandEncoder];
332 id<MTLAccelerationStructure> accel = [mtl_device
333 newAccelerationStructureWithSize:compressed_size];
334 [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
335 toAccelerationStructure:accel];
336 [accelEnc endEncoding];
337 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
338 set_accel_struct(accel);
339 [accel_uncompressed release];
340
341 /* Signal that we've finished doing GPU acceleration struct build. */
342 g_bvh_build_throttler.release(wired_size);
343 }];
344 [accelCommands commit];
345 });
346 }
347 else {
348 /* set our acceleration structure to the uncompressed structure */
349 set_accel_struct(accel_uncompressed);
350
351 /* Signal that we've finished doing GPU acceleration struct build. */
352 g_bvh_build_throttler.release(wired_size);
353 }
354
355 [sizeBuf release];
356 }];
357
358 /* Wait until it's safe to proceed with GPU acceleration struct build. */
359 g_bvh_build_throttler.acquire(wired_size);
360 [accelCommands commit];
361
362 return true;
363 }
364 return false;
365}
366
367bool BVHMetal::build_BLAS_hair(Progress &progress,
368 id<MTLDevice> mtl_device,
369 id<MTLCommandQueue> queue,
370 Geometry *const geom,
371 bool refit)
372{
373# if defined(MAC_OS_VERSION_14_0)
374 if (@available(macos 14.0, *)) {
375 /* Build BLAS for hair curves */
376 Hair *hair = static_cast<Hair *>(geom);
377 if (hair->num_curves() == 0) {
378 return false;
379 }
380
381 /*------------------------------------------------*/
382 BVH_status(
383 "Building hair BLAS | %7d curves | %s", (int)hair->num_curves(), geom->name.c_str());
384 /*------------------------------------------------*/
385
386 const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);
387
388 size_t num_motion_steps = 1;
389 Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
390 if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
391 num_motion_steps = hair->get_motion_steps();
392 }
393
394 MTLResourceOptions storage_mode;
395 if (mtl_device.hasUnifiedMemory) {
396 storage_mode = MTLResourceStorageModeShared;
397 }
398 else {
399 storage_mode = MTLResourceStorageModeManaged;
400 }
401
402 id<MTLBuffer> cpBuffer = nil;
403 id<MTLBuffer> radiusBuffer = nil;
404 id<MTLBuffer> idxBuffer = nil;
405
406 MTLAccelerationStructureGeometryDescriptor *geomDesc;
407 if (motion_blur) {
408 MTLAccelerationStructureMotionCurveGeometryDescriptor *geomDescCrv =
409 [MTLAccelerationStructureMotionCurveGeometryDescriptor descriptor];
410
411 uint64_t numKeys = hair->num_keys();
412 uint64_t numCurves = hair->num_curves();
413 const array<float> &radiuses = hair->get_curve_radius();
414
415 /* Gather the curve geometry. */
416 std::vector<float3> cpData;
417 std::vector<int> idxData;
418 std::vector<float> radiusData;
419 cpData.reserve(numKeys);
420 radiusData.reserve(numKeys);
421
422 std::vector<int> step_offsets;
423 for (size_t step = 0; step < num_motion_steps; ++step) {
424
425 /* The center step for motion vertices is not stored in the attribute. */
426 const float3 *keys = hair->get_curve_keys().data();
427 size_t center_step = (num_motion_steps - 1) / 2;
428 if (step != center_step) {
429 size_t attr_offset = (step > center_step) ? step - 1 : step;
430 /* Technically this is a float4 array, but sizeof(float3) == sizeof(float4). */
431 keys = motion_keys->data_float3() + attr_offset * numKeys;
432 }
433
434 step_offsets.push_back(cpData.size());
435
436 for (int c = 0; c < numCurves; ++c) {
437 const Hair::Curve curve = hair->get_curve(c);
438 int segCount = curve.num_segments();
439 int firstKey = curve.first_key;
440 uint64_t idxBase = cpData.size();
441 cpData.push_back(keys[firstKey]);
442 radiusData.push_back(radiuses[firstKey]);
443 for (int s = 0; s < segCount; ++s) {
444 if (step == 0) {
445 idxData.push_back(idxBase + s);
446 }
447 cpData.push_back(keys[firstKey + s]);
448 radiusData.push_back(radiuses[firstKey + s]);
449 }
450 cpData.push_back(keys[firstKey + curve.num_keys - 1]);
451 cpData.push_back(keys[firstKey + curve.num_keys - 1]);
452 radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
453 radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
454 }
455 }
456
457 /* Allocate and populate MTLBuffers for geometry. */
458 idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
459 length:idxData.size() * sizeof(int)
460 options:storage_mode];
461
462 cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
463 length:cpData.size() * sizeof(float3)
464 options:storage_mode];
465
466 radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
467 length:radiusData.size() * sizeof(float)
468 options:storage_mode];
469
470 std::vector<MTLMotionKeyframeData *> cp_ptrs;
471 std::vector<MTLMotionKeyframeData *> radius_ptrs;
472 cp_ptrs.reserve(num_motion_steps);
473 radius_ptrs.reserve(num_motion_steps);
474
475 for (size_t step = 0; step < num_motion_steps; ++step) {
476 MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
477 k.buffer = cpBuffer;
478 k.offset = step_offsets[step] * sizeof(float3);
479 cp_ptrs.push_back(k);
480
481 k = [MTLMotionKeyframeData data];
482 k.buffer = radiusBuffer;
483 k.offset = step_offsets[step] * sizeof(float);
484 radius_ptrs.push_back(k);
485 }
486
487 if (storage_mode == MTLResourceStorageModeManaged) {
488 [cpBuffer didModifyRange:NSMakeRange(0, cpBuffer.length)];
489 [idxBuffer didModifyRange:NSMakeRange(0, idxBuffer.length)];
490 [radiusBuffer didModifyRange:NSMakeRange(0, radiusBuffer.length)];
491 }
492
493 geomDescCrv.controlPointBuffers = [NSArray arrayWithObjects:cp_ptrs.data()
494 count:cp_ptrs.size()];
495 geomDescCrv.radiusBuffers = [NSArray arrayWithObjects:radius_ptrs.data()
496 count:radius_ptrs.size()];
497
498 geomDescCrv.controlPointCount = cpData.size();
499 geomDescCrv.controlPointStride = sizeof(float3);
500 geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
501 geomDescCrv.radiusStride = sizeof(float);
502 geomDescCrv.radiusFormat = MTLAttributeFormatFloat;
503 geomDescCrv.segmentCount = idxData.size();
504 geomDescCrv.segmentControlPointCount = 4;
505 geomDescCrv.curveType = (hair->curve_shape == CURVE_RIBBON) ? MTLCurveTypeFlat :
506 MTLCurveTypeRound;
507 geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
508 geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
509 geomDescCrv.indexType = MTLIndexTypeUInt32;
510 geomDescCrv.indexBuffer = idxBuffer;
511 geomDescCrv.intersectionFunctionTableOffset = 1;
512
513 /* Force a single any-hit call, so shadow record-all behavior works correctly */
514 /* (Match optix behavior: unsigned int build_flags =
515 * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
516 geomDescCrv.allowDuplicateIntersectionFunctionInvocation = false;
517 geomDescCrv.opaque = true;
518 geomDesc = geomDescCrv;
519 }
520 else {
521 MTLAccelerationStructureCurveGeometryDescriptor *geomDescCrv =
522 [MTLAccelerationStructureCurveGeometryDescriptor descriptor];
523
524 uint64_t numKeys = hair->num_keys();
525 uint64_t numCurves = hair->num_curves();
526 const array<float> &radiuses = hair->get_curve_radius();
527
528 /* Gather the curve geometry. */
529 std::vector<float3> cpData;
530 std::vector<int> idxData;
531 std::vector<float> radiusData;
532 cpData.reserve(numKeys);
533 radiusData.reserve(numKeys);
534 auto keys = hair->get_curve_keys();
535 for (int c = 0; c < numCurves; ++c) {
536 const Hair::Curve curve = hair->get_curve(c);
537 int segCount = curve.num_segments();
538 int firstKey = curve.first_key;
539 radiusData.push_back(radiuses[firstKey]);
540 uint64_t idxBase = cpData.size();
541 cpData.push_back(keys[firstKey]);
542 for (int s = 0; s < segCount; ++s) {
543 idxData.push_back(idxBase + s);
544 cpData.push_back(keys[firstKey + s]);
545 radiusData.push_back(radiuses[firstKey + s]);
546 }
547 cpData.push_back(keys[firstKey + curve.num_keys - 1]);
548 cpData.push_back(keys[firstKey + curve.num_keys - 1]);
549 radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
550 radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
551 }
552
553 /* Allocate and populate MTLBuffers for geometry. */
554 idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
555 length:idxData.size() * sizeof(int)
556 options:storage_mode];
557
558 cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
559 length:cpData.size() * sizeof(float3)
560 options:storage_mode];
561
562 radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
563 length:radiusData.size() * sizeof(float)
564 options:storage_mode];
565
566 if (storage_mode == MTLResourceStorageModeManaged) {
567 [cpBuffer didModifyRange:NSMakeRange(0, cpBuffer.length)];
568 [idxBuffer didModifyRange:NSMakeRange(0, idxBuffer.length)];
569 [radiusBuffer didModifyRange:NSMakeRange(0, radiusBuffer.length)];
570 }
571 geomDescCrv.controlPointBuffer = cpBuffer;
572 geomDescCrv.radiusBuffer = radiusBuffer;
573 geomDescCrv.controlPointCount = cpData.size();
574 geomDescCrv.controlPointStride = sizeof(float3);
575 geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
576 geomDescCrv.controlPointBufferOffset = 0;
577 geomDescCrv.segmentCount = idxData.size();
578 geomDescCrv.segmentControlPointCount = 4;
579 geomDescCrv.curveType = (hair->curve_shape == CURVE_RIBBON) ? MTLCurveTypeFlat :
580 MTLCurveTypeRound;
581 geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
582 geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
583 geomDescCrv.indexType = MTLIndexTypeUInt32;
584 geomDescCrv.indexBuffer = idxBuffer;
585 geomDescCrv.intersectionFunctionTableOffset = 1;
586
587 /* Force a single any-hit call, so shadow record-all behavior works correctly */
588 /* (Match optix behavior: unsigned int build_flags =
589 * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
590 geomDescCrv.allowDuplicateIntersectionFunctionInvocation = false;
591 geomDescCrv.opaque = true;
592 geomDesc = geomDescCrv;
593 }
594
595 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
596 [MTLPrimitiveAccelerationStructureDescriptor descriptor];
597 accelDesc.geometryDescriptors = @[ geomDesc ];
598
599 if (motion_blur) {
600 accelDesc.motionStartTime = 0.0f;
601 accelDesc.motionEndTime = 1.0f;
602 accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
603 accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
604 accelDesc.motionKeyframeCount = num_motion_steps;
605 }
606
607 if (!use_fast_trace_bvh) {
608 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
609 MTLAccelerationStructureUsagePreferFastBuild);
610 }
611 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
612
613 MTLAccelerationStructureSizes accelSizes = [mtl_device
614 accelerationStructureSizesWithDescriptor:accelDesc];
615 id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
616 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
617 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
618 options:MTLResourceStorageModePrivate];
619 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
620 options:MTLResourceStorageModeShared];
621 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
622 id<MTLAccelerationStructureCommandEncoder> accelEnc =
623 [accelCommands accelerationStructureCommandEncoder];
624 if (refit) {
625 [accelEnc refitAccelerationStructure:accel_struct
626 descriptor:accelDesc
627 destination:accel_uncompressed
628 scratchBuffer:scratchBuf
629 scratchBufferOffset:0];
630 }
631 else {
632 [accelEnc buildAccelerationStructure:accel_uncompressed
633 descriptor:accelDesc
634 scratchBuffer:scratchBuf
635 scratchBufferOffset:0];
636 }
637 if (use_fast_trace_bvh) {
638 [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
639 toBuffer:sizeBuf
640 offset:0
641 sizeDataType:MTLDataTypeULong];
642 }
643 [accelEnc endEncoding];
644
645 /* Estimated size of resources that will be wired for the GPU accelerated build.
646 * Acceleration-struct size is doubled to account for possible compaction step. */
647 size_t wired_size = cpBuffer.allocatedSize + radiusBuffer.allocatedSize +
648 idxBuffer.allocatedSize + scratchBuf.allocatedSize +
649 accel_uncompressed.allocatedSize * 2;
650
651 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
652 /* free temp resources */
653 [scratchBuf release];
654 [cpBuffer release];
655 [radiusBuffer release];
656 [idxBuffer release];
657
658 if (use_fast_trace_bvh) {
659 uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;
660
661 dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
662 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
663 id<MTLAccelerationStructureCommandEncoder> accelEnc =
664 [accelCommands accelerationStructureCommandEncoder];
665 id<MTLAccelerationStructure> accel = [mtl_device
666 newAccelerationStructureWithSize:compressed_size];
667 [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
668 toAccelerationStructure:accel];
669 [accelEnc endEncoding];
670 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
671 set_accel_struct(accel);
672 [accel_uncompressed release];
673
674 /* Signal that we've finished doing GPU acceleration struct build. */
675 g_bvh_build_throttler.release(wired_size);
676 }];
677 [accelCommands commit];
678 });
679 }
680 else {
681 /* set our acceleration structure to the uncompressed structure */
682 set_accel_struct(accel_uncompressed);
683
684 /* Signal that we've finished doing GPU acceleration struct build. */
685 g_bvh_build_throttler.release(wired_size);
686 }
687
688 [sizeBuf release];
689 }];
690
691 /* Wait until it's safe to proceed with GPU acceleration struct build. */
692 g_bvh_build_throttler.acquire(wired_size);
693 [accelCommands commit];
694
695 return true;
696 }
697# else /* MAC_OS_VERSION_14_0 */
698 (void)progress;
699 (void)mtl_device;
700 (void)queue;
701 (void)geom;
702 (void)(refit);
703# endif /* MAC_OS_VERSION_14_0 */
704 return false;
705}
706
707bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
708 id<MTLDevice> mtl_device,
709 id<MTLCommandQueue> queue,
710 Geometry *const geom,
711 bool refit)
712{
713 if (@available(macos 12.0, *)) {
714 /* Build BLAS for point cloud */
715 PointCloud *pointcloud = static_cast<PointCloud *>(geom);
716 if (pointcloud->num_points() == 0) {
717 return false;
718 }
719
720 /*------------------------------------------------*/
721 BVH_status("Building pointcloud BLAS | %7d points | %s",
722 (int)pointcloud->num_points(),
723 geom->name.c_str());
724 /*------------------------------------------------*/
725
726 const size_t num_points = pointcloud->get_points().size();
727 const float3 *points = pointcloud->get_points().data();
728 const float *radius = pointcloud->get_radius().data();
729
730 const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);
731
732 size_t num_motion_steps = 1;
733 Attribute *motion_keys = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
734 if (motion_blur && pointcloud->get_use_motion_blur() && motion_keys) {
735 num_motion_steps = pointcloud->get_motion_steps();
736 }
737
738 const size_t num_aabbs = num_motion_steps * num_points;
739
740 MTLResourceOptions storage_mode;
741 if (mtl_device.hasUnifiedMemory) {
742 storage_mode = MTLResourceStorageModeShared;
743 }
744 else {
745 storage_mode = MTLResourceStorageModeManaged;
746 }
747
748 /* Allocate a GPU buffer for the AABB data and populate it */
749 id<MTLBuffer> aabbBuf = [mtl_device
750 newBufferWithLength:num_aabbs * sizeof(MTLAxisAlignedBoundingBox)
751 options:storage_mode];
752 MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents];
753
754 /* Get AABBs for each motion step */
755 size_t center_step = (num_motion_steps - 1) / 2;
756 for (size_t step = 0; step < num_motion_steps; ++step) {
757 if (step == center_step) {
758 /* The center step for motion vertices is not stored in the attribute */
759 for (size_t j = 0; j < num_points; ++j) {
760 const PointCloud::Point point = pointcloud->get_point(j);
762 point.bounds_grow(points, radius, bounds);
763
764 const size_t index = step * num_points + j;
765 aabb_data[index].min = (MTLPackedFloat3 &)bounds.min;
766 aabb_data[index].max = (MTLPackedFloat3 &)bounds.max;
767 }
768 }
769 else {
770 size_t attr_offset = (step > center_step) ? step - 1 : step;
771 float4 *motion_points = motion_keys->data_float4() + attr_offset * num_points;
772
773 for (size_t j = 0; j < num_points; ++j) {
774 const PointCloud::Point point = pointcloud->get_point(j);
776 point.bounds_grow(motion_points[j], bounds);
777
778 const size_t index = step * num_points + j;
779 aabb_data[index].min = (MTLPackedFloat3 &)bounds.min;
780 aabb_data[index].max = (MTLPackedFloat3 &)bounds.max;
781 }
782 }
783 }
784
785 if (storage_mode == MTLResourceStorageModeManaged) {
786 [aabbBuf didModifyRange:NSMakeRange(0, aabbBuf.length)];
787 }
788
789# if 0
790 for (size_t i=0; i<num_aabbs && i < 400; i++) {
791 MTLAxisAlignedBoundingBox& bb = aabb_data[i];
792 printf(" %d: %.1f,%.1f,%.1f -- %.1f,%.1f,%.1f\n", int(i), bb.min.x, bb.min.y, bb.min.z, bb.max.x, bb.max.y, bb.max.z);
793 }
794# endif
795
796 MTLAccelerationStructureGeometryDescriptor *geomDesc;
797 if (motion_blur) {
798 std::vector<MTLMotionKeyframeData *> aabb_ptrs;
799 aabb_ptrs.reserve(num_motion_steps);
800 for (size_t step = 0; step < num_motion_steps; ++step) {
801 MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
802 k.buffer = aabbBuf;
803 k.offset = step * num_points * sizeof(MTLAxisAlignedBoundingBox);
804 aabb_ptrs.push_back(k);
805 }
806
807 MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor *geomDescMotion =
808 [MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor descriptor];
809 geomDescMotion.boundingBoxBuffers = [NSArray arrayWithObjects:aabb_ptrs.data()
810 count:aabb_ptrs.size()];
811 geomDescMotion.boundingBoxCount = num_points;
812 geomDescMotion.boundingBoxStride = sizeof(aabb_data[0]);
813 geomDescMotion.intersectionFunctionTableOffset = 2;
814
815 /* Force a single any-hit call, so shadow record-all behavior works correctly */
816 /* (Match optix behavior: unsigned int build_flags =
817 * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
818 geomDescMotion.allowDuplicateIntersectionFunctionInvocation = false;
819 geomDescMotion.opaque = true;
820 geomDesc = geomDescMotion;
821 }
822 else {
823 MTLAccelerationStructureBoundingBoxGeometryDescriptor *geomDescNoMotion =
824 [MTLAccelerationStructureBoundingBoxGeometryDescriptor descriptor];
825 geomDescNoMotion.boundingBoxBuffer = aabbBuf;
826 geomDescNoMotion.boundingBoxBufferOffset = 0;
827 geomDescNoMotion.boundingBoxCount = int(num_aabbs);
828 geomDescNoMotion.boundingBoxStride = sizeof(aabb_data[0]);
829 geomDescNoMotion.intersectionFunctionTableOffset = 2;
830
831 /* Force a single any-hit call, so shadow record-all behavior works correctly */
832 /* (Match optix behavior: unsigned int build_flags =
833 * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
834 geomDescNoMotion.allowDuplicateIntersectionFunctionInvocation = false;
835 geomDescNoMotion.opaque = true;
836 geomDesc = geomDescNoMotion;
837 }
838
839 MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
840 [MTLPrimitiveAccelerationStructureDescriptor descriptor];
841 accelDesc.geometryDescriptors = @[ geomDesc ];
842
843 if (motion_blur) {
844 accelDesc.motionStartTime = 0.0f;
845 accelDesc.motionEndTime = 1.0f;
846 // accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
847 // accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
848 accelDesc.motionKeyframeCount = num_motion_steps;
849 }
850 accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
851
852 if (!use_fast_trace_bvh) {
853 accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
854 MTLAccelerationStructureUsagePreferFastBuild);
855 }
856
857 MTLAccelerationStructureSizes accelSizes = [mtl_device
858 accelerationStructureSizesWithDescriptor:accelDesc];
859 id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
860 newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
861 id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
862 options:MTLResourceStorageModePrivate];
863 id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
864 options:MTLResourceStorageModeShared];
865 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
866 id<MTLAccelerationStructureCommandEncoder> accelEnc =
867 [accelCommands accelerationStructureCommandEncoder];
868 if (refit) {
869 [accelEnc refitAccelerationStructure:accel_struct
870 descriptor:accelDesc
871 destination:accel_uncompressed
872 scratchBuffer:scratchBuf
873 scratchBufferOffset:0];
874 }
875 else {
876 [accelEnc buildAccelerationStructure:accel_uncompressed
877 descriptor:accelDesc
878 scratchBuffer:scratchBuf
879 scratchBufferOffset:0];
880 }
881 if (use_fast_trace_bvh) {
882 [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
883 toBuffer:sizeBuf
884 offset:0
885 sizeDataType:MTLDataTypeULong];
886 }
887 [accelEnc endEncoding];
888
889 /* Estimated size of resources that will be wired for the GPU accelerated build.
890 * Acceleration-struct size is doubled to account for possible compaction step. */
891 size_t wired_size = aabbBuf.allocatedSize + scratchBuf.allocatedSize +
892 accel_uncompressed.allocatedSize * 2;
893
894 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
895 /* free temp resources */
896 [scratchBuf release];
897 [aabbBuf release];
898
899 if (use_fast_trace_bvh) {
900 /* Compact the accel structure */
901 uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;
902
903 dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
904 id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
905 id<MTLAccelerationStructureCommandEncoder> accelEnc =
906 [accelCommands accelerationStructureCommandEncoder];
907 id<MTLAccelerationStructure> accel = [mtl_device
908 newAccelerationStructureWithSize:compressed_size];
909 [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
910 toAccelerationStructure:accel];
911 [accelEnc endEncoding];
912 [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
913 set_accel_struct(accel);
914 [accel_uncompressed release];
915
916 /* Signal that we've finished doing GPU acceleration struct build. */
917 g_bvh_build_throttler.release(wired_size);
918 }];
919 [accelCommands commit];
920 });
921 }
922 else {
923 /* set our acceleration structure to the uncompressed structure */
924 set_accel_struct(accel_uncompressed);
925
926 /* Signal that we've finished doing GPU acceleration struct build. */
927 g_bvh_build_throttler.release(wired_size);
928 }
929
930 [sizeBuf release];
931 }];
932
933 /* Wait until it's safe to proceed with GPU acceleration struct build. */
934 g_bvh_build_throttler.acquire(wired_size);
935 [accelCommands commit];
936 return true;
937 }
938 return false;
939}
940
/* Build the bottom-level acceleration structure (BLAS) for the single geometry
 * owned by this BVH, dispatching to the primitive-specific builder.
 *
 * \param progress: used by the builders to report sub-status.
 * \param mtl_device: Metal device to allocate acceleration structures on.
 * \param queue: command queue the build commands are encoded into.
 * \param refit: when true, refit the existing acceleration structure in place
 *               instead of rebuilding from scratch.
 * \return true if a build was dispatched, false for unsupported geometry types.
 */
bool BVHMetal::build_BLAS(Progress &progress,
                          id<MTLDevice> mtl_device,
                          id<MTLCommandQueue> queue,
                          bool refit)
{
  /* A BLAS always covers exactly one object/geometry pair. */
  assert(objects.size() == 1 && geometry.size() == 1);

  /* Build bottom level acceleration structures (BLAS) */
  Geometry *const geom = geometry[0];
  switch (geom->geometry_type) {
    /* Volumes are voxel-meshed, so they share the triangle BLAS path. */
    case Geometry::VOLUME:
    case Geometry::MESH:
      return build_BLAS_mesh(progress, mtl_device, queue, geom, refit);
    case Geometry::HAIR:
      return build_BLAS_hair(progress, mtl_device, queue, geom, refit);
    /* Fix: the POINTCLOUD case label was missing, which left the point-cloud
     * builder call unreachable (dead code after the HAIR return). */
    case Geometry::POINTCLOUD:
      return build_BLAS_pointcloud(progress, mtl_device, queue, geom, refit);
    default:
      return false;
  }
  return false;
}
963
/* Build the top-level acceleration structure (TLAS) over all object instances,
 * referencing the per-geometry BLAS structures built earlier.
 *
 * \param progress: receives sub-status via BVH_status.
 * \param mtl_device: Metal device used for allocation and size queries.
 * \param queue: command queue the build/refit commands are encoded into.
 * \param refit: when true, refit the existing TLAS instead of a full rebuild.
 * \return true on success; false when there are no instances or MetalRT
 *         (macOS >= 12) is unavailable.
 */
bool BVHMetal::build_TLAS(Progress &progress,
                          id<MTLDevice> mtl_device,
                          id<MTLCommandQueue> queue,
                          bool refit)
{
  /* Wait for all BLAS builds to finish. */
  g_bvh_build_throttler.wait_for_all();

  if (@available(macos 12.0, *)) {
    /* Defined inside available check, for return type to be available. */
    /* Builds a degenerate (zero-triangle) BLAS, used as a stand-in for
     * non-traceable objects so instance indexing stays dense. Blocks until
     * the build completes and returns an owned (+1) acceleration structure. */
    auto make_null_BLAS = [](id<MTLDevice> mtl_device,
                             id<MTLCommandQueue> queue) -> id<MTLAccelerationStructure> {
      MTLResourceOptions storage_mode = MTLResourceStorageModeManaged;
      if (mtl_device.hasUnifiedMemory) {
        storage_mode = MTLResourceStorageModeShared;
      }

      id<MTLBuffer> nullBuf = [mtl_device newBufferWithLength:sizeof(float3) options:storage_mode];

      /* Create an acceleration structure. */
      /* Both vertex and index buffers point at the same tiny buffer; with
       * triangleCount == 0 their contents are never read. */
      MTLAccelerationStructureTriangleGeometryDescriptor *geomDesc =
          [MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
      geomDesc.vertexBuffer = nullBuf;
      geomDesc.vertexBufferOffset = 0;
      geomDesc.vertexStride = sizeof(float3);
      geomDesc.indexBuffer = nullBuf;
      geomDesc.indexBufferOffset = 0;
      geomDesc.indexType = MTLIndexTypeUInt32;
      geomDesc.triangleCount = 0;
      geomDesc.intersectionFunctionTableOffset = 0;
      geomDesc.opaque = true;
      geomDesc.allowDuplicateIntersectionFunctionInvocation = false;

      MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
          [MTLPrimitiveAccelerationStructureDescriptor descriptor];
      accelDesc.geometryDescriptors = @[ geomDesc ];
      accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;

      MTLAccelerationStructureSizes accelSizes = [mtl_device
          accelerationStructureSizesWithDescriptor:accelDesc];
      id<MTLAccelerationStructure> accel_struct = [mtl_device
          newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
      id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                         options:MTLResourceStorageModePrivate];
      id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
                                                      options:MTLResourceStorageModeShared];
      id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
      id<MTLAccelerationStructureCommandEncoder> accelEnc =
          [accelCommands accelerationStructureCommandEncoder];
      [accelEnc buildAccelerationStructure:accel_struct
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
      [accelEnc endEncoding];
      [accelCommands commit];
      /* Synchronous build: this only happens once, the first time a
       * degenerate instance is needed. */
      [accelCommands waitUntilCompleted];

      /* free temp resources */
      [scratchBuf release];
      [nullBuf release];
      [sizeBuf release];

      return accel_struct;
    };

    /* Count instances and motion transforms up front, to size GPU buffers. */
    uint32_t num_instances = 0;
    uint32_t num_motion_transforms = 0;
    for (Object *ob : objects) {
      num_instances++;

      /* Motion objects contribute one transform per motion key; static
       * objects contribute exactly one transform. */
      if (ob->use_motion()) {
        num_motion_transforms += max((size_t)1, ob->get_motion().size());
      }
      else {
        num_motion_transforms++;
      }
    }

    if (num_instances == 0) {
      return false;
    }

    /*------------------------------------------------*/
    BVH_status("Building TLAS | %7d instances", (int)num_instances);
    /*------------------------------------------------*/

    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);

    NSMutableArray *all_blas = [NSMutableArray array];
    unordered_map<BVHMetal const *, int> instance_mapping;

    /* Lambda function to build/retrieve the BLAS index mapping */
    /* Deduplicates BLAS references: each distinct BLAS (or the null BLAS,
     * keyed by nullptr) appears once in all_blas. */
    auto get_blas_index = [&](BVHMetal const *blas) {
      auto it = instance_mapping.find(blas);
      if (it != instance_mapping.end()) {
        return it->second;
      }
      else {
        int blas_index = (int)[all_blas count];
        instance_mapping[blas] = blas_index;
        if (@available(macos 12.0, *)) {
          [all_blas addObject:(blas ? blas->accel_struct : null_BLAS)];
        }
        return blas_index;
      }
    };

    MTLResourceOptions storage_mode;
    if (mtl_device.hasUnifiedMemory) {
      storage_mode = MTLResourceStorageModeShared;
    }
    else {
      storage_mode = MTLResourceStorageModeManaged;
    }

    /* Descriptor layout differs between the motion and user-ID instance
     * types, so pick the stride accordingly. */
    size_t instance_size;
    if (motion_blur) {
      instance_size = sizeof(MTLAccelerationStructureMotionInstanceDescriptor);
    }
    else {
      instance_size = sizeof(MTLAccelerationStructureUserIDInstanceDescriptor);
    }

    /* Allocate a GPU buffer for the instance data and populate it */
    id<MTLBuffer> instanceBuf = [mtl_device newBufferWithLength:num_instances * instance_size
                                                        options:storage_mode];
    id<MTLBuffer> motion_transforms_buf = nil;
    MTLPackedFloat4x3 *motion_transforms = nullptr;
    if (motion_blur && num_motion_transforms) {
      motion_transforms_buf = [mtl_device
          newBufferWithLength:num_motion_transforms * sizeof(MTLPackedFloat4x3)
                      options:storage_mode];
      motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents;
    }

    uint32_t instance_index = 0;
    uint32_t motion_transform_index = 0;

    blas_array.clear();
    blas_array.reserve(num_instances);

    for (Object *ob : objects) {
      /* Skip non-traceable objects */
      Geometry const *geom = ob->get_geometry();
      BVHMetal const *blas = static_cast<BVHMetal const *>(geom->bvh);
      if (!blas || !blas->accel_struct || !ob->is_traceable()) {
        /* Place a degenerate instance, to ensure [[instance_id]] equals ob->get_device_index()
         * in our intersection functions */
        blas = nullptr;

        /* Workaround for issue in macOS <= 14.1: Insert degenerate BLAS instead of zero-filling
         * the descriptor. */
        if (!null_BLAS) {
          null_BLAS = make_null_BLAS(mtl_device, queue);
        }
        blas_array.push_back(null_BLAS);
      }
      else {
        blas_array.push_back(blas->accel_struct);
      }

      uint32_t accel_struct_index = get_blas_index(blas);

      /* Add some of the object visibility bits to the mask.
       * __prim_visibility contains the combined visibility bits of all instances, so is not
       * reliable if they differ between instances.
       */
      uint32_t mask = ob->visibility_for_tracing();

      /* Have to have at least one bit in the mask, or else instance would always be culled. */
      if (0 == mask) {
        mask = 0xFF;
      }

      /* Set user instance ID to object index */
      uint32_t primitive_offset = 0;
      int currIndex = instance_index++;

      /* The userID stores the geometry's primitive offset so intersection
       * functions can map back to global primitive indices. */
      if (geom->geometry_type == Geometry::HAIR) {
        /* Build BLAS for curve primitives. */
        Hair *const hair = static_cast<Hair *const>(const_cast<Geometry *>(geom));
        primitive_offset = uint32_t(hair->curve_segment_offset);
      }
      else if (geom->geometry_type == Geometry::MESH || geom->geometry_type == Geometry::VOLUME) {
        /* Build BLAS for triangle primitives. */
        Mesh *const mesh = static_cast<Mesh *const>(const_cast<Geometry *>(geom));
        primitive_offset = uint32_t(mesh->prim_offset);
      }
      else if (geom->geometry_type == Geometry::POINTCLOUD) {
        /* Build BLAS for points primitives. */
        PointCloud *const pointcloud = static_cast<PointCloud *const>(
            const_cast<Geometry *>(geom));
        primitive_offset = uint32_t(pointcloud->prim_offset);
      }

      /* Bake into the appropriate descriptor */
      if (motion_blur) {
        MTLAccelerationStructureMotionInstanceDescriptor *instances =
            (MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
        MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[currIndex];

        desc.accelerationStructureIndex = accel_struct_index;
        desc.userID = primitive_offset;
        desc.mask = mask;
        desc.motionStartTime = 0.0f;
        desc.motionEndTime = 1.0f;
        desc.motionTransformsStartIndex = motion_transform_index;
        desc.motionStartBorderMode = MTLMotionBorderModeVanish;
        desc.motionEndBorderMode = MTLMotionBorderModeVanish;
        desc.intersectionFunctionTableOffset = 0;

        int key_count = ob->get_motion().size();
        if (key_count) {
          desc.motionTransformsCount = key_count;

          Transform *keys = ob->get_motion().data();
          /* NOTE(review): the inner loop variable shadows the outer `i`;
           * harmless here (outer index unused inside), but easy to misread. */
          for (int i = 0; i < key_count; i++) {
            float *t = (float *)&motion_transforms[motion_transform_index++];
            /* Transpose transform */
            auto src = (float const *)&keys[i];
            for (int i = 0; i < 12; i++) {
              t[i] = src[(i / 3) + 4 * (i % 3)];
            }
          }
        }
        else {
          desc.motionTransformsCount = 1;

          float *t = (float *)&motion_transforms[motion_transform_index++];
          if (ob->get_geometry()->is_instanced()) {
            /* Transpose transform */
            auto src = (float const *)&ob->get_tfm();
            for (int i = 0; i < 12; i++) {
              t[i] = src[(i / 3) + 4 * (i % 3)];
            }
          }
          else {
            /* Clear transform to identity matrix */
            /* NOTE(review): only the diagonal is written — assumes the rest of
             * the buffer is zero-filled on allocation; confirm for this
             * storage mode. */
            t[0] = t[4] = t[8] = 1.0f;
          }
        }
      }
      else {
        MTLAccelerationStructureUserIDInstanceDescriptor *instances =
            (MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
        MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[currIndex];

        desc.accelerationStructureIndex = accel_struct_index;
        desc.userID = primitive_offset;
        desc.mask = mask;
        desc.intersectionFunctionTableOffset = 0;
        desc.options = MTLAccelerationStructureInstanceOptionOpaque;

        float *t = (float *)&desc.transformationMatrix;
        if (ob->get_geometry()->is_instanced()) {
          /* Transpose transform */
          auto src = (float const *)&ob->get_tfm();
          for (int i = 0; i < 12; i++) {
            t[i] = src[(i / 3) + 4 * (i % 3)];
          }
        }
        else {
          /* Clear transform to identity matrix */
          /* NOTE(review): diagonal-only write — presumes descriptor memory is
           * zeroed; verify. */
          t[0] = t[4] = t[8] = 1.0f;
        }
      }
    }

    /* Managed buffers require an explicit didModifyRange after CPU writes. */
    if (storage_mode == MTLResourceStorageModeManaged) {
      [instanceBuf didModifyRange:NSMakeRange(0, instanceBuf.length)];
      if (motion_transforms_buf) {
        [motion_transforms_buf didModifyRange:NSMakeRange(0, motion_transforms_buf.length)];
        assert(num_motion_transforms == motion_transform_index);
      }
    }

    MTLInstanceAccelerationStructureDescriptor *accelDesc =
        [MTLInstanceAccelerationStructureDescriptor descriptor];
    accelDesc.instanceCount = num_instances;
    accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeUserID;
    accelDesc.instanceDescriptorBuffer = instanceBuf;
    accelDesc.instanceDescriptorBufferOffset = 0;
    accelDesc.instanceDescriptorStride = instance_size;
    accelDesc.instancedAccelerationStructures = all_blas;

    if (motion_blur) {
      accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeMotion;
      accelDesc.motionTransformBuffer = motion_transforms_buf;
      accelDesc.motionTransformCount = num_motion_transforms;
    }

    accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
    /* Dynamic BVHs prefer fast build + refit; static BVHs prefer fast trace. */
    if (!use_fast_trace_bvh) {
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }

    MTLAccelerationStructureSizes accelSizes = [mtl_device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel = [mtl_device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                       options:MTLResourceStorageModePrivate];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    if (refit) {
      /* Refit from the previous TLAS (accel_struct) into the new one. */
      [accelEnc refitAccelerationStructure:accel_struct
                                descriptor:accelDesc
                               destination:accel
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    [accelEnc endEncoding];
    [accelCommands commit];
    /* TLAS build is synchronous, unlike the throttled async BLAS builds. */
    [accelCommands waitUntilCompleted];

    if (motion_transforms_buf) {
      [motion_transforms_buf release];
    }
    [instanceBuf release];
    [scratchBuf release];

    /* Cache top and bottom-level acceleration structs */
    set_accel_struct(accel);

    unique_blas_array.clear();
    unique_blas_array.reserve(all_blas.count);
    [all_blas enumerateObjectsUsingBlock:^(id<MTLAccelerationStructure> blas, NSUInteger, BOOL *) {
      unique_blas_array.push_back(blas);
    }];

    return true;
  }
  return false;
}
1306
/* Entry point for building or refitting this BVH on Metal.
 * Validates the refit request, then dispatches to the BLAS or TLAS builder
 * depending on whether this is a top-level BVH. */
bool BVHMetal::build(Progress &progress,
                     id<MTLDevice> mtl_device,
                     id<MTLCommandQueue> queue,
                     bool refit)
{
  if (@available(macos 12.0, *)) {
    /* A refit is only valid on a dynamic BVH that has already been built.
     * Assert in development builds, but degrade to a full rebuild in the
     * wild rather than crashing. */
    const bool can_refit = (params.bvh_type == BVH_TYPE_DYNAMIC) && accel_struct;
    if (refit && !can_refit) {
      assert(false);
      refit = false;
    }

    /* A full rebuild starts from a clean slate. */
    if (!refit) {
      set_accel_struct(nil);
    }
  }

  /* Drain temporary Objective-C allocations made during the build. */
  @autoreleasepool {
    return params.top_level ? build_TLAS(progress, mtl_device, queue, refit) :
                              build_BLAS(progress, mtl_device, queue, refit);
  }
}
1336
1338
1339#endif /* WITH_METAL */
ThreadMutex mutex
volatile int lock
static btDbvtVolume bounds(btDbvtNode **leaves, int count)
Definition btDbvt.cpp:299
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE btScalar length() const
Return the length of the vector.
Definition btVector3.h:257
Attribute * find(ustring name) const
float3 * data_float3()
float4 * data_float4()
Definition bvh/bvh.h:66
Type geometry_type
size_t prim_offset
AttributeSet attributes
Definition hair.h:14
size_t size() const
#define printf
CCL_NAMESPACE_BEGIN struct Options options
#define CCL_NAMESPACE_END
draw_view in_light_buf[] float
draw_view push_constant(Type::INT, "radiance_src") .push_constant(Type capture_info_buf storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]") .push_constant(Type draw_view int
static float verts[][3]
uiWidgetBaseParameters params[MAX_WIDGET_BASE_BATCH]
int count
@ ATTR_STD_MOTION_VERTEX_POSITION
@ CURVE_RIBBON
descriptor
ccl_device_inline float4 mask(const int4 mask, const float4 a)
T step(const T &edge, const T &value)
VecBase< float, 4 > float4
@ BVH_TYPE_DYNAMIC
Definition params.h:34
@ BVH_TYPE_STATIC
Definition params.h:41
unsigned int uint32_t
Definition stdint.h:80
unsigned __int64 uint64_t
Definition stdint.h:90
int num_segments() const
Definition hair.h:23
ustring name
Definition graph/node.h:177
Point get_point(int i) const
size_t num_points() const
std::unique_lock< std::mutex > thread_scoped_lock
Definition thread.h:30
CCL_NAMESPACE_BEGIN typedef std::mutex thread_mutex
Definition thread.h:29
float max