Blender V4.3
mtl_index_buffer.mm
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2022-2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
9#include "mtl_index_buffer.hh"
10#include "mtl_context.hh"
11#include "mtl_debug.hh"
12#include "mtl_storage_buffer.hh"
13
14#include "BLI_span.hh"
15
16namespace blender::gpu {
17
18/* -------------------------------------------------------------------- */
23{
24 if (ibo_ != nullptr && !this->is_subrange_) {
25 ibo_->free();
26 }
27 this->free_optimized_buffer();
28
29 if (ssbo_wrapper_) {
30 delete ssbo_wrapper_;
31 ssbo_wrapper_ = nullptr;
32 }
33}
34
/* Release the generated (optimized/emulated) index buffer, if one exists. */
void MTLIndexBuf::free_optimized_buffer()
{
  if (optimized_ibo_ == nullptr) {
    return;
  }
  optimized_ibo_->free();
  optimized_ibo_ = nullptr;
}
42
void MTLIndexBuf::bind_as_ssbo(uint32_t binding)
{
  /* Flag buffer as incompatible with optimized/patched buffers as contents
   * can now have partial modifications from the GPU. */
  this->flag_can_optimize(false);
  this->free_optimized_buffer();

  /* Ensure resource is initialized. */
  this->upload_data();

  /* Ensure we have a valid IBO. */
  BLI_assert(this->ibo_);

  /* Create MTLStorageBuf to wrap this resource and use conventional binding. */
  if (ssbo_wrapper_ == nullptr) {
    /* Metal requires the bound buffer's size in bytes to be a multiple of 16. */
    int multiple_of_16 = ceil_to_multiple_u(alloc_size_, 16);
    ssbo_wrapper_ = new MTLStorageBuf(this, multiple_of_16);
  }
  ssbo_wrapper_->bind(binding);
}
64
/* Read back the full index-buffer contents into `data` (host memory).
 * Blocks until the GPU has finished any commands that may write the buffer. */
void MTLIndexBuf::read(uint32_t *data) const
{
  if (ibo_ != nullptr) {
    /* Fetch active context. */
    MTLContext *ctx = MTLContext::get();
    BLI_assert(ctx);

    /* Managed buffers keep separate CPU/GPU copies: flush GPU-side changes back
     * to host-visible memory before reading. */
    id<MTLBuffer> source_buffer = ibo_->get_metal_buffer();
    if (source_buffer.storageMode == MTLStorageModeManaged) {
      id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
      [enc synchronizeResource:source_buffer];
    }

    /* Ensure GPU has finished operating on commands which may modify data. */
    GPU_finish();

    /* Read data. */
    void *host_ptr = ibo_->get_host_ptr();
    memcpy(data, host_ptr, size_get());
    return;
  }
  BLI_assert(false && "Index buffer not ready to be read.");
}
89
/* Ensure the GPU-side index buffer exists and reflects host data.
 * Sub-range buffers re-sync against their parent; owning buffers allocate
 * and upload `data_`, then release the host copy. */
void MTLIndexBuf::upload_data()
{
  /* Handle sub-range upload. */
  if (is_subrange_) {
    MTLIndexBuf *mtlsrc = static_cast<MTLIndexBuf *>(src_);
    mtlsrc->upload_data();

#ifndef NDEBUG
    BLI_assert_msg(!mtlsrc->point_restarts_stripped_,
                   "Cannot use sub-range on stripped point buffer.");
#endif

    /* If parent sub-range allocation has changed, update our index buffer. */
    if (alloc_size_ != mtlsrc->alloc_size_ || ibo_ != mtlsrc->ibo_) {

      /* Update index buffer and allocation from source. */
      alloc_size_ = mtlsrc->alloc_size_;
      ibo_ = mtlsrc->ibo_;

      /* Reset any allocated patched or optimized index buffers. */
      this->free_optimized_buffer();
    }
    return;
  }

  /* If new data ready, and index buffer already exists, release current. */
  if ((ibo_ != nullptr) && (this->data_ != nullptr)) {
    MTL_LOG_INFO("Re-creating index buffer with new data. IndexBuf %p", this);
    ibo_->free();
    ibo_ = nullptr;
  }

  /* Prepare Buffer and Upload Data. */
  if (ibo_ == nullptr) {
    alloc_size_ = this->size_get();
    if (alloc_size_ == 0) {
      MTL_LOG_WARNING("Warning! Trying to allocate index buffer with size=0 bytes");
    }
    else {
      if (data_) {
        ibo_ = MTLContext::get_global_memory_manager()->allocate_with_data(
            alloc_size_, true, data_);
      }
      else {
        ibo_ = MTLContext::get_global_memory_manager()->allocate(alloc_size_, true);
      }
      BLI_assert(ibo_);
      ibo_->set_label(@"Index Buffer");
    }

    /* No need to keep copy of data_ in system memory. */
    if (data_) {
      MEM_SAFE_FREE(data_);
    }
  }
}
147
/* Update `len` bytes of the index buffer at byte-offset `start` with `data`.
 * If the host copy still exists it is patched directly (re-upload happens on
 * next use); otherwise the update is staged through a scratch buffer and
 * blitted into the GPU buffer in command-stream order. */
void MTLIndexBuf::update_sub(uint32_t start, uint32_t len, const void *data)
{
  BLI_assert(data != nullptr);

  /* If host-side data still exists, modify and upload as normal */
  if (data_ != nullptr) {

    /* Free index buffer if one exists. */
    if (ibo_ != nullptr && !this->is_subrange_) {
      ibo_->free();
      ibo_ = nullptr;
    }

    /* Guard against writes past the end of the host allocation. */
    BLI_assert(start + len <= this->size_get());

    /* Apply start byte offset to data pointer and modify host-side data. */
    uint8_t *modified_base_ptr = static_cast<uint8_t *>(data_) + start;
    memcpy(modified_base_ptr, data, len);
    return;
  }

  /* Verify buffer. */
  BLI_assert(ibo_ != nullptr);

  /* Otherwise, we will inject a data update, using staged data, into the command stream.
   * Stage update contents in temporary buffer. */
  MTLContext *ctx = MTLContext::get();
  BLI_assert(ctx);
  MTLTemporaryBuffer range = ctx->get_scratchbuffer_manager().scratch_buffer_allocate_range(len);
  memcpy(range.data, data, len);

  /* Copy updated contents into primary buffer.
   * These changes need to be uploaded via blit to ensure the data copies happen in-order. */
  id<MTLBuffer> dest_buffer = ibo_->get_metal_buffer();
  BLI_assert(dest_buffer != nil);

  id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
  [enc copyFromBuffer:range.metal_buffer
         sourceOffset:(uint32_t)range.buffer_offset
             toBuffer:dest_buffer
    destinationOffset:start
                 size:len];

  /* Synchronize changes back to host to ensure CPU-side data is up-to-date for non
   * Shared buffers. */
  if (dest_buffer.storageMode == MTLStorageModeManaged) {
    [enc synchronizeResource:dest_buffer];
  }

  /* Invalidate patched/optimized buffers. */
  this->free_optimized_buffer();

  /* Flag buffer as incompatible with optimized/patched buffers as contents
   * have partial modifications. */
  this->flag_can_optimize(false);

  /* NOTE(review): unconditional debug trap — the staged GPU update path above appears
   * complete, so confirm whether hitting this path is still considered unexpected. */
  BLI_assert(false);
}
211
/* Enable or disable eligibility for optimized/emulated index-buffer generation.
 *
 * NOTE: Optimization must be disabled for indirect draws, where the index count
 * is unknown at submission time. If an optimized buffer was already generated by
 * an earlier draw pass, this is a usage error that must be resolved at the
 * high level — preferably by using primitive types without primitive restart,
 * which also perform far better on hardware. */
void MTLIndexBuf::flag_can_optimize(bool can_optimize)
{
  can_optimize_ = can_optimize;
  BLI_assert_msg(can_optimize || (optimized_ibo_ == nullptr),
                 "Index buffer optimization disabled, but optimal buffer already generated.");
}
224
/* Unwrap a restart-capable #TriangleStrip index stream into a plain
 * #TriangleList stream, honoring strip winding alternation.
 * `original_data`: source strip indices (restart index == T(-1)).
 * `output_data`: destination list; caller sizes it to (index_len - 2) * 3.
 * Returns the total number of indices written.
 *
 * FIX(review): previously the first triangle was only emitted once a 4th strip
 * vertex arrived (`current_vert_len == 3`) and continuation used `> 3`, so the
 * index consumed on the emitting iteration was silently dropped: a 3-vertex
 * strip produced no triangle and every strip yielded n-3 instead of n-2
 * triangles. Emit the first triangle when the 3rd vertex is cached and handle
 * continuation from the 4th vertex onward. */
template<typename T>
static uint32_t populate_optimized_tri_strip_buf(Span<T> original_data,
                                                 MutableSpan<T> output_data,
                                                 uint32_t input_index_len)
{
  /* Generate #TriangleList from #TriangleStrip. */
  uint32_t current_vert_len = 0;
  uint32_t current_output_ind = 0;
  T indices[3];

  for (uint32_t c_index = 0; c_index < input_index_len; c_index++) {
    T current_index = original_data[c_index];
    if (current_index == T(-1)) {
      /* Primitive restart: terminate current strip, begin a new one. */
      current_vert_len = 0;
    }
    else {
      if (current_vert_len < 3) {
        /* Cache indices before generating a triangle, in case we have bad primitive-restarts. */
        indices[current_vert_len] = current_index;
      }

      if (current_vert_len == 2) {
        /* Third vertex of the strip: emit the first triangle. */
        output_data[current_output_ind++] = indices[0];
        output_data[current_output_ind++] = indices[1];
        output_data[current_output_ind++] = indices[2];
      }
      else if (current_vert_len >= 3) {
        /* All other triangles in strip: built from the previous 2 emitted
         * vertices and the latest index, alternating winding per triangle. */
        uint32_t tri_id = current_vert_len - 3;
        uint32_t base_output_ind = current_output_ind;
        if ((tri_id % 2) == 0) {
          output_data[base_output_ind + 0] = output_data[base_output_ind - 2];
          output_data[base_output_ind + 1] = current_index;
          output_data[base_output_ind + 2] = output_data[base_output_ind - 1];
        }
        else {
          output_data[base_output_ind + 0] = output_data[base_output_ind - 1];
          output_data[base_output_ind + 1] = output_data[base_output_ind - 2];
          output_data[base_output_ind + 2] = current_index;
        }
        current_output_ind += 3;
      }

      /* Increment relative vertex index. */
      current_vert_len++;
    }
  }
  return current_output_ind;
}
290
/* Unwrap a restart-capable #TriangleFan index stream into a plain
 * #TriangleList stream (fan center repeated as the first index per triangle).
 * `original_data`: source fan indices (restart index == T(-1)).
 * `output_data`: destination list; caller sizes it to (index_len - 2) * 3.
 * Returns the total number of indices written.
 *
 * FIX(review): same off-by-one as the strip variant — first triangle was only
 * emitted when a 4th vertex arrived (`current_vert_len == 3`, continuation
 * `> 3`), dropping that 4th index and emitting nothing for a 3-vertex fan.
 * Emit when the 3rd vertex is cached and continue from the 4th onward. */
template<typename T>
static uint32_t populate_emulated_tri_fan_buf(Span<T> original_data,
                                              MutableSpan<T> output_data,
                                              uint32_t input_index_len)
{
  /* Generate #TriangleList from #TriangleFan. */
  T base_prim_ind_val = 0;
  uint32_t current_vert_len = 0;
  uint32_t current_output_ind = 0;
  T indices[3];

  for (uint32_t c_index = 0; c_index < input_index_len; c_index++) {
    T current_index = original_data[c_index];
    if (current_index == T(-1)) {
      /* Primitive restart: terminate current fan, begin a new one. */
      current_vert_len = 0;
    }
    else {
      if (current_vert_len < 3) {
        /* Cache indices before generating a triangle, in case we have bad primitive-restarts. */
        indices[current_vert_len] = current_index;
      }

      if (current_vert_len == 2) {
        /* Third vertex of the fan: emit the first triangle and latch the
         * fan-center index shared by all subsequent triangles. */
        output_data[current_output_ind++] = indices[0];
        output_data[current_output_ind++] = indices[1];
        output_data[current_output_ind++] = indices[2];
        base_prim_ind_val = indices[0];
      }
      else if (current_vert_len >= 3) {
        /* All other triangles in fan: center, previous emitted vertex, latest index. */
        uint32_t base_output_ind = current_output_ind;
        output_data[base_output_ind + 0] = base_prim_ind_val;
        output_data[base_output_ind + 1] = output_data[base_output_ind - 1];
        output_data[base_output_ind + 2] = current_index;
        current_output_ind += 3;
      }

      /* Increment relative vertex index. */
      current_vert_len++;
    }
  }
  return current_output_ind;
}
342
/* Return the Metal buffer to bind for a draw using this index buffer.
 * For restart-capable TRI_FAN/TRI_STRIP topologies (unsupported or slow on
 * Metal) a one-time unwrapped TRIS buffer is generated; `in_out_primitive_type`
 * and `in_out_v_count` are rewritten to match the emitted geometry.
 *
 * FIX(review): the GPU_PRIM_TRI_FAN case previously fell through into
 * GPU_PRIM_TRI_STRIP, leaking the freshly generated fan buffer and overwriting
 * it with strip-interpreted data — a `break` has been added. */
id<MTLBuffer> MTLIndexBuf::get_index_buffer(GPUPrimType &in_out_primitive_type,
                                            uint32_t &in_out_v_count)
{
  /* Determine whether to return the original index buffer, or whether we
   * should emulate an unsupported primitive type, or optimize a restart-
   * compatible type for faster performance. */
  bool should_optimize_or_emulate = (in_out_primitive_type == GPU_PRIM_TRI_FAN) ||
                                    (in_out_primitive_type == GPU_PRIM_TRI_STRIP);
  if (!should_optimize_or_emulate || is_subrange_ || !can_optimize_) {
    /* Ensure we are not optimized. */
    BLI_assert(this->optimized_ibo_ == nullptr);

    /* Return regular index buffer. */
    BLI_assert(this->ibo_ && this->ibo_->get_metal_buffer());
    return this->ibo_->get_metal_buffer();
  }

  /* Perform optimization on type. */
  GPUPrimType input_prim_type = in_out_primitive_type;
  this->upload_data();
  if (!ibo_ && optimized_ibo_ == nullptr) {
    /* Cannot optimize buffer if no source IBO exists. */
    return nil;
  }

  /* Verify whether existing optimized index buffer is valid for this topology. */
  if (optimized_ibo_ != nullptr && optimized_primitive_type_ != input_prim_type) {
    BLI_assert_msg(false,
                   "Cannot change the optimized primitive format after generation, as source "
                   "index buffer data is discarded.");
    return nil;
  }

  /* Generate optimized index buffer. */
  if (optimized_ibo_ == nullptr) {

    /* Generate unwrapped index buffer. */
    switch (input_prim_type) {
      case GPU_PRIM_TRI_FAN: {

        /* Calculate maximum size: each fan of n verts expands to at most (n - 2) triangles. */
        uint32_t max_possible_verts = (this->index_len_ - 2) * 3;
        BLI_assert(max_possible_verts > 0);

        /* Allocate new buffer. */
        optimized_ibo_ = MTLContext::get_global_memory_manager()->allocate(
            max_possible_verts *
                ((index_type_ == GPU_INDEX_U16) ? sizeof(uint16_t) : sizeof(uint32_t)),
            true);

        /* Populate new index buffer. */
        if (index_type_ == GPU_INDEX_U16) {
          Span<uint16_t> orig_data(static_cast<const uint16_t *>(ibo_->get_host_ptr()),
                                   this->index_len_);
          MutableSpan<uint16_t> output_data(
              static_cast<uint16_t *>(optimized_ibo_->get_host_ptr()), max_possible_verts);
          emulated_v_count = populate_emulated_tri_fan_buf(
              orig_data, output_data, this->index_len_);
        }
        else {
          Span<uint32_t> orig_data(static_cast<const uint32_t *>(ibo_->get_host_ptr()),
                                   this->index_len_);
          MutableSpan<uint32_t> output_data(
              static_cast<uint32_t *>(optimized_ibo_->get_host_ptr()), max_possible_verts);
          emulated_v_count = populate_emulated_tri_fan_buf(
              orig_data, output_data, this->index_len_);
        }

        BLI_assert(emulated_v_count <= max_possible_verts);

        /* Flush buffer and output. */
        optimized_ibo_->flush();
        optimized_primitive_type_ = input_prim_type;
        in_out_v_count = emulated_v_count;
        in_out_primitive_type = GPU_PRIM_TRIS;
      } break; /* FIX: was missing, fell through into TRI_STRIP generation. */

      case GPU_PRIM_TRI_STRIP: {

        /* Calculate maximum size: each strip of n verts expands to at most (n - 2) triangles. */
        uint32_t max_possible_verts = (this->index_len_ - 2) * 3;
        BLI_assert(max_possible_verts > 0);

        /* Allocate new buffer. */
        optimized_ibo_ = MTLContext::get_global_memory_manager()->allocate(
            max_possible_verts *
                ((index_type_ == GPU_INDEX_U16) ? sizeof(uint16_t) : sizeof(uint32_t)),
            true);

        /* Populate new index buffer. */
        if (index_type_ == GPU_INDEX_U16) {
          Span<uint16_t> orig_data(static_cast<const uint16_t *>(ibo_->get_host_ptr()),
                                   this->index_len_);
          MutableSpan<uint16_t> output_data(
              static_cast<uint16_t *>(optimized_ibo_->get_host_ptr()), max_possible_verts);
          emulated_v_count = populate_optimized_tri_strip_buf(
              orig_data, output_data, this->index_len_);
        }
        else {
          Span<uint32_t> orig_data(static_cast<const uint32_t *>(ibo_->get_host_ptr()),
                                   this->index_len_);
          MutableSpan<uint32_t> output_data(
              static_cast<uint32_t *>(optimized_ibo_->get_host_ptr()), max_possible_verts);
          emulated_v_count = populate_optimized_tri_strip_buf(
              orig_data, output_data, this->index_len_);
        }

        BLI_assert(emulated_v_count <= max_possible_verts);

        /* Flush buffer and output. */
        optimized_ibo_->flush();
        optimized_primitive_type_ = input_prim_type;
        in_out_v_count = emulated_v_count;
        in_out_primitive_type = GPU_PRIM_TRIS;
      } break;

      case GPU_PRIM_LINE_STRIP: {
        /* TODO(Metal): Line strip topology types would benefit from optimization to remove
         * primitive restarts, however, these do not occur frequently, nor with
         * significant geometry counts. */
        MTL_LOG_INFO("TODO: Primitive topology: Optimize line strip topology types");
      } break;

      case GPU_PRIM_LINE_LOOP: {
        /* TODO(Metal): Line Loop primitive type requires use of optimized index buffer for
         * emulation, if used with indexed rendering. This path is currently not hit as #LineLoop
         * does not currently appear to be used alongside an index buffer. */
        MTL_LOG_WARNING(
            "TODO: Primitive topology: Line Loop Index buffer optimization required for "
            "emulation.");
      } break;

      case GPU_PRIM_TRIS:
      case GPU_PRIM_LINES:
      case GPU_PRIM_POINTS: {
        /* Should not get here - TRIS/LINES/POINTS do not require emulation or optimization. */
        BLI_assert_unreachable();
        return nil;
      }

      default:
        /* Should not get here - Invalid primitive type. */
        BLI_assert_unreachable();
        break;
    }
  }

  /* Return optimized buffer. */
  if (optimized_ibo_ != nullptr) {

    /* Delete original buffer if one still exists, as we do not need it. */
    if (ibo_ != nullptr) {
      ibo_->free();
      ibo_ = nullptr;
    }

    /* Output params. */
    in_out_v_count = emulated_v_count;
    in_out_primitive_type = GPU_PRIM_TRIS;
    return optimized_ibo_->get_metal_buffer();
  }
  return nil;
}
506
void MTLIndexBuf::strip_restart_indices()
{
  /* We remove point buffer primitive restart indices by swapping restart indices
   * with the first valid index at the end of the index buffer and reducing the
   * length. Primitive restarts are invalid in Metal for non-restart-compatible
   * primitive types. We also cannot just use zero unlike for Lines and Triangles,
   * as we cannot create de-generative point primitives to hide geometry, as each
   * point is independent.
   * Instead, we must remove these hidden indices from the index buffer.
   * NOTE: This happens prior to index squeezing so operate on 32-bit indices. */
  MutableSpan<uint32_t> uint_idx(static_cast<uint32_t *>(data_), index_len_);
  for (uint i = 0; i < index_len_; i++) {
    if (uint_idx[i] == 0xFFFFFFFFu) {

      /* Find swap index at end of index buffer.
       * NOTE: `j` is unsigned; if `i == 0` and every trailing entry is a restart,
       * `index_len_` is decremented to zero before `j` can wrap, and the
       * `index_len_ > 0` condition terminates the scan safely. */
      int swap_index = -1;
      for (uint j = index_len_ - 1; j >= i && index_len_ > 0; j--) {
        /* If end index is restart, just reduce length. */
        if (uint_idx[j] == 0xFFFFFFFFu) {
          index_len_--;
          continue;
        }
        /* Otherwise assign swap index. */
        swap_index = j;
        break;
      }

      /* If index_len_ == 0, this means all indices were flagged as hidden, with restart index
       * values. Hence we will entirely skip the draw. */
      if (index_len_ > 0) {
        /* If swap index is not valid, then there were no valid non-restart indices
         * to swap with. However, the above loop will have removed these indices by
         * reducing the length of indices. Debug assertions verify that the restart
         * index is no longer included. */
        if (swap_index == -1) {
          /* Nothing to do: the trailing restart run was already trimmed off by
           * shrinking `index_len_` above. */
        }
        else {
          /* If we have found an index we can swap with, flip the values.
           * We also reduce the length. As per above loop, swap_index should
           * now be outside the index length range. */
          uint32_t swap_index_value = uint_idx[swap_index];
          uint_idx[i] = swap_index_value;
          uint_idx[swap_index] = 0xFFFFFFFFu;
          index_len_--;
          BLI_assert(index_len_ <= swap_index);
        }
      }
    }
  }

#ifndef NDEBUG
  /* Flag as having been stripped to ensure invalid usage is tracked. */
  point_restarts_stripped_ = true;
#endif
}
563
566} // namespace blender::gpu
#define BLI_assert_unreachable()
Definition BLI_assert.h:97
#define BLI_assert(a)
Definition BLI_assert.h:50
#define BLI_assert_msg(a, msg)
Definition BLI_assert.h:57
MINLINE uint ceil_to_multiple_u(uint a, uint b)
unsigned int uint
GPUPrimType
@ GPU_PRIM_TRI_FAN
@ GPU_PRIM_LINE_LOOP
@ GPU_PRIM_LINES
@ GPU_PRIM_POINTS
@ GPU_PRIM_LINE_STRIP
@ GPU_PRIM_TRI_STRIP
@ GPU_PRIM_TRIS
void GPU_finish()
Definition gpu_state.cc:299
#define MEM_SAFE_FREE(v)
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
GPUIndexBufType index_type_
gpu::MTLBuffer * allocate_with_data(uint64_t size, bool cpu_visible, const void *data=nullptr)
gpu::MTLBuffer * allocate(uint64_t size, bool cpu_visible)
Definition mtl_memory.mm:96
void * get_host_ptr() const
void set_label(NSString *str)
id< MTLBuffer > get_metal_buffer() const
id< MTLBlitCommandEncoder > ensure_begin_blit_encoder()
static MTLContext * get()
MTLCommandBufferManager main_command_buffer
MTLScratchBufferManager & get_scratchbuffer_manager()
static MTLBufferPool * get_global_memory_manager()
void read(uint32_t *data) const override
id< MTLBuffer > get_index_buffer(GPUPrimType &in_out_primitive_type, uint &in_out_v_count)
void bind_as_ssbo(uint32_t binding) override
void update_sub(uint32_t start, uint32_t len, const void *data) override
void flag_can_optimize(bool can_optimize)
MTLTemporaryBuffer scratch_buffer_allocate_range(uint64_t alloc_size)
void bind(int slot) override
int len
#define T
#define MTL_LOG_INFO(info,...)
Definition mtl_debug.hh:51
#define MTL_LOG_WARNING(info,...)
Definition mtl_debug.hh:44
static uint32_t populate_emulated_tri_fan_buf(Span< T > original_data, MutableSpan< T > output_data, uint32_t input_index_len)
static uint32_t populate_optimized_tri_strip_buf(Span< T > original_data, MutableSpan< T > output_data, uint32_t input_index_len)
unsigned short uint16_t
Definition stdint.h:79
unsigned int uint32_t
Definition stdint.h:80
unsigned char uint8_t
Definition stdint.h:78
PointerRNA * ptr
Definition wm_files.cc:4126