Blender V4.3
gpu_index_buffer.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2016 by Mike Erwin. All rights reserved.
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
11#include "MEM_guardedalloc.h"
12
13#include "BLI_array_utils.hh"
14#include "BLI_math_base.h"
15#include "BLI_utildefines.h"
16
17#include "gpu_backend.hh"
18
19#include "GPU_index_buffer.hh"
20
21#include "GPU_capabilities.hh"
22#include "GPU_compute.hh"
23#include "GPU_platform.hh"
24
25#include <algorithm> /* For `min/max`. */
26#include <cstring>
27
28#define KEEP_SINGLE_COPY 1
29
30/* -------------------------------------------------------------------- */
34using namespace blender;
35using namespace blender::gpu;
36
38 GPUPrimType prim_type,
39 uint index_len,
40 uint vertex_len)
41{
42 builder->max_allowed_index = vertex_len - 1;
43 builder->max_index_len = index_len;
44 builder->index_len = 0; // start empty
45 builder->index_min = UINT32_MAX;
46 builder->index_max = 0;
47 builder->prim_type = prim_type;
48
49#ifdef __APPLE__
50 /* Only encode restart indices for restart-compatible primitive types.
51 * Resolves out-of-bounds read error on macOS. Using 0-index will ensure
52 * degenerative primitives when skipping primitives is required and will
53 * incur no additional performance cost for rendering. */
55 /* We will still use restart-indices for point primitives and then
56 * patch these during IndexBuf::init, as we cannot benefit from degenerative
57 * primitives to eliminate these. */
58 builder->restart_index_value = (is_restart_compatible(prim_type) ||
59 prim_type == GPU_PRIM_POINTS) ?
61 0;
62 }
63 else {
65 }
66#else
68#endif
69 builder->uses_restart_indices = false;
70 builder->data = (uint *)MEM_callocN(builder->max_index_len * sizeof(uint), "IndexBuf data");
71}
72
74 GPUPrimType prim_type,
75 uint prim_len,
76 uint vertex_len)
77{
78 int verts_per_prim = GPU_indexbuf_primitive_len(prim_type);
79#if TRUST_NO_ONE
80 assert(verts_per_prim != -1);
81#endif
82 GPU_indexbuf_init_ex(builder, prim_type, prim_len * uint(verts_per_prim), vertex_len);
83}
84
86{
88 GPU_indexbuf_init_build_on_device(elem_, index_len);
89 return elem_;
90}
91
93{
94 IndexBuf *elem_ = elem;
95 elem_->init_build_on_device(index_len);
96}
97
102
103void GPU_indexbuf_join(GPUIndexBufBuilder *builder_to, const GPUIndexBufBuilder *builder_from)
104{
105 BLI_assert(builder_to->data == builder_from->data);
106 builder_to->index_len = max_uu(builder_to->index_len, builder_from->index_len);
107 builder_to->index_min = min_uu(builder_to->index_min, builder_from->index_min);
108 builder_to->index_max = max_uu(builder_to->index_max, builder_from->index_max);
109}
110
112{
113#if TRUST_NO_ONE
114 assert(builder->data != nullptr);
115 assert(builder->index_len < builder->max_index_len);
116 assert(v <= builder->max_allowed_index);
117#endif
118 builder->data[builder->index_len++] = v;
119 builder->index_min = std::min(builder->index_min, v);
120 builder->index_max = std::max(builder->index_max, v);
121}
122
124{
125#if TRUST_NO_ONE
126 assert(builder->data != nullptr);
127 assert(builder->index_len < builder->max_index_len);
128#endif
129 builder->data[builder->index_len++] = builder->restart_index_value;
130 builder->uses_restart_indices = true;
131}
132
134{
135#if TRUST_NO_ONE
136 assert(builder->prim_type == GPU_PRIM_POINTS);
137#endif
139}
140
142{
143#if TRUST_NO_ONE
144 assert(builder->prim_type == GPU_PRIM_LINES);
145 assert(v1 != v2);
146#endif
149}
150
152{
153#if TRUST_NO_ONE
154 assert(builder->prim_type == GPU_PRIM_TRIS);
155 assert(v1 != v2 && v2 != v3 && v3 != v1);
156#endif
160}
161
163 GPUIndexBufBuilder *builder, uint v1, uint v2, uint v3, uint v4)
164{
165#if TRUST_NO_ONE
166 assert(builder->prim_type == GPU_PRIM_LINES_ADJ);
167 assert(v2 != v3); /* only the line need diff indices */
168#endif
173}
174
176{
178 BLI_assert(elem < builder->max_index_len);
179 builder->data[elem++] = v1;
180 builder->index_min = std::min(builder->index_min, v1);
181 builder->index_max = std::max(builder->index_max, v1);
182 builder->index_len = std::max(builder->index_len, elem);
183}
184
186{
188 BLI_assert(v1 != v2);
189 BLI_assert(v1 <= builder->max_allowed_index);
190 BLI_assert(v2 <= builder->max_allowed_index);
191 BLI_assert((elem + 1) * 2 <= builder->max_index_len);
192 uint idx = elem * 2;
193 builder->data[idx++] = v1;
194 builder->data[idx++] = v2;
195 builder->index_min = std::min({builder->index_min, v1, v2});
196 builder->index_max = std::max({builder->index_max, v1, v2});
197 builder->index_len = std::max(builder->index_len, idx);
198}
199
201{
203 BLI_assert(v1 != v2 && v2 != v3 && v3 != v1);
204 BLI_assert(v1 <= builder->max_allowed_index);
205 BLI_assert(v2 <= builder->max_allowed_index);
206 BLI_assert(v3 <= builder->max_allowed_index);
207 BLI_assert((elem + 1) * 3 <= builder->max_index_len);
208 uint idx = elem * 3;
209 builder->data[idx++] = v1;
210 builder->data[idx++] = v2;
211 builder->data[idx++] = v3;
212
213 builder->index_min = std::min({builder->index_min, v1, v2, v3});
214 builder->index_max = std::max({builder->index_max, v1, v2, v3});
215 builder->index_len = std::max(builder->index_len, idx);
216}
217
219{
221 BLI_assert(elem < builder->max_index_len);
222 builder->data[elem++] = builder->restart_index_value;
223 builder->index_len = std::max(builder->index_len, elem);
224 builder->uses_restart_indices = true;
225}
226
228{
230 BLI_assert((elem + 1) * 2 <= builder->max_index_len);
231 uint idx = elem * 2;
232 builder->data[idx++] = builder->restart_index_value;
233 builder->data[idx++] = builder->restart_index_value;
234 builder->index_len = std::max(builder->index_len, idx);
235 builder->uses_restart_indices = true;
236}
237
239{
241 BLI_assert((elem + 1) * 3 <= builder->max_index_len);
242 uint idx = elem * 3;
243 builder->data[idx++] = builder->restart_index_value;
244 builder->data[idx++] = builder->restart_index_value;
245 builder->data[idx++] = builder->restart_index_value;
246 builder->index_len = std::max(builder->index_len, idx);
247 builder->uses_restart_indices = true;
248}
249
251 uint curves_num,
252 uint verts_per_curve)
253{
254 uint64_t dispatch_x_dim = verts_per_curve;
256 dispatch_x_dim += 1;
257 }
258 uint64_t grid_x, grid_y, grid_z;
259 uint64_t max_grid_x = GPU_max_work_group_count(0), max_grid_y = GPU_max_work_group_count(1),
260 max_grid_z = GPU_max_work_group_count(2);
261 grid_x = min_uu(max_grid_x, (dispatch_x_dim + 15) / 16);
262 grid_y = (curves_num + 15) / 16;
263 if (grid_y <= max_grid_y) {
264 grid_z = 1;
265 }
266 else {
267 grid_y = grid_z = uint64_t(ceil(sqrt(double(grid_y))));
268 grid_y = min_uu(grid_y, max_grid_y);
269 grid_z = min_uu(grid_z, max_grid_z);
270 }
271 bool tris = (prim_type == GPU_PRIM_TRIS);
272 bool lines = (prim_type == GPU_PRIM_LINES);
276 GPU_shader_bind(shader);
277 IndexBuf *ibo = GPU_indexbuf_build_on_device(curves_num * dispatch_x_dim);
278 int resolution;
279 if (tris) {
280 resolution = 6;
281 }
282 else if (lines) {
283 resolution = 2;
284 }
285 else {
286 resolution = 1;
287 }
288 GPU_shader_uniform_1i(shader, "elements_per_curve", dispatch_x_dim / resolution);
289 GPU_shader_uniform_1i(shader, "ncurves", curves_num);
290 GPU_indexbuf_bind_as_ssbo(ibo, GPU_shader_get_ssbo_binding(shader, "out_indices"));
291 GPU_compute_dispatch(shader, grid_x, grid_y, grid_z);
292
295 return ibo;
296}
297
300/* -------------------------------------------------------------------- */
304namespace blender::gpu {
305
307{
308 if (!is_subrange_) {
310 }
311}
312
313void IndexBuf::init(uint indices_len,
314 uint32_t *indices,
315 uint min_index,
316 uint max_index,
317 GPUPrimType prim_type,
318 bool uses_restart_indices)
319{
320 is_init_ = true;
321 data_ = indices;
322 index_start_ = 0;
323 index_len_ = indices_len;
324 is_empty_ = min_index > max_index;
325
326 /* Patch index buffer to remove restart indices from
327 * non-restart-compatible primitive types. Restart indices
328 * are situationally added to selectively hide vertices.
329 * Metal does not support restart-indices for non-restart-compatible
330 * types, as such we should remove these indices.
331 *
332 * We only need to perform this for point primitives, as
333 * line primitives/triangle primitives can use index 0 for all
334 * vertices to create a degenerative primitive, where all
335 * vertices share the same index and skip rendering via HW
336 * culling. */
337 if (prim_type == GPU_PRIM_POINTS && uses_restart_indices) {
338 this->strip_restart_indices();
339 }
340
341#if GPU_TRACK_INDEX_RANGE
342 /* Everything remains 32 bit while building to keep things simple.
343 * Find min/max after, then convert to smallest index type possible. */
344 uint range = min_index < max_index ? max_index - min_index : 0;
345 /* count the primitive restart index. */
346 range += 1;
347
348 if (range <= 0xFFFF) {
350 bool do_clamp_indices = false;
351# ifdef __APPLE__
352 /* NOTE: For the Metal Backend, we use degenerative primitives to hide vertices
353 * which are not restart compatible. When this is done, we need to ensure
354 * that compressed index ranges clamp all index values within the valid
355 * range, rather than maximally clamping against the USHORT restart index
356 * value of 0xFFFFu, as this will cause an out-of-bounds read during
357 * vertex assembly. */
358 do_clamp_indices = GPU_type_matches_ex(
360# endif
361 this->squeeze_indices_short(min_index, max_index, prim_type, do_clamp_indices);
362 }
363#endif
364}
365
367{
368 is_init_ = true;
369 index_start_ = 0;
370 index_len_ = index_len;
372 data_ = nullptr;
373}
374
375void IndexBuf::init_subrange(IndexBuf *elem_src, uint start, uint length)
376{
377 /* We don't support nested sub-ranges. */
378 BLI_assert(elem_src && elem_src->is_subrange_ == false);
379 BLI_assert((length == 0) || (start + length <= elem_src->index_len_));
380
381 is_init_ = true;
382 is_subrange_ = true;
383 src_ = elem_src;
384 index_start_ = start;
386 index_base_ = elem_src->index_base_;
387 index_type_ = elem_src->index_type_;
388}
389
390uint IndexBuf::index_range(uint *r_min, uint *r_max)
391{
392 if (index_len_ == 0) {
393 *r_min = *r_max = 0;
394 return 0;
395 }
396 const uint32_t *uint_idx = (uint32_t *)data_;
397 uint min_value = RESTART_INDEX;
398 uint max_value = 0;
399 for (uint i = 0; i < index_len_; i++) {
400 const uint value = uint_idx[i];
401 if (value == RESTART_INDEX) {
402 continue;
403 }
404 if (value < min_value) {
405 min_value = value;
406 }
407 else if (value > max_value) {
408 max_value = value;
409 }
410 }
411 if (min_value == RESTART_INDEX) {
412 *r_min = *r_max = 0;
413 return 0;
414 }
415 *r_min = min_value;
416 *r_max = max_value;
417 return max_value - min_value;
418}
419
420void IndexBuf::squeeze_indices_short(uint min_idx,
421 uint max_idx,
422 GPUPrimType prim_type,
423 bool clamp_indices_in_range)
424{
425 /* data will never be *larger* than builder->data...
426 * converting in place to avoid extra allocation */
427 uint16_t *ushort_idx = (uint16_t *)data_;
428 const uint32_t *uint_idx = (uint32_t *)data_;
429
430 if (max_idx >= 0xFFFF) {
431 index_base_ = min_idx;
432 /* NOTE: When using restart_index=0 for degenerative primitives indices,
433 * the compressed index will go below zero and wrap around when min_idx > 0.
434 * In order to ensure the resulting index is still within range, we instead
435 * clamp index to the maximum within the index range.
436 *
437 * `clamp_max_idx` represents the maximum possible index to clamp against. If primitive is
438 * restart-compatible, we can just clamp against the primitive-restart value, otherwise, we
439 * must assign to a valid index within the range.
440 *
441 * NOTE: For OpenGL we skip this by disabling clamping, as we still need to use
442 * restart index values for point primitives to disable rendering. */
443 uint16_t clamp_max_idx = (is_restart_compatible(prim_type) || !clamp_indices_in_range) ?
444 0xFFFFu :
445 (max_idx - min_idx);
446 for (uint i = 0; i < index_len_; i++) {
447 ushort_idx[i] = std::min<uint16_t>(clamp_max_idx, uint_idx[i] - min_idx);
448 }
449 }
450 else {
451 index_base_ = 0;
452 for (uint i = 0; i < index_len_; i++) {
453 ushort_idx[i] = uint16_t(uint_idx[i]);
454 }
455 }
456}
457
458} // namespace blender::gpu
459
462/* -------------------------------------------------------------------- */
470
472{
474 GPU_indexbuf_build_in_place(builder, elem);
475 return elem;
476}
477
479{
481 GPU_indexbuf_create_subrange_in_place(elem, elem_src, start, length);
482 return elem;
483}
484
486{
487 BLI_assert(builder->data != nullptr);
488 /* Transfer data ownership to IndexBuf.
489 * It will be uploaded upon first use. */
490 elem->init(builder->index_len,
491 builder->data,
492 builder->index_min,
493 builder->index_max,
494 builder->prim_type,
495 builder->uses_restart_indices);
496 builder->data = nullptr;
497}
498
500 const uint index_min,
501 const uint index_max,
502 const bool uses_restart_indices,
503 IndexBuf *elem)
504{
505 BLI_assert(builder->data != nullptr);
506 /* Transfer data ownership to IndexBuf.
507 * It will be uploaded upon first use. */
508 elem->init(builder->max_index_len,
509 builder->data,
510 index_min,
511 index_max,
512 builder->prim_type,
513 uses_restart_indices);
514 builder->data = nullptr;
515}
516
518 const GPUPrimType prim_type,
519 const uint32_t *data,
520 const int32_t data_len,
521 const int32_t index_min,
522 const int32_t index_max,
523 const bool uses_restart_indices)
524{
525 const uint32_t indices_num = data_len * indices_per_primitive(prim_type);
526 /* TODO: The need for this copy is meant to be temporary. The data should be uploaded directly to
527 * the GPU here rather than copied to an array owned by the IBO first. */
528 uint32_t *copy = static_cast<uint32_t *>(
529 MEM_malloc_arrayN(indices_num, sizeof(uint32_t), __func__));
530 threading::memory_bandwidth_bound_task(sizeof(uint32_t) * indices_num * 2, [&]() {
531 array_utils::copy(Span(data, indices_num), MutableSpan(copy, indices_num));
532 });
533 ibo->init(indices_num, copy, index_min, index_max, prim_type, uses_restart_indices);
534}
535
537 IndexBuf *elem_src,
538 uint start,
539 uint length)
540{
541 elem->init_subrange(elem_src, start, length);
542}
543
545{
546 return elem->read(data);
547}
548
550{
551 delete elem;
552}
553
555{
556 return elem->is_init();
557}
558
560{
561 return indices_per_primitive(prim_type);
562}
563
565{
566 elem->upload_data();
567}
568
569void GPU_indexbuf_bind_as_ssbo(IndexBuf *elem, int binding)
570{
571 elem->bind_as_ssbo(binding);
572}
573
574void GPU_indexbuf_update_sub(IndexBuf *elem, uint start, uint len, const void *data)
575{
576 elem->update_sub(start, len, data);
577}
578
#define BLI_assert(a)
Definition BLI_assert.h:50
sqrt(x)+1/max(0
MINLINE uint min_uu(uint a, uint b)
MINLINE uint max_uu(uint a, uint b)
unsigned int uint
#define ELEM(...)
int GPU_max_work_group_count(int index)
void GPU_compute_dispatch(GPUShader *shader, uint groups_x_len, uint groups_y_len, uint groups_z_len)
void GPU_indexbuf_discard(blender::gpu::IndexBuf *elem)
void GPU_indexbuf_build_in_place_ex(GPUIndexBufBuilder *builder, uint index_min, uint index_max, bool uses_restart_indices, blender::gpu::IndexBuf *elem)
int GPU_indexbuf_primitive_len(GPUPrimType prim_type)
void GPU_indexbuf_create_subrange_in_place(blender::gpu::IndexBuf *elem, blender::gpu::IndexBuf *elem_src, uint start, uint length)
void GPU_indexbuf_bind_as_ssbo(blender::gpu::IndexBuf *elem, int binding)
blender::gpu::IndexBuf * GPU_indexbuf_calloc()
blender::gpu::IndexBuf * GPU_indexbuf_build(GPUIndexBufBuilder *)
void GPU_indexbuf_read(blender::gpu::IndexBuf *elem, uint32_t *data)
void GPU_indexbuf_build_in_place(GPUIndexBufBuilder *, blender::gpu::IndexBuf *)
void GPU_indexbuf_use(blender::gpu::IndexBuf *elem)
void GPU_indexbuf_build_in_place_from_memory(blender::gpu::IndexBuf *ibo, GPUPrimType prim_type, const uint32_t *data, int32_t data_len, int32_t index_min, int32_t index_max, bool uses_restart_indices)
blender::gpu::IndexBuf * GPU_indexbuf_create_subrange(blender::gpu::IndexBuf *elem_src, uint start, uint length)
void GPU_indexbuf_update_sub(blender::gpu::IndexBuf *elem, uint start, uint len, const void *data)
bool GPU_indexbuf_is_init(blender::gpu::IndexBuf *elem)
@ GPU_DRIVER_ANY
bool GPU_type_matches_ex(eGPUDeviceType device, eGPUOSType os, eGPUDriverType driver, eGPUBackendType backend)
@ GPU_OS_MAC
@ GPU_DEVICE_ANY
GPUPrimType
@ GPU_PRIM_LINES
@ GPU_PRIM_POINTS
@ GPU_PRIM_LINES_ADJ
@ GPU_PRIM_LINE_STRIP
@ GPU_PRIM_TRI_STRIP
@ GPU_PRIM_TRIS
bool is_restart_compatible(GPUPrimType type)
void GPU_shader_uniform_1i(GPUShader *sh, const char *name, int value)
int GPU_shader_get_ssbo_binding(GPUShader *shader, const char *name)
void GPU_shader_bind(GPUShader *shader)
void GPU_shader_unbind()
GPUShader * GPU_shader_get_builtin_shader(eGPUBuiltinShader shader)
@ GPU_SHADER_INDEXBUF_TRIS
@ GPU_SHADER_INDEXBUF_LINES
@ GPU_SHADER_INDEXBUF_POINTS
void GPU_memory_barrier(eGPUBarrier barrier)
Definition gpu_state.cc:374
@ GPU_BARRIER_ELEMENT_ARRAY
Definition GPU_state.hh:52
Read Guarded memory(de)allocation.
#define MEM_SAFE_FREE(v)
struct GPUShader GPUShader
ATTR_WARN_UNUSED_RESULT const BMVert * v2
ATTR_WARN_UNUSED_RESULT const BMVert * v
SIMD_FORCE_INLINE btScalar length() const
Return the length of the vector.
Definition btVector3.h:257
static GPUBackend * get()
virtual IndexBuf * indexbuf_alloc()=0
virtual void upload_data()=0
virtual void read(uint32_t *data) const =0
GPUIndexBufType index_type_
virtual void update_sub(uint start, uint len, const void *data)=0
void init_build_on_device(uint index_len)
void init_subrange(IndexBuf *elem_src, uint start, uint length)
virtual void bind_as_ssbo(uint binding)=0
void init(uint indices_len, uint32_t *indices, uint min_index, uint max_index, GPUPrimType prim_type, bool uses_restart_indices)
int len
static ushort indices[]
void GPU_indexbuf_set_point_vert(GPUIndexBufBuilder *builder, uint elem, uint v1)
int GPU_indexbuf_primitive_len(GPUPrimType prim_type)
void GPU_indexbuf_add_generic_vert(GPUIndexBufBuilder *builder, uint v)
void GPU_indexbuf_set_point_restart(GPUIndexBufBuilder *builder, uint elem)
IndexBuf * GPU_indexbuf_build_on_device(uint index_len)
IndexBuf * GPU_indexbuf_build_curves_on_device(GPUPrimType prim_type, uint curves_num, uint verts_per_curve)
IndexBuf * GPU_indexbuf_calloc()
blender::MutableSpan< uint32_t > GPU_indexbuf_get_data(GPUIndexBufBuilder *builder)
void GPU_indexbuf_set_line_verts(GPUIndexBufBuilder *builder, uint elem, uint v1, uint v2)
void GPU_indexbuf_set_tri_verts(GPUIndexBufBuilder *builder, uint elem, uint v1, uint v2, uint v3)
void GPU_indexbuf_set_line_restart(GPUIndexBufBuilder *builder, uint elem)
void GPU_indexbuf_add_line_adj_verts(GPUIndexBufBuilder *builder, uint v1, uint v2, uint v3, uint v4)
void GPU_indexbuf_bind_as_ssbo(IndexBuf *elem, int binding)
void GPU_indexbuf_set_tri_restart(GPUIndexBufBuilder *builder, uint elem)
void GPU_indexbuf_join(GPUIndexBufBuilder *builder_to, const GPUIndexBufBuilder *builder_from)
void GPU_indexbuf_add_primitive_restart(GPUIndexBufBuilder *builder)
void GPU_indexbuf_add_point_vert(GPUIndexBufBuilder *builder, uint v)
void GPU_indexbuf_init_build_on_device(IndexBuf *elem, uint index_len)
void GPU_indexbuf_init_ex(GPUIndexBufBuilder *builder, GPUPrimType prim_type, uint index_len, uint vertex_len)
void GPU_indexbuf_add_tri_verts(GPUIndexBufBuilder *builder, uint v1, uint v2, uint v3)
void GPU_indexbuf_init(GPUIndexBufBuilder *builder, GPUPrimType prim_type, uint prim_len, uint vertex_len)
void GPU_indexbuf_add_line_verts(GPUIndexBufBuilder *builder, uint v1, uint v2)
void *(* MEM_malloc_arrayN)(size_t len, size_t size, const char *str)
Definition mallocn.cc:45
void *(* MEM_callocN)(size_t len, const char *str)
Definition mallocn.cc:42
ccl_device_inline float3 ceil(const float3 a)
void copy(const GVArray &src, GMutableSpan dst, int64_t grain_size=4096)
constexpr uint32_t RESTART_INDEX
int indices_per_primitive(GPUPrimType prim_type)
void memory_bandwidth_bound_task(const int64_t approximate_bytes_touched, const Function &function)
Definition BLI_task.hh:243
static void copy(bNodeTree *dest_ntree, bNode *dest_node, const bNode *src_node)
unsigned short uint16_t
Definition stdint.h:79
unsigned int uint32_t
Definition stdint.h:80
signed int int32_t
Definition stdint.h:77
#define UINT32_MAX
Definition stdint.h:142
unsigned __int64 uint64_t
Definition stdint.h:90