Blender V5.0
gpu_index_buffer.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2016 by Mike Erwin. All rights reserved.
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
10
11#include "MEM_guardedalloc.h"
12
13#include "BLI_array_utils.hh"
14#include "BLI_math_base.h"
15#include "BLI_utildefines.h"
16
17#include "gpu_backend.hh"
18
19#include "GPU_index_buffer.hh"
20
21#include "GPU_capabilities.hh"
22#include "GPU_compute.hh"
23#include "GPU_platform.hh"
24#include "GPU_state.hh"
25
26#include <algorithm> /* For `min/max`. */
27#include <cstring>
28
29/* -------------------------------------------------------------------- */
32
33using namespace blender;
34using namespace blender::gpu;
35
/* GPU_indexbuf_init_ex: prepare a CPU-side builder for `index_len` indices that
 * reference at most `vertex_len` vertices. Counters start empty and the min/max
 * trackers start inverted (UINT32_MAX / 0) so the first added index sets both.
 * NOTE(review): extraction-garbled listing -- leading numbers are artifacts and the
 * signature plus the restart-index-value assignment lines are missing from this view. */
37 GPUPrimType prim_type,
38 uint index_len,
39 uint vertex_len)
40{
41 builder->max_allowed_index = vertex_len - 1;
42 builder->max_index_len = index_len;
43 builder->index_len = 0; // start empty
44 builder->index_min = UINT32_MAX;
45 builder->index_max = 0;
46 builder->prim_type = prim_type;
47
48#ifdef __APPLE__
49 /* Only encode restart indices for restart-compatible primitive types.
50 * Resolves out-of-bounds read error on macOS. Using 0-index will ensure
51 * degenerative primitives when skipping primitives is required and will
52 * incur no additional performance cost for rendering. */
54 /* We will still use restart-indices for point primitives and then
55 * patch these during IndexBuf::init, as we cannot benefit from degenerative
56 * primitives to eliminate these. */
57 builder->restart_index_value = (is_restart_compatible(prim_type) ||
58 prim_type == GPU_PRIM_POINTS) ?
60 0;
61 }
62 else {
64 }
65#else
67#endif
68 builder->uses_restart_indices = false;
69 builder->data = MEM_malloc_arrayN<uint>(builder->max_index_len, "IndexBuf data");
70}
71
73 GPUPrimType prim_type,
74 uint prim_len,
75 uint vertex_len)
76{
77 int verts_per_prim = GPU_indexbuf_primitive_len(prim_type);
78 BLI_assert(verts_per_prim != -1);
79 GPU_indexbuf_init_ex(builder, prim_type, prim_len * uint(verts_per_prim), vertex_len);
80}
81
/* GPU_indexbuf_build_on_device: allocate an IndexBuf whose storage lives only on
 * the GPU (no CPU-side data) and size it for `index_len` indices.
 * NOTE(review): the allocation line (backend indexbuf_alloc, presumably) was
 * dropped by extraction -- confirm against the repository before editing. */
83{
85 GPU_indexbuf_init_build_on_device(elem_, index_len);
86 return elem_;
87}
88
90{
91 IndexBuf *elem_ = elem;
92 elem_->init_build_on_device(index_len);
93}
94
99
100void GPU_indexbuf_join(GPUIndexBufBuilder *builder_to, const GPUIndexBufBuilder *builder_from)
101{
102 BLI_assert(builder_to->data == builder_from->data);
103 builder_to->index_len = max_uu(builder_to->index_len, builder_from->index_len);
104 builder_to->index_min = min_uu(builder_to->index_min, builder_from->index_min);
105 builder_to->index_max = max_uu(builder_to->index_max, builder_from->index_max);
106}
107
109{
110 BLI_assert(builder->data != nullptr);
111 BLI_assert(builder->index_len < builder->max_index_len);
112 BLI_assert(v <= builder->max_allowed_index);
113 builder->data[builder->index_len++] = v;
114 builder->index_min = std::min(builder->index_min, v);
115 builder->index_max = std::max(builder->index_max, v);
116}
117
119{
120 BLI_assert(builder->data != nullptr);
121 BLI_assert(builder->index_len < builder->max_index_len);
122 builder->data[builder->index_len++] = builder->restart_index_value;
123 builder->uses_restart_indices = true;
124}
125
131
139
/* GPU_indexbuf_add_tri_verts: append one triangle (three distinct indices).
 * NOTE(review): extraction dropped the signature and most of the body; only the
 * distinctness assert survives in this listing. */
141{
143 BLI_assert(v1 != v2 && v2 != v3 && v3 != v1);
147}
148
150 GPUIndexBufBuilder *builder, uint v1, uint v2, uint v3, uint v4)
151{
153 BLI_assert(v2 != v3); /* only the line need diff indices */
158}
159
/* GPU_indexbuf_set_point_vert: write one point index at position `elem` (random
 * access, unlike the add_* API). Extends index_len if writing past the end.
 * NOTE(review): signature line and one assert dropped by extraction. */
161{
163 BLI_assert(elem < builder->max_index_len);
164 builder->data[elem++] = v1;
165 builder->index_min = std::min(builder->index_min, v1);
166 builder->index_max = std::max(builder->index_max, v1);
 /* Only grow, never shrink: a later slot may already have been written. */
167 builder->index_len = std::max(builder->index_len, elem);
168}
169
/* GPU_indexbuf_set_line_verts: write the two indices of line primitive `elem`
 * at slots [elem*2, elem*2+1] (random access). NOTE(review): signature line and
 * one leading assert dropped by extraction. */
171{
173 BLI_assert(v1 != v2);
174 BLI_assert(v1 <= builder->max_allowed_index);
175 BLI_assert(v2 <= builder->max_allowed_index);
176 BLI_assert((elem + 1) * 2 <= builder->max_index_len);
177 uint idx = elem * 2;
178 builder->data[idx++] = v1;
179 builder->data[idx++] = v2;
180 builder->index_min = std::min({builder->index_min, v1, v2});
181 builder->index_max = std::max({builder->index_max, v1, v2});
 /* Only grow, never shrink: a later primitive may already have been written. */
182 builder->index_len = std::max(builder->index_len, idx);
183}
184
/* GPU_indexbuf_set_tri_verts: write the three indices of triangle `elem` at
 * slots [elem*3, elem*3+2] (random access). NOTE(review): signature line and one
 * leading assert dropped by extraction. */
186{
188 BLI_assert(v1 != v2 && v2 != v3 && v3 != v1);
189 BLI_assert(v1 <= builder->max_allowed_index);
190 BLI_assert(v2 <= builder->max_allowed_index);
191 BLI_assert(v3 <= builder->max_allowed_index);
192 BLI_assert((elem + 1) * 3 <= builder->max_index_len);
193 uint idx = elem * 3;
194 builder->data[idx++] = v1;
195 builder->data[idx++] = v2;
196 builder->data[idx++] = v3;
197
198 builder->index_min = std::min({builder->index_min, v1, v2, v3});
199 builder->index_max = std::max({builder->index_max, v1, v2, v3});
 /* Only grow, never shrink: a later primitive may already have been written. */
200 builder->index_len = std::max(builder->index_len, idx);
201}
202
/* GPU_indexbuf_set_point_restart: overwrite point slot `elem` with the restart
 * sentinel (used to hide a point). Does not touch the min/max range.
 * NOTE(review): signature line and one assert dropped by extraction. */
204{
206 BLI_assert(elem < builder->max_index_len);
207 builder->data[elem++] = builder->restart_index_value;
208 builder->index_len = std::max(builder->index_len, elem);
209 builder->uses_restart_indices = true;
210}
211
/* GPU_indexbuf_set_line_restart: overwrite both slots of line primitive `elem`
 * with the restart sentinel. NOTE(review): signature line and one assert dropped
 * by extraction. */
213{
215 BLI_assert((elem + 1) * 2 <= builder->max_index_len);
216 uint idx = elem * 2;
217 builder->data[idx++] = builder->restart_index_value;
218 builder->data[idx++] = builder->restart_index_value;
219 builder->index_len = std::max(builder->index_len, idx);
220 builder->uses_restart_indices = true;
221}
222
/* GPU_indexbuf_set_tri_restart: overwrite all three slots of triangle `elem`
 * with the restart sentinel. NOTE(review): signature line and one assert dropped
 * by extraction. */
224{
226 BLI_assert((elem + 1) * 3 <= builder->max_index_len);
227 uint idx = elem * 3;
228 builder->data[idx++] = builder->restart_index_value;
229 builder->data[idx++] = builder->restart_index_value;
230 builder->data[idx++] = builder->restart_index_value;
231 builder->index_len = std::max(builder->index_len, idx);
232 builder->uses_restart_indices = true;
233}
234
/* GPU_indexbuf_build_curves_on_device: fill a GPU-only index buffer for curve
 * rendering with a compute shader instead of the CPU. The dispatch is tiled in
 * 16x16 groups; if the Y grid dimension overflows the HW limit, the work is
 * re-factored into a ~square Y*Z grid.
 * NOTE(review): extraction dropped the signature, the shader
 * selection/bind/unbind and the memory-barrier lines -- confirm against the
 * repository before editing this function. */
236 uint curves_num,
237 uint verts_per_curve)
238{
 /* Presumably +1 for a per-curve restart/terminator entry -- the guarding
  * condition line is missing from this listing; TODO confirm. */
239 uint64_t dispatch_x_dim = verts_per_curve;
241 dispatch_x_dim += 1;
242 }
243 uint64_t grid_x, grid_y, grid_z;
244 uint64_t max_grid_x = GPU_max_work_group_count(0), max_grid_y = GPU_max_work_group_count(1),
245 max_grid_z = GPU_max_work_group_count(2);
246 grid_x = min_uu(max_grid_x, (dispatch_x_dim + 15) / 16);
247 grid_y = (curves_num + 15) / 16;
248 if (grid_y <= max_grid_y) {
249 grid_z = 1;
250 }
251 else {
 /* Split the oversized Y dimension into a roughly square Y*Z grid. */
252 grid_y = grid_z = uint64_t(ceil(sqrt(double(grid_y))));
253 grid_y = min_uu(grid_y, max_grid_y);
254 grid_z = min_uu(grid_z, max_grid_z);
255 }
256 bool tris = (prim_type == GPU_PRIM_TRIS);
257 bool lines = (prim_type == GPU_PRIM_LINES);
262 IndexBuf *ibo = GPU_indexbuf_build_on_device(curves_num * dispatch_x_dim);
 /* Indices emitted per curve segment: 6 for two triangles, 2 for a line, 1 otherwise. */
263 int resolution;
264 if (tris) {
265 resolution = 6;
266 }
267 else if (lines) {
268 resolution = 2;
269 }
270 else {
271 resolution = 1;
272 }
273 GPU_shader_uniform_1i(shader, "elements_per_curve", dispatch_x_dim / resolution);
274 GPU_shader_uniform_1i(shader, "ncurves", curves_num);
276 GPU_compute_dispatch(shader, grid_x, grid_y, grid_z);
277
280 return ibo;
281}
282
284
285/* -------------------------------------------------------------------- */
288
289namespace blender::gpu {
290
/* Destructor: sub-range buffers do not own `data_`, so only non-subranges free it.
 * NOTE(review): the actual free statement (MEM_SAFE_FREE(data_), presumably) was
 * dropped by extraction. */
292{
293 if (!is_subrange_) {
295 }
296}
297
/* Take ownership of a CPU-built 32-bit index array and finalize it: optionally
 * strip restart indices for point primitives (Metal workaround) and, when index
 * range tracking is enabled, compress to 16-bit indices if the used range fits.
 * NOTE(review): two lines are missing from this extraction (the non-compressed
 * index-type branch and the GPU_type_matches_ex argument list). */
298void IndexBuf::init(uint indices_len,
299 uint32_t *indices,
300 uint min_index,
301 uint max_index,
302 GPUPrimType prim_type,
303 bool uses_restart_indices)
304{
305 is_init_ = true;
306 data_ = indices;
307 index_start_ = 0;
308 index_len_ = indices_len;
 /* An inverted range (min > max) means no real index was ever added. */
309 is_empty_ = min_index > max_index;
310
311 /* Patch index buffer to remove restart indices from
312 * non-restart-compatible primitive types. Restart indices
313 * are situationally added to selectively hide vertices.
314 * Metal does not support restart-indices for non-restart-compatible
315 * types, as such we should remove these indices.
316 *
317 * We only need to perform this for point primitives, as
318 * line primitives/triangle primitives can use index 0 for all
319 * vertices to create a degenerative primitive, where all
320 * vertices share the same index and skip rendering via HW
321 * culling. */
322 if (prim_type == GPU_PRIM_POINTS && uses_restart_indices) {
323 this->strip_restart_indices();
324 }
325
326#if GPU_TRACK_INDEX_RANGE
327 /* Everything remains 32 bit while building to keep things simple.
328 * Find min/max after, then convert to smallest index type possible. */
329 uint range = min_index < max_index ? max_index - min_index : 0;
330 /* count the primitive restart index. */
331 range += 1;
332
333 if (range <= 0xFFFF) {
335 bool do_clamp_indices = false;
336# ifdef __APPLE__
337 /* NOTE: For the Metal Backend, we use degenerative primitives to hide vertices
338 * which are not restart compatible. When this is done, we need to ensure
339 * that compressed index ranges clamp all index values within the valid
340 * range, rather than maximally clamping against the USHORT restart index
341 * value of 0xFFFFu, as this will cause an out-of-bounds read during
342 * vertex assembly. */
343 do_clamp_indices = GPU_type_matches_ex(
345# endif
346 this->squeeze_indices_short(min_index, max_index, prim_type, do_clamp_indices);
347 }
348#endif
349}
350
/* Configure this buffer as GPU-resident only: no CPU-side data pointer, length
 * fixed up-front. NOTE(review): signature line and one statement (line 356) were
 * dropped by extraction. */
352{
353 is_init_ = true;
354 index_start_ = 0;
355 index_len_ = index_len;
357 data_ = nullptr;
358}
359
/* Make this buffer a view over [start, start+length) of `elem_src`, inheriting
 * its base offset and index type. NOTE(review): the signature and two statements
 * (lines 364/370 -- presumably the range assert and index_len_ assignment) are
 * missing from this extraction. */
361{
362 /* We don't support nested sub-ranges. */
363 BLI_assert(elem_src && elem_src->is_subrange_ == false);
365
366 is_init_ = true;
367 is_subrange_ = true;
368 src_ = elem_src;
369 index_start_ = start;
371 index_base_ = elem_src->index_base_;
372 index_type_ = elem_src->index_type_;
373}
374
/* Compress the owned 32-bit index array to 16-bit in place. Safe without a
 * second allocation because for element i the 16-bit write lands at byte offset
 * 2*i, which is at or before the 32-bit read at offset 4*i -- reads always stay
 * ahead of writes. The exact forward iteration order is load-bearing; do not
 * reorder this loop. */
375void IndexBuf::squeeze_indices_short(uint min_idx,
376 uint max_idx,
377 GPUPrimType prim_type,
378 bool clamp_indices_in_range)
379{
380 /* data will never be *larger* than builder->data...
381 * converting in place to avoid extra allocation */
382 uint16_t *ushort_idx = (uint16_t *)data_;
383 const uint32_t *uint_idx = (uint32_t *)data_;
384
385 if (max_idx >= 0xFFFF) {
 /* Indices don't fit as-is: rebase them against min_idx (stored in index_base_
  * so the draw call can add it back). */
386 index_base_ = min_idx;
387 /* NOTE: When using restart_index=0 for degenerative primitives indices,
388 * the compressed index will go below zero and wrap around when min_idx > 0.
389 * In order to ensure the resulting index is still within range, we instead
390 * clamp index to the maximum within the index range.
391 *
392 * `clamp_max_idx` represents the maximum possible index to clamp against. If primitive is
393 * restart-compatible, we can just clamp against the primitive-restart value, otherwise, we
394 * must assign to a valid index within the range.
395 *
396 * NOTE: For OpenGL we skip this by disabling clamping, as we still need to use
397 * restart index values for point primitives to disable rendering. */
398 uint16_t clamp_max_idx = (is_restart_compatible(prim_type) || !clamp_indices_in_range) ?
399 0xFFFFu :
400 (max_idx - min_idx);
401 for (uint i = 0; i < index_len_; i++) {
 /* Unsigned wrap-around of `uint_idx[i] - min_idx` (restart sentinels below
  * min_idx) is caught by the clamp. */
402 ushort_idx[i] = std::min<uint16_t>(clamp_max_idx, uint_idx[i] - min_idx);
403 }
404 }
405 else {
 /* Whole range already fits in 16 bits: plain truncation, no rebasing. */
406 index_base_ = 0;
407 for (uint i = 0; i < index_len_; i++) {
408 ushort_idx[i] = uint16_t(uint_idx[i]);
409 }
410 }
411}
412
413} // namespace blender::gpu
414
416
417/* -------------------------------------------------------------------- */
420
425
/* GPU_indexbuf_build: allocate an IndexBuf and finalize the builder into it.
 * NOTE(review): the signature and the allocation line (GPU_indexbuf_calloc,
 * presumably) were dropped by extraction. */
427{
429 GPU_indexbuf_build_in_place(builder, elem);
430 return elem;
431}
432
/* GPU_indexbuf_create_subrange: allocate an IndexBuf and initialize it as a view
 * over part of `elem_src`. NOTE(review): the signature and the allocation line
 * were dropped by extraction. */
434{
436 GPU_indexbuf_create_subrange_in_place(elem, elem_src, start, length);
437 return elem;
438}
439
441{
442 BLI_assert(builder->data != nullptr);
443 /* Transfer data ownership to IndexBuf.
444 * It will be uploaded upon first use. */
445 elem->init(builder->index_len,
446 builder->data,
447 builder->index_min,
448 builder->index_max,
449 builder->prim_type,
450 builder->uses_restart_indices);
451 builder->data = nullptr;
452}
453
455 const uint index_min,
456 const uint index_max,
457 const bool uses_restart_indices,
458 IndexBuf *elem)
459{
460 BLI_assert(builder->data != nullptr);
461 /* Transfer data ownership to IndexBuf.
462 * It will be uploaded upon first use. */
463 elem->init(builder->max_index_len,
464 builder->data,
465 index_min,
466 index_max,
467 builder->prim_type,
468 uses_restart_indices);
469 builder->data = nullptr;
470}
471
/* GPU_indexbuf_build_ex: allocate an IndexBuf and finalize via
 * GPU_indexbuf_build_in_place_ex. NOTE(review): the signature's first line and
 * the allocation line were dropped by extraction. */
473 const uint index_min,
474 const uint index_max,
475 const bool uses_restart_indices)
476{
478 GPU_indexbuf_build_in_place_ex(builder, index_min, index_max, uses_restart_indices, elem);
479 return elem;
480}
481
/* GPU_indexbuf_build_from_memory: build an IndexBuf from a caller-owned index
 * array. `data_len` counts primitives, not indices; the input is copied into an
 * IBO-owned array. NOTE(review): the signature's first line and the IndexBuf
 * allocation line were dropped by extraction. */
483 const uint32_t *data,
484 const int32_t data_len,
485 const int32_t index_min,
486 const int32_t index_max,
487 const bool uses_restart_indices)
488{
489 const uint32_t indices_num = data_len * indices_per_primitive(prim_type);
490 /* TODO: The need for this copy is meant to be temporary. The data should be uploaded directly to
491 * the GPU here rather than copied to an array owned by the IBO first. */
492 uint32_t *copy = MEM_malloc_arrayN<uint32_t>(indices_num, __func__);
 /* Factor of 2: the copy reads `indices_num` words and writes as many. */
493 threading::memory_bandwidth_bound_task(sizeof(uint32_t) * indices_num * 2, [&]() {
494 array_utils::copy(Span(data, indices_num), MutableSpan(copy, indices_num));
495 });
497 ibo->init(indices_num, copy, index_min, index_max, prim_type, uses_restart_indices);
498 return ibo;
499}
500
/* GPU_indexbuf_create_subrange_in_place: initialize `elem` as a non-owning view
 * over [start, start+length) of `elem_src`. NOTE(review): the signature's first
 * line was dropped by extraction. */
502 IndexBuf *elem_src,
503 uint start,
504 uint length)
505{
506 elem->init_subrange(elem_src, start, length);
507}
508
/* Read the buffer's indices back into caller-provided `data` (backend virtual). */
509void GPU_indexbuf_read(IndexBuf *elem, uint32_t *data)
510{
511 elem->read(data);
512}
513
/* GPU_indexbuf_discard: destroy the buffer (C API wrapper around delete).
 * NOTE(review): signature line dropped by extraction. */
515{
516 delete elem;
517}
518
/* GPU_indexbuf_is_init: query whether the buffer was initialized.
 * NOTE(review): signature line dropped by extraction. */
520{
521 return elem->is_init();
522}
523
/* GPU_indexbuf_primitive_len: indices per primitive for `prim_type` (-1 for
 * variable-size types, per the assert usage in GPU_indexbuf_init).
 * NOTE(review): signature line dropped by extraction. */
525{
526 return indices_per_primitive(prim_type);
527}
528
/* GPU_indexbuf_use: force the deferred upload of CPU-side data to the GPU.
 * NOTE(review): signature line dropped by extraction. */
530{
531 elem->upload_data();
532}
533
/* Bind the index buffer as a shader storage buffer at `binding` (backend virtual). */
534void GPU_indexbuf_bind_as_ssbo(IndexBuf *elem, int binding)
535{
536 elem->bind_as_ssbo(binding);
537}
538
/* Update a sub-range of the buffer from `data` (backend virtual; `start`/`len`
 * units are defined by the backend implementation -- confirm bytes vs indices). */
539void GPU_indexbuf_update_sub(IndexBuf *elem, uint start, uint len, const void *data)
540{
541 elem->update_sub(start, len, data);
542}
543
#define BLI_assert(a)
Definition BLI_assert.h:46
MINLINE uint min_uu(uint a, uint b)
MINLINE uint max_uu(uint a, uint b)
unsigned int uint
#define ELEM(...)
int GPU_max_work_group_count(int index)
void GPU_compute_dispatch(blender::gpu::Shader *shader, uint groups_x_len, uint groups_y_len, uint groups_z_len, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
void GPU_indexbuf_discard(blender::gpu::IndexBuf *elem)
void GPU_indexbuf_build_in_place_ex(GPUIndexBufBuilder *builder, uint index_min, uint index_max, bool uses_restart_indices, blender::gpu::IndexBuf *elem)
int GPU_indexbuf_primitive_len(GPUPrimType prim_type)
void GPU_indexbuf_create_subrange_in_place(blender::gpu::IndexBuf *elem, blender::gpu::IndexBuf *elem_src, uint start, uint length)
blender::gpu::IndexBuf * GPU_indexbuf_build_ex(GPUIndexBufBuilder *builder, uint index_min, uint index_max, bool uses_restart_indices)
void GPU_indexbuf_bind_as_ssbo(blender::gpu::IndexBuf *elem, int binding)
blender::gpu::IndexBuf * GPU_indexbuf_calloc()
blender::gpu::IndexBuf * GPU_indexbuf_build(GPUIndexBufBuilder *)
void GPU_indexbuf_read(blender::gpu::IndexBuf *elem, uint32_t *data)
void GPU_indexbuf_build_in_place(GPUIndexBufBuilder *, blender::gpu::IndexBuf *)
void GPU_indexbuf_use(blender::gpu::IndexBuf *elem)
blender::gpu::IndexBuf * GPU_indexbuf_create_subrange(blender::gpu::IndexBuf *elem_src, uint start, uint length)
void GPU_indexbuf_update_sub(blender::gpu::IndexBuf *elem, uint start, uint len, const void *data)
blender::gpu::IndexBuf * GPU_indexbuf_build_from_memory(GPUPrimType prim_type, const uint32_t *data, int32_t data_len, int32_t index_min, int32_t index_max, bool uses_restart_indices)
bool GPU_indexbuf_is_init(blender::gpu::IndexBuf *elem)
@ GPU_DEVICE_ANY
@ GPU_DRIVER_ANY
@ GPU_OS_MAC
bool GPU_type_matches_ex(GPUDeviceType device, GPUOSType os, GPUDriverType driver, GPUBackendType backend)
GPUPrimType
@ GPU_PRIM_LINES
@ GPU_PRIM_POINTS
@ GPU_PRIM_LINES_ADJ
@ GPU_PRIM_LINE_STRIP
@ GPU_PRIM_TRI_STRIP
@ GPU_PRIM_TRIS
bool is_restart_compatible(GPUPrimType type)
void GPU_shader_bind(blender::gpu::Shader *shader, const blender::gpu::shader::SpecializationConstants *constants_state=nullptr)
int GPU_shader_get_ssbo_binding(blender::gpu::Shader *shader, const char *name)
void GPU_shader_uniform_1i(blender::gpu::Shader *sh, const char *name, int value)
void GPU_shader_unbind()
blender::gpu::Shader * GPU_shader_get_builtin_shader(GPUBuiltinShader shader)
@ GPU_SHADER_INDEXBUF_TRIS
@ GPU_SHADER_INDEXBUF_LINES
@ GPU_SHADER_INDEXBUF_POINTS
@ GPU_BARRIER_ELEMENT_ARRAY
Definition GPU_state.hh:52
void GPU_memory_barrier(GPUBarrier barrier)
Definition gpu_state.cc:326
Read Guarded memory(de)allocation.
#define MEM_SAFE_FREE(v)
BMesh const char void * data
ATTR_WARN_UNUSED_RESULT const BMVert * v2
ATTR_WARN_UNUSED_RESULT const BMVert * v
unsigned long long int uint64_t
SIMD_FORCE_INLINE btScalar length() const
Return the length of the vector.
Definition btVector3.h:257
static GPUBackend * get()
virtual IndexBuf * indexbuf_alloc()=0
virtual void upload_data()=0
virtual void read(uint32_t *data) const =0
GPUIndexBufType index_type_
virtual void update_sub(uint start, uint len, const void *data)=0
void init_build_on_device(uint index_len)
void init_subrange(IndexBuf *elem_src, uint start, uint length)
virtual void bind_as_ssbo(uint binding)=0
void init(uint indices_len, uint32_t *indices, uint min_index, uint max_index, GPUPrimType prim_type, bool uses_restart_indices)
static ushort indices[]
#define UINT32_MAX
void GPU_indexbuf_set_point_vert(GPUIndexBufBuilder *builder, uint elem, uint v1)
int GPU_indexbuf_primitive_len(GPUPrimType prim_type)
void GPU_indexbuf_add_generic_vert(GPUIndexBufBuilder *builder, uint v)
void GPU_indexbuf_set_point_restart(GPUIndexBufBuilder *builder, uint elem)
IndexBuf * GPU_indexbuf_build_on_device(uint index_len)
IndexBuf * GPU_indexbuf_build_curves_on_device(GPUPrimType prim_type, uint curves_num, uint verts_per_curve)
IndexBuf * GPU_indexbuf_calloc()
blender::MutableSpan< uint32_t > GPU_indexbuf_get_data(GPUIndexBufBuilder *builder)
void GPU_indexbuf_set_line_verts(GPUIndexBufBuilder *builder, uint elem, uint v1, uint v2)
void GPU_indexbuf_set_tri_verts(GPUIndexBufBuilder *builder, uint elem, uint v1, uint v2, uint v3)
void GPU_indexbuf_set_line_restart(GPUIndexBufBuilder *builder, uint elem)
void GPU_indexbuf_add_line_adj_verts(GPUIndexBufBuilder *builder, uint v1, uint v2, uint v3, uint v4)
void GPU_indexbuf_bind_as_ssbo(IndexBuf *elem, int binding)
void GPU_indexbuf_set_tri_restart(GPUIndexBufBuilder *builder, uint elem)
void GPU_indexbuf_join(GPUIndexBufBuilder *builder_to, const GPUIndexBufBuilder *builder_from)
void GPU_indexbuf_add_primitive_restart(GPUIndexBufBuilder *builder)
void GPU_indexbuf_add_point_vert(GPUIndexBufBuilder *builder, uint v)
void GPU_indexbuf_init_build_on_device(IndexBuf *elem, uint index_len)
void GPU_indexbuf_init_ex(GPUIndexBufBuilder *builder, GPUPrimType prim_type, uint index_len, uint vertex_len)
void GPU_indexbuf_add_tri_verts(GPUIndexBufBuilder *builder, uint v1, uint v2, uint v3)
void GPU_indexbuf_init(GPUIndexBufBuilder *builder, GPUPrimType prim_type, uint prim_len, uint vertex_len)
void GPU_indexbuf_add_line_verts(GPUIndexBufBuilder *builder, uint v1, uint v2)
#define ceil
#define sqrt
float length(VecOp< float, D >) RET
void * MEM_malloc_arrayN(size_t len, size_t size, const char *str)
Definition mallocn.cc:133
void copy(const GVArray &src, GMutableSpan dst, int64_t grain_size=4096)
constexpr uint32_t RESTART_INDEX
int indices_per_primitive(GPUPrimType prim_type)
void memory_bandwidth_bound_task(const int64_t approximate_bytes_touched, const Function &function)
Definition BLI_task.hh:265
static void copy(bNodeTree *dest_ntree, bNode *dest_node, const bNode *src_node)
i
Definition text_draw.cc:230
uint len