Blender V5.0
array_store_rle.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2025 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
39
40#include <cstdlib>
41#include <cstring>
42
43#include "MEM_guardedalloc.h"
44
45#include "BLI_assert.h"
46#include "BLI_utildefines.h"
47
48#include "BLI_array_store.h" /* Own include. */
49
50#include "BLI_strict_flags.h" /* IWYU pragma: keep. Keep last. */
51
52/* -------------------------------------------------------------------- */
55
62#define USE_FIND_FASTPATH
63
/**
 * Return the index of the first byte in `data` (scanning from `offset` up to `size`)
 * that is *not* equal to `value`, or `size` when all remaining bytes equal `value`.
 *
 * \param data: The byte array to scan.
 * \param offset: The index to begin scanning from (must be `<= size`).
 * \param size: The number of valid bytes in `data`.
 * \param value: The byte value to skip over.
 */
static size_t find_byte_not_equal_to(const uint8_t *data,
                                     size_t offset,
                                     const size_t size,
                                     const uint8_t value)
{
  BLI_assert(offset <= size);

#ifdef USE_FIND_FASTPATH
  using fast_int = uintptr_t;

  /* In the case of random data, early exit without entering more involved steps. */

  /* Calculate the minimum size which may use an optimized search. */
  constexpr size_t min_size_for_fast_path = (
      /* Pass 1: scans a fixed size. */
      sizeof(size_t[2]) +
      /* Pass 2: scans a fixed size but aligns to `fast_int`. */
      sizeof(size_t) + sizeof(fast_int) +
      /* Pass 3: trims the end of `data` by `fast_int`
       * add to ensure there is at least one item to read. */
      sizeof(fast_int));

  if (LIKELY(size - offset > min_size_for_fast_path)) {

    /* Pass 1: Scan forward with a fixed size to check if an early exit
     * is needed (this may exit on the first few bytes). */
    const uint8_t *p = data + offset;
    const uint8_t *p_end = p + sizeof(size_t[2]);
    do {
      if (LIKELY(*p != value)) {
        return size_t(p - data);
      }
      p++;
    } while (p < p_end);
    /* `offset` is no longer valid and needs to be updated from `p` before use. */

    /* Pass 2: Scan forward at least `sizeof(size_t)` bytes,
     * aligned to the next `sizeof(fast_int)` aligned boundary. */
    p_end = reinterpret_cast<const uint8_t *>(
        ((uintptr_t(p) + sizeof(size_t) + sizeof(fast_int)) & ~(sizeof(fast_int) - 1)));
    do {
      if (LIKELY(*p != value)) {
        return size_t(p - data);
      }
      p++;
    } while (p < p_end);

    /* Pass 3: Scan forward the `fast_int` aligned chunks (the fast path).
     * This block is responsible for scanning over large spans of contiguous bytes. */

    /* There are at least `sizeof(size_t[2])` number of bytes all equal.
     * Use `fast_int` aligned reads for a faster search. */
    BLI_assert((uintptr_t(p) & (sizeof(fast_int) - 1)) == 0);
    const fast_int *p_fast = reinterpret_cast<const fast_int *>(p);
    /* Not aligned, but this doesn't matter as it's only used for comparison. */
    const fast_int *p_fast_last = reinterpret_cast<const fast_int *>(data +
                                                                     (size - sizeof(fast_int)));
    BLI_assert(p_fast <= p_fast_last);
    /* A `fast_int` whose every byte equals `value`, for whole-word comparison. */
    fast_int value_fast;
    memset(&value_fast, value, sizeof(value_fast));
    do {
      /* Use unlikely given many of the previous bytes match. */
      if (UNLIKELY(*p_fast != value_fast)) {
        break;
      }
      p_fast++;
    } while (p_fast <= p_fast_last);
    offset = size_t(reinterpret_cast<const uint8_t *>(p_fast) - data);
    /* Perform byte level check with any trailing data. */
  }
#endif /* USE_FIND_FASTPATH */

  /* Byte-at-a-time scan: the tail after the fast path,
   * or the entire range when the fast path is skipped. */
  while ((offset < size) && (value == data[offset])) {
    offset += 1;
  }
  return offset;
}
141
143
144/* -------------------------------------------------------------------- */
147
148struct RLE_Head {
154 size_t span_size;
155};
156
158 uint8_t _span_size_pad[sizeof(size_t)];
159 size_t value;
160};
161
162struct RLE_Span {
163 uint8_t _span_size_pad[sizeof(size_t)];
164 uint8_t value;
165};
166BLI_STATIC_ASSERT(sizeof(RLE_Span) == sizeof(size_t) + sizeof(uint8_t), "");
167
168struct RLE_Elem {
169 union {
173 };
174};
175
178 size_t links_num;
180 RLE_Elem links[(4096 / sizeof(RLE_Elem)) -
181 (sizeof(RLE_ElemChunk *) + sizeof(size_t) + MEM_SIZE_OVERHEAD)];
182};
184
189
190static void rle_link_chunk_iter_new(RLE_ElemChunk *links_block, RLE_ElemChunkIter *link_block_iter)
191{
192 link_block_iter->iter = links_block;
193 link_block_iter->link_curr = 0;
194}
195
197{
198 RLE_ElemChunk *link_block = link_block_iter->iter;
199 if (link_block_iter->link_curr < link_block->links_num) {
200 return &link_block->links[link_block_iter->link_curr++];
201 }
202 if (link_block->next) {
203 link_block = link_block_iter->iter = link_block->next;
204 link_block_iter->link_curr = 1;
205 return &link_block->links[0];
206 }
207 return nullptr;
208}
209
211{
212 RLE_ElemChunk *link_block = MEM_mallocN<RLE_ElemChunk>(__func__);
213 link_block->next = nullptr;
214 link_block->links_num = 0;
215 return link_block;
216}
217
219{
220 while (RLE_ElemChunk *link_iter = link_block) {
221 link_block = link_iter->next;
222 MEM_freeN(link_iter);
223 }
224}
225
227{
228 RLE_ElemChunk *link_block = *link_block_p;
229 if (UNLIKELY(link_block->links_num == ARRAY_SIZE(link_block->links))) {
230 RLE_ElemChunk *link_block_next = rle_link_chunk_new();
231 link_block->next = link_block_next;
232 link_block = link_block_next;
233 *link_block_p = link_block_next;
234 }
235 return &link_block->links[link_block->links_num++];
236}
237
239
240/* -------------------------------------------------------------------- */
243
/**
 * RLE encode `data_dec` into a newly allocated buffer.
 *
 * \param data_dec: The data to encode.
 * \param data_dec_len: The number of bytes in `data_dec`.
 * \param data_enc_extra_size: Number of uninitialized bytes reserved at the start
 * of the returned buffer for the caller's own use.
 * \param r_data_enc_len: Set to the number of encoded bytes
 * (excluding `data_enc_extra_size`).
 * \return The encoded buffer (terminated by a zeroed #RLE_Literal),
 * ownership passes to the caller.
 */
uint8_t *BLI_array_store_rle_encode(const uint8_t *data_dec,
                                    const size_t data_dec_len,
                                    const size_t data_enc_extra_size,
                                    size_t *r_data_enc_len)
{
  size_t data_enc_alloc_size = data_enc_extra_size +
                               sizeof(RLE_Literal); /* A single null terminator. */

  /* Notes on the threshold for choosing when to include literal data or RLE encode.
   * From testing a ~4 million array of booleans.
   *
   * Regarding space efficiency:
   *
   * - For data with fewer changes: `sizeof(RLE_Literal)` (16 on a 64bit system) is optimal.
   *   The improvement varies, between 5-20%.
   * - For random data: `sizeof(RLE_Literal) + sizeof(size_t)` (24 on a 64bit system) is optimal.
   *   The improvement is only ~5% though.
   *
   * The time difference between each is roughly the same.
   */
  constexpr size_t rle_skip_threshold = sizeof(RLE_Literal);

  /* First pass: build a chunked list of elements & compute the allocation size. */
  RLE_ElemChunk *link_blocks = rle_link_chunk_new();
  RLE_ElemChunk *link_blocks_first = link_blocks;

  /* Re-use results from scanning ahead (as needed). */
  for (size_t ofs_dec = 0, span_skip_next = 1; ofs_dec < data_dec_len;) {
    /* Scan ahead to detect the size of the non-RLE span. */
    size_t ofs_dec_next = ofs_dec + span_skip_next;
    span_skip_next = 1;

    /* Detect and use the `span` if possible. */
    uint8_t value_start = data_dec[ofs_dec];
    ofs_dec_next = find_byte_not_equal_to(data_dec, ofs_dec_next, data_dec_len, value_start);

    RLE_Elem *e = rle_link_chunk_elem_new(&link_blocks);
    const size_t span = ofs_dec_next - ofs_dec;
    if (span >= rle_skip_threshold) {
      /* Catch off by one errors. */
      BLI_assert(data_dec[ofs_dec] == data_dec[(ofs_dec + span) - 1]);
      BLI_assert((ofs_dec + span == data_dec_len) ||
                 (data_dec[ofs_dec] != data_dec[(ofs_dec + span)]));
      e->head.span_size = span;
      e->span.value = value_start;
      data_enc_alloc_size += sizeof(RLE_Span);
    }
    else {
      /* A large enough span was not found,
       * scan ahead to detect the size of the non-RLE span. */

      /* Check the offset isn't at the very end of the array. */
      size_t ofs_dec_test = ofs_dec_next + 1;
      if (LIKELY(ofs_dec_test < data_dec_len)) {
        /* The first value that changed, start searching here. */
        size_t ofs_dec_test_start = ofs_dec_next;
        value_start = data_dec[ofs_dec_test_start];
        while (true) {
          if (value_start == data_dec[ofs_dec_test]) {
            ofs_dec_test += 1;
            const size_t span_test = ofs_dec_test - ofs_dec_test_start;
            BLI_assert(span_test <= rle_skip_threshold);
            if (span_test == rle_skip_threshold) {
              /* Write the span of non-RLE data,
               * then start scanning the magnitude of the RLE span at the start of the loop. */
              span_skip_next = span_test;
              ofs_dec_next = ofs_dec_test_start;
              break;
            }
          }
          else {
            BLI_assert(ofs_dec_test - ofs_dec_test_start < rle_skip_threshold);
            value_start = data_dec[ofs_dec_test];
            ofs_dec_test_start = ofs_dec_test;
            ofs_dec_test += 1;
          }

          if (UNLIKELY(ofs_dec_test == data_dec_len)) {
            ofs_dec_next = data_dec_len;
            break;
          }
        }
      }
      else {
        ofs_dec_next = data_dec_len;
      }

      /* Interleave the #RLE_Literal. */
      const size_t non_rle_span = ofs_dec_next - ofs_dec;
      e->head.span_size = 0;
      e->literal.value = non_rle_span;
      data_enc_alloc_size += sizeof(RLE_Literal) + non_rle_span;
    }

    ofs_dec = ofs_dec_next;
  }

  /* Encode RLE and literal data into this flat buffer. */
  uint8_t *data_enc = MEM_malloc_arrayN<uint8_t>(data_enc_alloc_size, __func__);
  data_enc += data_enc_extra_size;

  size_t ofs_enc = 0;
  size_t ofs_dec = 0;

  /* Second pass: flush every element (and its literal bytes) into `data_enc`. */
  RLE_ElemChunkIter link_block_iter;
  rle_link_chunk_iter_new(link_blocks_first, &link_block_iter);
  while (RLE_Elem *e = rle_link_chunk_iter_step(&link_block_iter)) {
    BLI_assert(ofs_dec <= data_dec_len);

    if (e->head.span_size) {
      memcpy(data_enc + ofs_enc, &e->span, sizeof(RLE_Span));
      ofs_enc += sizeof(RLE_Span);
      ofs_dec += e->head.span_size;
    }
    else {
      memcpy(data_enc + ofs_enc, &e->literal, sizeof(RLE_Literal));
      ofs_enc += sizeof(RLE_Literal);
      BLI_assert(e->literal.value > 0);
      const size_t non_rle_span = e->literal.value;
      memcpy(data_enc + ofs_enc, data_dec + ofs_dec, non_rle_span);
      ofs_enc += non_rle_span;
      ofs_dec += non_rle_span;
    }
  }
  rle_link_chunk_free_all(link_blocks_first);
  BLI_assert(data_enc_extra_size + ofs_enc + sizeof(RLE_Literal) == data_enc_alloc_size);
  BLI_assert(ofs_dec == data_dec_len);

  /* Set the `RLE_Literal` span & value to 0 to terminate. */
  memset(data_enc + ofs_enc, 0x0, sizeof(RLE_Literal));

  *r_data_enc_len = data_enc_alloc_size - data_enc_extra_size;

  data_enc -= data_enc_extra_size;
  return data_enc;
}
379
380void BLI_array_store_rle_decode(const uint8_t *data_enc,
381 const size_t data_enc_len,
382 void *data_dec_v,
383 const size_t data_dec_len)
384{
385 /* NOTE: `data_enc_len` & `data_dec_len` could be omitted.
386 * They're just to ensure data isn't corrupt. */
387 uint8_t *data_dec = reinterpret_cast<uint8_t *>(data_dec_v);
388 size_t ofs_enc = 0;
389 size_t ofs_dec = 0;
390
391 while (true) {
392 /* Copy as this may not be aligned. */
393 RLE_Head e;
394 memcpy(&e, data_enc + ofs_enc, sizeof(RLE_Head));
395 ofs_enc += sizeof(RLE_Head);
396 if (e.span_size != 0) {
397 /* Read #RLE_Span::value directly from memory. */
398 const uint8_t value = *reinterpret_cast<const uint8_t *>(data_enc + ofs_enc);
399 memset(data_dec + ofs_dec, int(value), e.span_size);
400 ofs_enc += sizeof(uint8_t);
401 ofs_dec += e.span_size;
402 }
403 else {
404 /* Read #RLE_Literal::value directly from memory. */
405 size_t non_rle_span;
406 memcpy(&non_rle_span, data_enc + ofs_enc, sizeof(size_t));
407 ofs_enc += sizeof(size_t);
408 if (non_rle_span) {
409 memcpy(data_dec + ofs_dec, data_enc + ofs_enc, non_rle_span);
410 ofs_enc += non_rle_span;
411 ofs_dec += non_rle_span;
412 }
413 else {
414 /* Both are zero - an end-of-buffer signal. */
415 break;
416 }
417 }
418 }
419 BLI_assert(ofs_enc == data_enc_len);
420 BLI_assert(ofs_dec == data_dec_len);
421 UNUSED_VARS_NDEBUG(data_enc_len, data_dec_len);
422}
423
Efficient in-memory storage of multiple similar arrays.
#define BLI_STATIC_ASSERT(a, msg)
Definition BLI_assert.h:83
#define BLI_assert(a)
Definition BLI_assert.h:46
#define ARRAY_SIZE(arr)
#define UNUSED_VARS_NDEBUG(...)
#define UNLIKELY(x)
#define LIKELY(x)
Read Guarded memory(de)allocation.
#define MEM_SIZE_OVERHEAD
static size_t find_byte_not_equal_to(const uint8_t *data, size_t offset, const size_t size, const uint8_t value)
static void rle_link_chunk_free_all(RLE_ElemChunk *link_block)
void BLI_array_store_rle_decode(const uint8_t *data_enc, const size_t data_enc_len, void *data_dec_v, const size_t data_dec_len)
uint8_t * BLI_array_store_rle_encode(const uint8_t *data_dec, const size_t data_dec_len, const size_t data_enc_extra_size, size_t *r_data_enc_len)
static RLE_Elem * rle_link_chunk_iter_step(RLE_ElemChunkIter *link_block_iter)
static RLE_ElemChunk * rle_link_chunk_new()
static void rle_link_chunk_iter_new(RLE_ElemChunk *links_block, RLE_ElemChunkIter *link_block_iter)
static RLE_Elem * rle_link_chunk_elem_new(RLE_ElemChunk **link_block_p)
BMesh const char void * data
ATTR_WARN_UNUSED_RESULT const BMVert const BMEdge * e
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
static const float value_start[NUM_VALUE_KINDS]
void * MEM_mallocN(size_t len, const char *str)
Definition mallocn.cc:128
void * MEM_malloc_arrayN(size_t len, size_t size, const char *str)
Definition mallocn.cc:133
void MEM_freeN(void *vmemh)
Definition mallocn.cc:113
RLE_ElemChunk * iter
RLE_ElemChunk * next
RLE_Elem links[(4096/sizeof(RLE_Elem)) -(sizeof(RLE_ElemChunk *)+sizeof(size_t)+MEM_SIZE_OVERHEAD)]
RLE_Literal literal
RLE_Head head
RLE_Span span
uint8_t _span_size_pad[sizeof(size_t)]
uint8_t _span_size_pad[sizeof(size_t)]