atomic_ops_unix.h
/*
 * Original code from jemalloc with this license:
 *
 * Copyright (C) 2002-2013 Jason Evans <jasone@canonware.com>.
 * All rights reserved.
 * Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved.
 * Copyright (C) 2009-2013 Facebook, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 1. Redistributions of source code must retain the above copyright notice(s),
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice(s),
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 * The Original Code is adapted from jemalloc.
 * Modifications Copyright (C) 2016 Blender Foundation
 */

#ifndef __ATOMIC_OPS_UNIX_H__
#define __ATOMIC_OPS_UNIX_H__

#include "atomic_ops_utils.h"

#if defined(__arm__) || defined(__riscv)
/* Attempt to fix compilation error on Debian armel and RISC-V kernels.
 * Both architectures do have 32- and 64-bit atomics, however
 * their GCC does not have __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n defined.
 */
#  define JE_FORCE_SYNC_COMPARE_AND_SWAP_1
#  define JE_FORCE_SYNC_COMPARE_AND_SWAP_2
#  define JE_FORCE_SYNC_COMPARE_AND_SWAP_4
#  define JE_FORCE_SYNC_COMPARE_AND_SWAP_8
#endif

/* Define `ATOMIC_FORCE_USE_FALLBACK` to force the lock-based fallback implementation to be used
 * (even on platforms where a native implementation is available via the compiler).
 * Useful for development purposes. */
#undef ATOMIC_FORCE_USE_FALLBACK
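/* For local development one would typically (as an illustrative, temporary change only) turn the
 * `#undef` above into a definition, so that the lock-based code paths below get exercised:
 *
 *   #define ATOMIC_FORCE_USE_FALLBACK
 */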

/* -------------------------------------------------------------------- */
/* Spin-lock used by the lock-based fallback implementation of the atomic operations below. */

typedef struct AtomicSpinLock {
  volatile int lock;

  /* Pad the structure size to a cache-line, to avoid unwanted sharing with other data. */
  int pad[32 - sizeof(int)];
} __attribute__((aligned(32))) AtomicSpinLock;

ATOMIC_INLINE void atomic_spin_lock(volatile AtomicSpinLock *lock)
{
  while (__sync_lock_test_and_set(&lock->lock, 1)) {
    while (lock->lock) {
    }
  }
}

ATOMIC_INLINE void atomic_spin_unlock(volatile AtomicSpinLock *lock)
{
  __sync_lock_release(&lock->lock);
}

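/* Illustrative usage sketch (not part of this header's API): the spin-lock simply brackets a
 * non-atomic read-modify-write, exactly as the ATOMIC_LOCKING_* fallback macros below do.
 * `counter_lock` and `counter` are hypothetical names.
 *
 *   static AtomicSpinLock counter_lock;
 *   static uint64_t counter = 0;
 *
 *   void increment_counter(void)
 *   {
 *     atomic_spin_lock(&counter_lock);
 *     counter += 1;
 *     atomic_spin_unlock(&counter_lock);
 *   }
 */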
/* -------------------------------------------------------------------- */
/* Generic (memory-barrier based) load and store helpers. */

/* TODO(sergey): On the x64 platform both read and write of a variable aligned to its type size
 * are atomic, so in theory it is possible to avoid the memory barrier and gain performance. The
 * downside of that would be that it would impose an alignment requirement on the value which is
 * being operated on. */
#define __atomic_impl_load_generic(v) (__sync_synchronize(), *(v))
#define __atomic_impl_store_generic(p, v) \
  do { \
    *(p) = (v); \
    __sync_synchronize(); \
  } while (0)

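/* For illustration (assuming only the two macros above): a load built on these helpers issues a
 * full memory barrier and then performs a plain read, e.g.
 *
 *   uint64_t load_example(const uint64_t *v)   // hypothetical helper, not part of this header
 *   {
 *     return __atomic_impl_load_generic(v);    // __sync_synchronize(), then *(v)
 *   }
 */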
/* -------------------------------------------------------------------- */
/* Common part of the lock-based fallback implementation. */

/* Global lock, shared by all atomic operations implementations.
 *
 * Could be split into per-size locks, although the added complexity and being more error-prone
 * does not seem worth it for a fallback implementation. */
static _ATOMIC_MAYBE_UNUSED AtomicSpinLock _atomic_global_lock = {0};

#define ATOMIC_LOCKING_OP_AND_FETCH_DEFINE(_type, _op_name, _op) \
  ATOMIC_INLINE _type##_t atomic_##_op_name##_and_fetch_##_type(_type##_t *p, _type##_t x) \
  { \
    atomic_spin_lock(&_atomic_global_lock); \
    const _type##_t original_value = *(p); \
    const _type##_t new_value = original_value _op(x); \
    *(p) = new_value; \
    atomic_spin_unlock(&_atomic_global_lock); \
    return new_value; \
  }
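/* For reference, `ATOMIC_LOCKING_OP_AND_FETCH_DEFINE(uint64, add, +)` expands to roughly the
 * following (shown here only as an illustration of the macro above, not as additional API):
 *
 *   ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x)
 *   {
 *     atomic_spin_lock(&_atomic_global_lock);
 *     const uint64_t original_value = *(p);
 *     const uint64_t new_value = original_value + (x);
 *     *(p) = new_value;
 *     atomic_spin_unlock(&_atomic_global_lock);
 *     return new_value;
 *   }
 */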

#define ATOMIC_LOCKING_FETCH_AND_OP_DEFINE(_type, _op_name, _op) \
  ATOMIC_INLINE _type##_t atomic_fetch_and_##_op_name##_##_type(_type##_t *p, _type##_t x) \
  { \
    atomic_spin_lock(&_atomic_global_lock); \
    const _type##_t original_value = *(p); \
    *(p) = original_value _op(x); \
    atomic_spin_unlock(&_atomic_global_lock); \
    return original_value; \
  }

#define ATOMIC_LOCKING_ADD_AND_FETCH_DEFINE(_type) \
  ATOMIC_LOCKING_OP_AND_FETCH_DEFINE(_type, add, +)

#define ATOMIC_LOCKING_SUB_AND_FETCH_DEFINE(_type) \
  ATOMIC_LOCKING_OP_AND_FETCH_DEFINE(_type, sub, -)

#define ATOMIC_LOCKING_FETCH_AND_ADD_DEFINE(_type) \
  ATOMIC_LOCKING_FETCH_AND_OP_DEFINE(_type, add, +)

#define ATOMIC_LOCKING_FETCH_AND_SUB_DEFINE(_type) \
  ATOMIC_LOCKING_FETCH_AND_OP_DEFINE(_type, sub, -)

#define ATOMIC_LOCKING_FETCH_AND_OR_DEFINE(_type) ATOMIC_LOCKING_FETCH_AND_OP_DEFINE(_type, or, |)

#define ATOMIC_LOCKING_FETCH_AND_AND_DEFINE(_type) \
  ATOMIC_LOCKING_FETCH_AND_OP_DEFINE(_type, and, &)

#define ATOMIC_LOCKING_CAS_DEFINE(_type) \
  ATOMIC_INLINE _type##_t atomic_cas_##_type(_type##_t *v, _type##_t old, _type##_t _new) \
  { \
    atomic_spin_lock(&_atomic_global_lock); \
    const _type##_t original_value = *v; \
    if (*v == old) { \
      *v = _new; \
    } \
    atomic_spin_unlock(&_atomic_global_lock); \
    return original_value; \
  }

#define ATOMIC_LOCKING_LOAD_DEFINE(_type) \
  ATOMIC_INLINE _type##_t atomic_load_##_type(const _type##_t *v) \
  { \
    atomic_spin_lock(&_atomic_global_lock); \
    const _type##_t value = *v; \
    atomic_spin_unlock(&_atomic_global_lock); \
    return value; \
  }

#define ATOMIC_LOCKING_STORE_DEFINE(_type) \
  ATOMIC_INLINE void atomic_store_##_type(_type##_t *p, const _type##_t v) \
  { \
    atomic_spin_lock(&_atomic_global_lock); \
    *p = v; \
    atomic_spin_unlock(&_atomic_global_lock); \
  }

/* -------------------------------------------------------------------- */
/* 64-bit operations. */

#if !defined(ATOMIC_FORCE_USE_FALLBACK) && \
    (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
/* Unsigned */
ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x)
{
  return __sync_add_and_fetch(p, x);
}

ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x)
{
  return __sync_sub_and_fetch(p, x);
}

ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x)
{
  return __sync_fetch_and_add(p, x);
}

ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x)
{
  return __sync_fetch_and_sub(p, x);
}

ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
  return __sync_val_compare_and_swap(v, old, _new);
}
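/* A typical compare-and-swap retry loop built on `atomic_cas_uint64` (an illustrative sketch
 * only; `atomic_max_uint64_example` is a hypothetical helper, not part of this header):
 *
 *   uint64_t atomic_max_uint64_example(uint64_t *p, uint64_t x)
 *   {
 *     uint64_t prev = *p;
 *     while (prev < x) {
 *       const uint64_t seen = atomic_cas_uint64(p, prev, x);
 *       if (seen == prev) {
 *         break;  // The swap succeeded, `*p` now holds `x`.
 *       }
 *       prev = seen;  // Another thread changed `*p`; retry with the value observed.
 *     }
 *     return prev;
 *   }
 */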

ATOMIC_INLINE uint64_t atomic_load_uint64(const uint64_t *v)
{
  return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}

ATOMIC_INLINE void atomic_store_uint64(uint64_t *p, uint64_t v)
{
  __atomic_store(p, &v, __ATOMIC_SEQ_CST);
}

/* Signed */
ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x)
{
  return __sync_add_and_fetch(p, x);
}

ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x)
{
  return __sync_sub_and_fetch(p, x);
}

ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
{
  return __sync_fetch_and_add(p, x);
}

ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x)
{
  return __sync_fetch_and_sub(p, x);
}

ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
{
  return __sync_val_compare_and_swap(v, old, _new);
}

ATOMIC_INLINE int64_t atomic_load_int64(const int64_t *v)
{
  return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}

ATOMIC_INLINE void atomic_store_int64(int64_t *p, int64_t v)
{
  __atomic_store(p, &v, __ATOMIC_SEQ_CST);
}

#elif !defined(ATOMIC_FORCE_USE_FALLBACK) && (defined(__amd64__) || defined(__x86_64__))
/* Unsigned */
ATOMIC_INLINE uint64_t atomic_fetch_and_add_uint64(uint64_t *p, uint64_t x)
{
  asm volatile("lock; xaddq %0, %1;"
               : "+r"(x), "=m"(*p) /* Outputs. */
               : "m"(*p)           /* Inputs. */
  );
  return x;
}
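/* `lock; xaddq` atomically exchanges the register operand with `*p` and stores their sum back to
 * `*p`, so after the instruction `x` holds the value `*p` had before the addition, which is the
 * fetch-and-add result returned here. */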

ATOMIC_INLINE uint64_t atomic_fetch_and_sub_uint64(uint64_t *p, uint64_t x)
{
  x = (uint64_t)(-(int64_t)x);
  asm volatile("lock; xaddq %0, %1;"
               : "+r"(x), "=m"(*p) /* Outputs. */
               : "m"(*p)           /* Inputs. */
  );
  return x;
}

ATOMIC_INLINE uint64_t atomic_add_and_fetch_uint64(uint64_t *p, uint64_t x)
{
  return atomic_fetch_and_add_uint64(p, x) + x;
}

ATOMIC_INLINE uint64_t atomic_sub_and_fetch_uint64(uint64_t *p, uint64_t x)
{
  return atomic_fetch_and_sub_uint64(p, x) - x;
}

ATOMIC_INLINE uint64_t atomic_cas_uint64(uint64_t *v, uint64_t old, uint64_t _new)
{
  uint64_t ret;
  asm volatile("lock; cmpxchgq %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
  return ret;
}
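/* `lock; cmpxchgq` compares RAX (seeded from `old` through the "0" constraint) with `*v`: if
 * they match, `_new` is stored into `*v`, otherwise RAX is reloaded from `*v`. Either way RAX
 * ends up holding the original value of `*v`, which is what gets returned. */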

ATOMIC_INLINE uint64_t atomic_load_uint64(const uint64_t *v)
{
  return __atomic_impl_load_generic(v);
}

ATOMIC_INLINE void atomic_store_uint64(uint64_t *p, uint64_t v)
{
  __atomic_impl_store_generic(p, v);
}

/* Signed */
ATOMIC_INLINE int64_t atomic_fetch_and_add_int64(int64_t *p, int64_t x)
{
  asm volatile("lock; xaddq %0, %1;"
               : "+r"(x), "=m"(*p) /* Outputs. */
               : "m"(*p)           /* Inputs. */
  );
  return x;
}

ATOMIC_INLINE int64_t atomic_fetch_and_sub_int64(int64_t *p, int64_t x)
{
  x = -x;
  asm volatile("lock; xaddq %0, %1;"
               : "+r"(x), "=m"(*p) /* Outputs. */
               : "m"(*p)           /* Inputs. */
  );
  return x;
}

ATOMIC_INLINE int64_t atomic_add_and_fetch_int64(int64_t *p, int64_t x)
{
  return atomic_fetch_and_add_int64(p, x) + x;
}

ATOMIC_INLINE int64_t atomic_sub_and_fetch_int64(int64_t *p, int64_t x)
{
  return atomic_fetch_and_sub_int64(p, x) - x;
}

ATOMIC_INLINE int64_t atomic_cas_int64(int64_t *v, int64_t old, int64_t _new)
{
  int64_t ret;
  asm volatile("lock; cmpxchgq %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
  return ret;
}

ATOMIC_INLINE int64_t atomic_load_int64(const int64_t *v)
{
  return __atomic_impl_load_generic(v);
}

ATOMIC_INLINE void atomic_store_int64(int64_t *p, int64_t v)
{
  __atomic_impl_store_generic(p, v);
}

#else

/* Unsigned */

ATOMIC_LOCKING_ADD_AND_FETCH_DEFINE(uint64)
ATOMIC_LOCKING_SUB_AND_FETCH_DEFINE(uint64)

ATOMIC_LOCKING_FETCH_AND_ADD_DEFINE(uint64)
ATOMIC_LOCKING_FETCH_AND_SUB_DEFINE(uint64)

ATOMIC_LOCKING_CAS_DEFINE(uint64)

ATOMIC_LOCKING_LOAD_DEFINE(uint64)
ATOMIC_LOCKING_STORE_DEFINE(uint64)

/* Signed */
ATOMIC_LOCKING_ADD_AND_FETCH_DEFINE(int64)
ATOMIC_LOCKING_SUB_AND_FETCH_DEFINE(int64)

ATOMIC_LOCKING_FETCH_AND_ADD_DEFINE(int64)
ATOMIC_LOCKING_FETCH_AND_SUB_DEFINE(int64)

ATOMIC_LOCKING_CAS_DEFINE(int64)

ATOMIC_LOCKING_LOAD_DEFINE(int64)
ATOMIC_LOCKING_STORE_DEFINE(int64)

#endif

/* -------------------------------------------------------------------- */
/* 32-bit operations. */

#if !defined(ATOMIC_FORCE_USE_FALLBACK) && \
    (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
/* Unsigned */
ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x)
{
  return __sync_add_and_fetch(p, x);
}

ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x)
{
  return __sync_sub_and_fetch(p, x);
}

ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
  return __sync_val_compare_and_swap(v, old, _new);
}

ATOMIC_INLINE uint32_t atomic_load_uint32(const uint32_t *v)
{
  return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}

ATOMIC_INLINE void atomic_store_uint32(uint32_t *p, uint32_t v)
{
  __atomic_store(p, &v, __ATOMIC_SEQ_CST);
}

/* Signed */
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x)
{
  return __sync_add_and_fetch(p, x);
}

ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x)
{
  return __sync_sub_and_fetch(p, x);
}

ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
{
  return __sync_val_compare_and_swap(v, old, _new);
}

ATOMIC_INLINE int32_t atomic_load_int32(const int32_t *v)
{
  return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}

ATOMIC_INLINE void atomic_store_int32(int32_t *p, int32_t v)
{
  __atomic_store(p, &v, __ATOMIC_SEQ_CST);
}

#elif !defined(ATOMIC_FORCE_USE_FALLBACK) && \
    (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
/* Unsigned */
ATOMIC_INLINE uint32_t atomic_add_and_fetch_uint32(uint32_t *p, uint32_t x)
{
  uint32_t ret = x;
  asm volatile("lock; xaddl %0, %1;"
               : "+r"(ret), "=m"(*p) /* Outputs. */
               : "m"(*p)             /* Inputs. */
  );
  return ret + x;
}

ATOMIC_INLINE uint32_t atomic_sub_and_fetch_uint32(uint32_t *p, uint32_t x)
{
  uint32_t ret = (uint32_t)(-(int32_t)x);
  asm volatile("lock; xaddl %0, %1;"
               : "+r"(ret), "=m"(*p) /* Outputs. */
               : "m"(*p)             /* Inputs. */
  );
  return ret - x;
}

ATOMIC_INLINE uint32_t atomic_cas_uint32(uint32_t *v, uint32_t old, uint32_t _new)
{
  uint32_t ret;
  asm volatile("lock; cmpxchgl %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
  return ret;
}

ATOMIC_INLINE uint32_t atomic_load_uint32(const uint32_t *v)
{
  return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}

ATOMIC_INLINE void atomic_store_uint32(uint32_t *p, uint32_t v)
{
  __atomic_store(p, &v, __ATOMIC_SEQ_CST);
}

/* Signed */
ATOMIC_INLINE int32_t atomic_add_and_fetch_int32(int32_t *p, int32_t x)
{
  int32_t ret = x;
  asm volatile("lock; xaddl %0, %1;"
               : "+r"(ret), "=m"(*p) /* Outputs. */
               : "m"(*p)             /* Inputs. */
  );
  return ret + x;
}

ATOMIC_INLINE int32_t atomic_sub_and_fetch_int32(int32_t *p, int32_t x)
{
  int32_t ret = -x;
  asm volatile("lock; xaddl %0, %1;"
               : "+r"(ret), "=m"(*p) /* Outputs. */
               : "m"(*p)             /* Inputs. */
  );
  return ret - x;
}

ATOMIC_INLINE int32_t atomic_cas_int32(int32_t *v, int32_t old, int32_t _new)
{
  int32_t ret;
  asm volatile("lock; cmpxchgl %2,%1" : "=a"(ret), "+m"(*v) : "r"(_new), "0"(old) : "memory");
  return ret;
}

ATOMIC_INLINE int32_t atomic_load_int32(const int32_t *v)
{
  return __atomic_load_n(v, __ATOMIC_SEQ_CST);
}

ATOMIC_INLINE void atomic_store_int32(int32_t *p, int32_t v)
{
  __atomic_store(p, &v, __ATOMIC_SEQ_CST);
}

#else

/* Unsigned */

ATOMIC_LOCKING_ADD_AND_FETCH_DEFINE(uint32)
ATOMIC_LOCKING_SUB_AND_FETCH_DEFINE(uint32)

ATOMIC_LOCKING_CAS_DEFINE(uint32)

ATOMIC_LOCKING_LOAD_DEFINE(uint32)
ATOMIC_LOCKING_STORE_DEFINE(uint32)

/* Signed */

ATOMIC_LOCKING_ADD_AND_FETCH_DEFINE(int32)
ATOMIC_LOCKING_SUB_AND_FETCH_DEFINE(int32)

ATOMIC_LOCKING_CAS_DEFINE(int32)

ATOMIC_LOCKING_LOAD_DEFINE(int32)
ATOMIC_LOCKING_STORE_DEFINE(int32)

#endif

#if !defined(ATOMIC_FORCE_USE_FALLBACK) && \
    (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
/* Unsigned */
ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x)
{
  return __sync_fetch_and_add(p, x);
}

ATOMIC_INLINE uint32_t atomic_fetch_and_or_uint32(uint32_t *p, uint32_t x)
{
  return __sync_fetch_and_or(p, x);
}

ATOMIC_INLINE uint32_t atomic_fetch_and_and_uint32(uint32_t *p, uint32_t x)
{
  return __sync_fetch_and_and(p, x);
}

/* Signed */
ATOMIC_INLINE int32_t atomic_fetch_and_add_int32(int32_t *p, int32_t x)
{
  return __sync_fetch_and_add(p, x);
}

ATOMIC_INLINE int32_t atomic_fetch_and_or_int32(int32_t *p, int32_t x)
{
  return __sync_fetch_and_or(p, x);
}

ATOMIC_INLINE int32_t atomic_fetch_and_and_int32(int32_t *p, int32_t x)
{
  return __sync_fetch_and_and(p, x);
}

#else

/* Unsigned */
ATOMIC_LOCKING_FETCH_AND_ADD_DEFINE(uint32)
ATOMIC_LOCKING_FETCH_AND_OR_DEFINE(uint32)
ATOMIC_LOCKING_FETCH_AND_AND_DEFINE(uint32)

/* Signed */
ATOMIC_LOCKING_FETCH_AND_ADD_DEFINE(int32)
ATOMIC_LOCKING_FETCH_AND_OR_DEFINE(int32)
ATOMIC_LOCKING_FETCH_AND_AND_DEFINE(int32)

#endif

/* -------------------------------------------------------------------- */
/* 16-bit operations. */

#if !defined(ATOMIC_FORCE_USE_FALLBACK) && \
    (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_2))

/* Signed */
ATOMIC_INLINE int16_t atomic_fetch_and_and_int16(int16_t *p, int16_t b)
{
  return __sync_fetch_and_and(p, b);
}
ATOMIC_INLINE int16_t atomic_fetch_and_or_int16(int16_t *p, int16_t b)
{
  return __sync_fetch_and_or(p, b);
}

#else

ATOMIC_LOCKING_FETCH_AND_AND_DEFINE(int16)
ATOMIC_LOCKING_FETCH_AND_OR_DEFINE(int16)

#endif

/* -------------------------------------------------------------------- */
/* 8-bit operations. */

#if !defined(ATOMIC_FORCE_USE_FALLBACK) && \
    (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1) || defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_1))

/* Unsigned */
ATOMIC_INLINE uint8_t atomic_fetch_and_and_uint8(uint8_t *p, uint8_t b)
{
  return __sync_fetch_and_and(p, b);
}
ATOMIC_INLINE uint8_t atomic_fetch_and_or_uint8(uint8_t *p, uint8_t b)
{
  return __sync_fetch_and_or(p, b);
}

/* Signed */
ATOMIC_INLINE int8_t atomic_fetch_and_and_int8(int8_t *p, int8_t b)
{
  return __sync_fetch_and_and(p, b);
}
ATOMIC_INLINE int8_t atomic_fetch_and_or_int8(int8_t *p, int8_t b)
{
  return __sync_fetch_and_or(p, b);
}

#else

/* Unsigned */
ATOMIC_LOCKING_FETCH_AND_AND_DEFINE(uint8)
ATOMIC_LOCKING_FETCH_AND_OR_DEFINE(uint8)

/* Signed */
ATOMIC_LOCKING_FETCH_AND_AND_DEFINE(int8)
ATOMIC_LOCKING_FETCH_AND_OR_DEFINE(int8)

#endif
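
/* The and/or variants above are handy for atomically setting or clearing flag bits. An
 * illustrative sketch (the names below are hypothetical, not part of this header):
 *
 *   #define OBJECT_FLAG_DIRTY ((uint8_t)(1 << 0))
 *
 *   void mark_dirty(uint8_t *flags)
 *   {
 *     atomic_fetch_and_or_uint8(flags, OBJECT_FLAG_DIRTY);             // Set the bit.
 *   }
 *
 *   void clear_dirty(uint8_t *flags)
 *   {
 *     atomic_fetch_and_and_uint8(flags, (uint8_t)~OBJECT_FLAG_DIRTY);  // Clear the bit.
 *   }
 */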

#undef __atomic_impl_load_generic
#undef __atomic_impl_store_generic

#undef ATOMIC_LOCKING_OP_AND_FETCH_DEFINE
#undef ATOMIC_LOCKING_FETCH_AND_OP_DEFINE
#undef ATOMIC_LOCKING_ADD_AND_FETCH_DEFINE
#undef ATOMIC_LOCKING_SUB_AND_FETCH_DEFINE
#undef ATOMIC_LOCKING_FETCH_AND_ADD_DEFINE
#undef ATOMIC_LOCKING_FETCH_AND_SUB_DEFINE
#undef ATOMIC_LOCKING_FETCH_AND_OR_DEFINE
#undef ATOMIC_LOCKING_FETCH_AND_AND_DEFINE
#undef ATOMIC_LOCKING_CAS_DEFINE
#undef ATOMIC_LOCKING_LOAD_DEFINE
#undef ATOMIC_LOCKING_STORE_DEFINE

#endif /* __ATOMIC_OPS_UNIX_H__ */