Blender 4.3 — Cycles utility header "util/atomic.h" (source listing extracted from the generated documentation).
1/* SPDX-FileCopyrightText: 2014-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifndef __UTIL_ATOMIC_H__
6#define __UTIL_ATOMIC_H__
7
8#ifndef __KERNEL_GPU__
9
10/* Using atomic ops header from Blender. */
11# include "atomic_ops.h"
12
13# define atomic_add_and_fetch_float(p, x) atomic_add_and_fetch_fl((p), (x))
14# define atomic_compare_and_swap_float(p, old_val, new_val) \
15 atomic_cas_float((p), (old_val), (new_val))
16
17# define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)
18# define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_add_uint32((p), -1)
19
20# define CCL_LOCAL_MEM_FENCE 0
21# define ccl_barrier(flags) ((void)0)
22
23#else /* __KERNEL_GPU__ */
24
25# ifndef __KERNEL_ONEAPI__
26# define atomic_fetch_and_add_uint32_shared atomic_fetch_and_add_uint32
27# endif
28
29# if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
30
31# define atomic_add_and_fetch_float(p, x) (atomicAdd((float *)(p), (float)(x)) + (float)(x))
32
33# define atomic_fetch_and_add_uint32(p, x) atomicAdd((unsigned int *)(p), (unsigned int)(x))
34# define atomic_fetch_and_sub_uint32(p, x) atomicSub((unsigned int *)(p), (unsigned int)(x))
35# define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)
36# define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1)
37# define atomic_fetch_and_or_uint32(p, x) atomicOr((unsigned int *)(p), (unsigned int)(x))
38
39ccl_device_inline float atomic_compare_and_swap_float(volatile float *dest,
40 const float old_val,
41 const float new_val)
42{
43 union {
44 unsigned int int_value;
45 float float_value;
46 } new_value, prev_value, result;
47 prev_value.float_value = old_val;
48 new_value.float_value = new_val;
49 result.int_value = atomicCAS((unsigned int *)dest, prev_value.int_value, new_value.int_value);
50 return result.float_value;
51}
52
53# define CCL_LOCAL_MEM_FENCE
54# define ccl_barrier(flags) __syncthreads()
55
56# endif /* __KERNEL_CUDA__ */
57
58# ifdef __KERNEL_METAL__
59
60// global address space versions
61ccl_device_inline float atomic_add_and_fetch_float(volatile ccl_global float *_source,
62 const float operand)
63{
64# if __METAL_VERSION__ >= 300
65 return atomic_fetch_add_explicit(
66 (ccl_global atomic_float *)_source, operand, memory_order_relaxed);
67# else
68 volatile ccl_global atomic_int *source = (ccl_global atomic_int *)_source;
69 union {
70 int int_value;
71 float float_value;
72 } new_value, prev_value;
73 prev_value.int_value = atomic_load_explicit(source, memory_order_relaxed);
74 do {
75 new_value.float_value = prev_value.float_value + operand;
76 } while (!atomic_compare_exchange_weak_explicit(source,
77 &prev_value.int_value,
78 new_value.int_value,
79 memory_order_relaxed,
80 memory_order_relaxed));
81
82 return new_value.float_value;
83# endif
84}
85
86template<class T> ccl_device_inline uint32_t atomic_fetch_and_add_uint32(device T *p, int x)
87{
88 return atomic_fetch_add_explicit((device atomic_uint *)p, x, memory_order_relaxed);
89}
90
91template<class T> ccl_device_inline uint32_t atomic_fetch_and_sub_uint32(device T *p, int x)
92{
93 return atomic_fetch_sub_explicit((device atomic_uint *)p, x, memory_order_relaxed);
94}
95
96template<class T> ccl_device_inline uint32_t atomic_fetch_and_inc_uint32(device T *p)
97{
98 return atomic_fetch_add_explicit((device atomic_uint *)p, 1, memory_order_relaxed);
99}
100
101template<class T> ccl_device_inline uint32_t atomic_fetch_and_dec_uint32(device T *p)
102{
103 return atomic_fetch_sub_explicit((device atomic_uint *)p, 1, memory_order_relaxed);
104}
105
106template<class T> ccl_device_inline uint32_t atomic_fetch_and_or_uint32(device T *p, int x)
107{
108 return atomic_fetch_or_explicit((device atomic_uint *)p, x, memory_order_relaxed);
109}
110
111template<class T> ccl_device_inline uint32_t atomic_fetch_and_add_uint32(threadgroup T *p, int x)
112{
113 return atomic_fetch_add_explicit((threadgroup atomic_uint *)p, x, memory_order_relaxed);
114}
115
116template<class T> ccl_device_inline uint32_t atomic_fetch_and_sub_uint32(threadgroup T *p, int x)
117{
118 return atomic_fetch_sub_explicit((threadgroup atomic_uint *)p, x, memory_order_relaxed);
119}
120
121template<class T> ccl_device_inline uint32_t atomic_fetch_and_inc_uint32(threadgroup T *p)
122{
123 return atomic_fetch_add_explicit((threadgroup atomic_uint *)p, 1, memory_order_relaxed);
124}
125
126template<class T> ccl_device_inline uint32_t atomic_fetch_and_dec_uint32(threadgroup T *p)
127{
128 return atomic_fetch_sub_explicit((threadgroup atomic_uint *)p, 1, memory_order_relaxed);
129}
130
131template<class T> ccl_device_inline uint32_t atomic_fetch_and_or_uint32(threadgroup T *p, int x)
132{
133 return atomic_fetch_or_explicit((threadgroup atomic_uint *)p, x, memory_order_relaxed);
134}
135
137 const float old_val,
138 const float new_val)
139{
140# if __METAL_VERSION__ >= 300
141 float prev_value = old_val;
142 atomic_compare_exchange_weak_explicit((ccl_global atomic_float *)dest,
143 &prev_value,
144 new_val,
145 memory_order_relaxed,
146 memory_order_relaxed);
147 return prev_value;
148# else
149 int prev_value;
150 prev_value = __float_as_int(old_val);
151 atomic_compare_exchange_weak_explicit((ccl_global atomic_int *)dest,
152 &prev_value,
153 __float_as_int(new_val),
154 memory_order_relaxed,
155 memory_order_relaxed);
156 return __int_as_float(prev_value);
157# endif
158}
159
160# define atomic_store(p, x) atomic_store_explicit(p, x, memory_order_relaxed)
161# define atomic_fetch(p) atomic_load_explicit(p, memory_order_relaxed)
162
163# define atomic_store_local(p, x) \
164 atomic_store_explicit((ccl_gpu_shared atomic_int *)p, x, memory_order_relaxed)
165# define atomic_load_local(p) \
166 atomic_load_explicit((ccl_gpu_shared atomic_int *)p, memory_order_relaxed)
167
168# define CCL_LOCAL_MEM_FENCE mem_flags::mem_threadgroup
169# define ccl_barrier(flags) threadgroup_barrier(flags)
170
171# endif /* __KERNEL_METAL__ */
172
173# ifdef __KERNEL_ONEAPI__
174
176{
177 sycl::atomic_ref<float,
178 sycl::memory_order::relaxed,
179 sycl::memory_scope::device,
180 sycl::access::address_space::ext_intel_global_device_space>
181 atomic(*p);
182 return atomic.fetch_add(x);
183}
184
186 float old_val,
187 float new_val)
188{
189 sycl::atomic_ref<float,
190 sycl::memory_order::relaxed,
191 sycl::memory_scope::device,
192 sycl::access::address_space::ext_intel_global_device_space>
193 atomic(*source);
194 atomic.compare_exchange_weak(old_val, new_val);
195 return old_val;
196}
197
198ccl_device_inline unsigned int atomic_fetch_and_add_uint32(ccl_global unsigned int *p,
199 unsigned int x)
200{
201 sycl::atomic_ref<unsigned int,
202 sycl::memory_order::relaxed,
203 sycl::memory_scope::device,
204 sycl::access::address_space::ext_intel_global_device_space>
205 atomic(*p);
206 return atomic.fetch_add(x);
207}
208
210{
211 sycl::atomic_ref<int,
212 sycl::memory_order::relaxed,
213 sycl::memory_scope::device,
214 sycl::access::address_space::ext_intel_global_device_space>
215 atomic(*p);
216 return atomic.fetch_add(x);
217}
218
219ccl_device_inline int atomic_fetch_and_add_uint32_shared(int *p, int x)
220{
221 sycl::atomic_ref<int,
222 sycl::memory_order::relaxed,
223 sycl::memory_scope::device,
224 sycl::access::address_space::local_space>
225 atomic(*p);
226 return atomic.fetch_add(x);
227}
228
229ccl_device_inline unsigned int atomic_fetch_and_sub_uint32(ccl_global unsigned int *p,
230 unsigned int x)
231{
232 sycl::atomic_ref<unsigned int,
233 sycl::memory_order::relaxed,
234 sycl::memory_scope::device,
235 sycl::access::address_space::ext_intel_global_device_space>
236 atomic(*p);
237 return atomic.fetch_sub(x);
238}
239
240ccl_device_inline int atomic_fetch_and_sub_uint32(ccl_global int *p, int x)
241{
242 sycl::atomic_ref<int,
243 sycl::memory_order::relaxed,
244 sycl::memory_scope::device,
245 sycl::access::address_space::ext_intel_global_device_space>
246 atomic(*p);
247 return atomic.fetch_sub(x);
248}
249
250ccl_device_inline unsigned int atomic_fetch_and_inc_uint32(ccl_global unsigned int *p)
251{
252 return atomic_fetch_and_add_uint32(p, 1);
253}
254
256{
257 return atomic_fetch_and_add_uint32(p, 1);
258}
259
260ccl_device_inline unsigned int atomic_fetch_and_dec_uint32(ccl_global unsigned int *p)
261{
262 return atomic_fetch_and_sub_uint32(p, 1);
263}
264
266{
267 return atomic_fetch_and_sub_uint32(p, 1);
268}
269
270ccl_device_inline unsigned int atomic_fetch_and_or_uint32(ccl_global unsigned int *p,
271 unsigned int x)
272{
273 sycl::atomic_ref<unsigned int,
274 sycl::memory_order::relaxed,
275 sycl::memory_scope::device,
276 sycl::access::address_space::ext_intel_global_device_space>
277 atomic(*p);
278 return atomic.fetch_or(x);
279}
280
282{
283 sycl::atomic_ref<int,
284 sycl::memory_order::relaxed,
285 sycl::memory_scope::device,
286 sycl::access::address_space::ext_intel_global_device_space>
287 atomic(*p);
288 return atomic.fetch_or(x);
289}
290
291ccl_device_inline void atomic_store_local(int *p, int x)
292{
293 sycl::atomic_ref<int,
294 sycl::memory_order::relaxed,
295 sycl::memory_scope::device,
296 sycl::access::address_space::local_space>
297 atomic(*p);
298 atomic.store(x);
299}
300
301ccl_device_inline int atomic_load_local(int *p)
302{
303 sycl::atomic_ref<int,
304 sycl::memory_order::relaxed,
305 sycl::memory_scope::device,
306 sycl::access::address_space::local_space>
307 atomic(*p);
308 return atomic.load();
309}
310
311# endif /* __KERNEL_ONEAPI__ */
312
313#endif /* __KERNEL_GPU__ */
314
315#endif /* __UTIL_ATOMIC_H__ */
Cross-reference index from the documentation page (extraction residue): entries for the macros atomic_fetch_and_dec_uint32 (atomic.h:18), atomic_compare_and_swap_float (atomic.h:14), atomic_fetch_and_inc_uint32 (atomic.h:17), atomic_add_and_fetch_float (atomic.h:13); the Blender atomic-ops wrapper header ("Provides wrapper around system-specific atomic primitives, and some extensions"), with ATOMIC_INLINE uint32_t atomic_fetch_and_or_uint32(uint32_t *p, uint32_t x) and ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x); the macros ccl_device_inline, ccl_global, __int_as_float(x), __float_as_int(x); and the typedef "unsigned int uint32_t" (stdint.h:80). The "draw_view" shader-snippet lines belonged to an unrelated page and were mis-joined by the extractor.