5#ifndef __UTIL_ATOMIC_H__
6#define __UTIL_ATOMIC_H__
13# define atomic_add_and_fetch_float(p, x) atomic_add_and_fetch_fl((p), (x))
14# define atomic_compare_and_swap_float(p, old_val, new_val) \
15 atomic_cas_float((p), (old_val), (new_val))
17# define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)
18# define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_add_uint32((p), -1)
20# define CCL_LOCAL_MEM_FENCE 0
21# define ccl_barrier(flags) ((void)0)
25# ifndef __KERNEL_ONEAPI__
26# define atomic_fetch_and_add_uint32_shared atomic_fetch_and_add_uint32
29# if defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__)
31# define atomic_add_and_fetch_float(p, x) (atomicAdd((float *)(p), (float)(x)) + (float)(x))
33# define atomic_fetch_and_add_uint32(p, x) atomicAdd((unsigned int *)(p), (unsigned int)(x))
34# define atomic_fetch_and_sub_uint32(p, x) atomicSub((unsigned int *)(p), (unsigned int)(x))
35# define atomic_fetch_and_inc_uint32(p) atomic_fetch_and_add_uint32((p), 1)
36# define atomic_fetch_and_dec_uint32(p) atomic_fetch_and_sub_uint32((p), 1)
37# define atomic_fetch_and_or_uint32(p, x) atomicOr((unsigned int *)(p), (unsigned int)(x))
44 unsigned int int_value;
46 } new_value, prev_value,
result;
47 prev_value.float_value = old_val;
48 new_value.float_value = new_val;
49 result.int_value = atomicCAS((
unsigned int *)dest, prev_value.int_value, new_value.int_value);
50 return result.float_value;
/* Barrier mapping for CUDA/HIP: the fence flag carries no payload and
 * ccl_barrier() is a block-wide __syncthreads(). */
# define CCL_LOCAL_MEM_FENCE
# define ccl_barrier(flags) __syncthreads()
58# ifdef __KERNEL_METAL__
64# if __METAL_VERSION__ >= 300
65 return atomic_fetch_add_explicit(
66 (
ccl_global atomic_float *)_source, operand, memory_order_relaxed);
72 } new_value, prev_value;
73 prev_value.int_value = atomic_load_explicit(source, memory_order_relaxed);
75 new_value.float_value = prev_value.float_value + operand;
76 }
while (!atomic_compare_exchange_weak_explicit(source,
77 &prev_value.int_value,
80 memory_order_relaxed));
82 return new_value.float_value;
88 return atomic_fetch_add_explicit((device atomic_uint *)p, x, memory_order_relaxed);
93 return atomic_fetch_sub_explicit((device atomic_uint *)p, x, memory_order_relaxed);
98 return atomic_fetch_add_explicit((device atomic_uint *)p, 1, memory_order_relaxed);
103 return atomic_fetch_sub_explicit((device atomic_uint *)p, 1, memory_order_relaxed);
108 return atomic_fetch_or_explicit((device atomic_uint *)p, x, memory_order_relaxed);
113 return atomic_fetch_add_explicit((threadgroup atomic_uint *)p, x, memory_order_relaxed);
118 return atomic_fetch_sub_explicit((threadgroup atomic_uint *)p, x, memory_order_relaxed);
123 return atomic_fetch_add_explicit((threadgroup atomic_uint *)p, 1, memory_order_relaxed);
128 return atomic_fetch_sub_explicit((threadgroup atomic_uint *)p, 1, memory_order_relaxed);
133 return atomic_fetch_or_explicit((threadgroup atomic_uint *)p, x, memory_order_relaxed);
140# if __METAL_VERSION__ >= 300
141 float prev_value = old_val;
142 atomic_compare_exchange_weak_explicit((
ccl_global atomic_float *)dest,
145 memory_order_relaxed,
146 memory_order_relaxed);
151 atomic_compare_exchange_weak_explicit((
ccl_global atomic_int *)dest,
154 memory_order_relaxed,
155 memory_order_relaxed);
/* Relaxed-ordering load/store wrappers over Metal's explicit atomics. */
# define atomic_store(p, x) atomic_store_explicit(p, x, memory_order_relaxed)
# define atomic_fetch(p) atomic_load_explicit(p, memory_order_relaxed)

/* Variants for work-group shared memory (ccl_gpu_shared address space). */
# define atomic_store_local(p, x) \
   atomic_store_explicit((ccl_gpu_shared atomic_int *)p, x, memory_order_relaxed)
# define atomic_load_local(p) \
   atomic_load_explicit((ccl_gpu_shared atomic_int *)p, memory_order_relaxed)

/* Barrier mapping: fence flag selects threadgroup memory; ccl_barrier() is a
 * threadgroup-wide barrier taking those mem_flags. */
# define CCL_LOCAL_MEM_FENCE mem_flags::mem_threadgroup
# define ccl_barrier(flags) threadgroup_barrier(flags)
173# ifdef __KERNEL_ONEAPI__
177 sycl::atomic_ref<
float,
178 sycl::memory_order::relaxed,
179 sycl::memory_scope::device,
180 sycl::access::address_space::ext_intel_global_device_space>
182 return atomic.fetch_add(x);
189 sycl::atomic_ref<
float,
190 sycl::memory_order::relaxed,
191 sycl::memory_scope::device,
192 sycl::access::address_space::ext_intel_global_device_space>
194 atomic.compare_exchange_weak(old_val, new_val);
201 sycl::atomic_ref<
unsigned int,
202 sycl::memory_order::relaxed,
203 sycl::memory_scope::device,
204 sycl::access::address_space::ext_intel_global_device_space>
206 return atomic.fetch_add(x);
211 sycl::atomic_ref<
int,
212 sycl::memory_order::relaxed,
213 sycl::memory_scope::device,
214 sycl::access::address_space::ext_intel_global_device_space>
216 return atomic.fetch_add(x);
221 sycl::atomic_ref<
int,
222 sycl::memory_order::relaxed,
223 sycl::memory_scope::device,
224 sycl::access::address_space::local_space>
226 return atomic.fetch_add(x);
232 sycl::atomic_ref<
unsigned int,
233 sycl::memory_order::relaxed,
234 sycl::memory_scope::device,
235 sycl::access::address_space::ext_intel_global_device_space>
237 return atomic.fetch_sub(x);
242 sycl::atomic_ref<
int,
243 sycl::memory_order::relaxed,
244 sycl::memory_scope::device,
245 sycl::access::address_space::ext_intel_global_device_space>
247 return atomic.fetch_sub(x);
262 return atomic_fetch_and_sub_uint32(p, 1);
267 return atomic_fetch_and_sub_uint32(p, 1);
273 sycl::atomic_ref<
unsigned int,
274 sycl::memory_order::relaxed,
275 sycl::memory_scope::device,
276 sycl::access::address_space::ext_intel_global_device_space>
278 return atomic.fetch_or(x);
283 sycl::atomic_ref<
int,
284 sycl::memory_order::relaxed,
285 sycl::memory_scope::device,
286 sycl::access::address_space::ext_intel_global_device_space>
288 return atomic.fetch_or(x);
293 sycl::atomic_ref<
int,
294 sycl::memory_order::relaxed,
295 sycl::memory_scope::device,
296 sycl::access::address_space::local_space>
303 sycl::atomic_ref<
int,
304 sycl::memory_order::relaxed,
305 sycl::memory_scope::device,
306 sycl::access::address_space::local_space>
308 return atomic.load();
#define atomic_fetch_and_dec_uint32(p)
#define atomic_compare_and_swap_float(p, old_val, new_val)
#define atomic_fetch_and_inc_uint32(p)
#define atomic_add_and_fetch_float(p, x)
Provides a wrapper around system-specific atomic primitives, and some extensions (faked-atomic operations over float numbers).
ATOMIC_INLINE uint32_t atomic_fetch_and_or_uint32(uint32_t *p, uint32_t x)
ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x)
#define ccl_device_inline
draw_view in_light_buf[] float
draw_view push_constant(Type::INT, "radiance_src") .push_constant(Type capture_info_buf storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]") .push_constant(Type draw_view int