Blender V4.3
state_flow.h
Go to the documentation of this file.
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
 *
 * SPDX-License-Identifier: Apache-2.0 */

#pragma once

#include "kernel/types.h"
#include "util/atomic.h"

/* Control Flow
 *
 * Utilities for control flow between kernels. The implementation is different between CPU and
 * GPU devices. For the latter, part of the logic is handled on the host side with wavefronts.
 *
 * There is a main path for regular camera path tracing. Shadows for next
 * event estimation branch off from this into their own path, that may be computed in
 * parallel while the main path continues. Additionally, shading kernels are sorted using
 * a key for coherence.
 *
 * Each kernel on the main path must call one of these functions. These may not be called
 * multiple times from the same kernel.
 *
 * integrator_path_init(kg, state, next_kernel)
 * integrator_path_next(kg, state, current_kernel, next_kernel)
 * integrator_path_terminate(kg, state, current_kernel)
 *
 * For the shadow path similar functions are used, and again each shadow kernel must call
 * one of them, and only once.
 */
32
/* NOTE(review): the listing collapsed the definitions of
 * integrator_path_is_terminated(ConstIntegratorState) and
 * integrator_shadow_path_is_terminated(ConstIntegratorShadowState) here;
 * only their signatures survive in the symbol index at the end of this page,
 * so their bodies are not reconstructed. */
43#ifdef __KERNEL_GPU__
44
47 const DeviceKernel next_kernel)
48{
49 atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
50 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
51}
52
55 const DeviceKernel current_kernel,
56 const DeviceKernel next_kernel)
57{
58 atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
59 1);
60 atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
61 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
62}
63
66 const DeviceKernel current_kernel)
67{
68 atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
69 1);
70 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
71}
72
74 KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
75{
77 &kernel_integrator_state.next_shadow_path_index[0], 1);
78 atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
79 INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
80# ifdef __PATH_GUIDING__
81 INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = nullptr;
82# endif
83 return shadow_state;
84}
85
88 const DeviceKernel current_kernel,
89 const DeviceKernel next_kernel)
90{
91 atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
92 1);
93 atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
94 INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
95}
96
99 const DeviceKernel current_kernel)
100{
101 atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
102 1);
103 INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
104}
105
/* Sort first by truncated state index (for good locality), then by key (for good coherence).
 * Arguments are parenthesized in the expansion so that callers may pass
 * compound expressions without precedence surprises (CERT PRE01-C). */
#  define INTEGRATOR_SORT_KEY(key, state) \
    ((key) + kernel_data.max_shaders * ((state) / kernel_integrator_state.sort_partition_divisor))

112 const DeviceKernel next_kernel,
113 const uint32_t key)
114{
115 const int key_ = INTEGRATOR_SORT_KEY(key, state);
116 atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
117 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
118 INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
119
120# if defined(__KERNEL_LOCAL_ATOMIC_SORT__)
121 if (!kernel_integrator_state.sort_key_counter[next_kernel]) {
122 return;
123 }
124# endif
125
126 atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
127}
128
131 const DeviceKernel current_kernel,
132 const DeviceKernel next_kernel,
133 const uint32_t key)
134{
135 const int key_ = INTEGRATOR_SORT_KEY(key, state);
136 atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
137 1);
138 atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
139 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
140 INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;
141
142# if defined(__KERNEL_LOCAL_ATOMIC_SORT__)
143 if (!kernel_integrator_state.sort_key_counter[next_kernel]) {
144 return;
145 }
146# endif
147
148 atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
149}
150
151#else
152
155 const DeviceKernel next_kernel)
156{
157 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
158}
159
162 const DeviceKernel next_kernel,
163 const uint32_t key)
164{
165 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
166 (void)key;
167}
168
171 const DeviceKernel current_kernel,
172 const DeviceKernel next_kernel)
173{
174 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
175 (void)current_kernel;
176}
177
180 const DeviceKernel current_kernel)
181{
182 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
183 (void)current_kernel;
184}
185
188 const DeviceKernel current_kernel,
189 const DeviceKernel next_kernel,
190 const uint32_t key)
191{
192 INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
193 (void)key;
194 (void)current_kernel;
195}
196
198 KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
199{
200 IntegratorShadowState shadow_state = (is_ao) ? &state->ao : &state->shadow;
201 INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
202# ifdef __PATH_GUIDING__
203 INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = nullptr;
204# endif
205 return shadow_state;
206}
207
210 const DeviceKernel current_kernel,
211 const DeviceKernel next_kernel)
212{
213 INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
214 (void)current_kernel;
215}
216
219 const DeviceKernel current_kernel)
220{
221 INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
222 (void)current_kernel;
223}
224
225#endif
226
ATOMIC_INLINE uint32_t atomic_fetch_and_add_uint32(uint32_t *p, uint32_t x)
const KernelGlobalsCPU *ccl_restrict KernelGlobals
#define ccl_device_forceinline
#define CCL_NAMESPACE_END
#define kernel_integrator_state
DeviceKernel
static ulong state[N]
const IntegratorShadowStateCPU *ccl_restrict ConstIntegratorShadowState
Definition state.h:231
IntegratorStateCPU *ccl_restrict IntegratorState
Definition state.h:228
#define INTEGRATOR_STATE_WRITE(state, nested_struct, member)
Definition state.h:236
const IntegratorStateCPU *ccl_restrict ConstIntegratorState
Definition state.h:229
#define INTEGRATOR_STATE(state, nested_struct, member)
Definition state.h:235
IntegratorShadowStateCPU *ccl_restrict IntegratorShadowState
Definition state.h:230
ccl_device_forceinline void integrator_shadow_path_terminate(KernelGlobals kg, IntegratorShadowState state, const DeviceKernel current_kernel)
Definition state_flow.h:217
CCL_NAMESPACE_BEGIN ccl_device_forceinline bool integrator_path_is_terminated(ConstIntegratorState state)
Definition state_flow.h:33
ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals kg, IntegratorState state, const DeviceKernel current_kernel, const DeviceKernel next_kernel, const uint32_t key)
Definition state_flow.h:186
ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg, IntegratorState state, const DeviceKernel current_kernel)
Definition state_flow.h:178
ccl_device_forceinline bool integrator_shadow_path_is_terminated(ConstIntegratorShadowState state)
Definition state_flow.h:38
ccl_device_forceinline void integrator_path_next(KernelGlobals kg, IntegratorState state, const DeviceKernel current_kernel, const DeviceKernel next_kernel)
Definition state_flow.h:169
ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const uint32_t key)
Definition state_flow.h:160
ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
Definition state_flow.h:197
ccl_device_forceinline void integrator_shadow_path_next(KernelGlobals kg, IntegratorShadowState state, const DeviceKernel current_kernel, const DeviceKernel next_kernel)
Definition state_flow.h:208
ccl_device_forceinline void integrator_path_init(KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel)
Definition state_flow.h:153
unsigned int uint32_t
Definition stdint.h:80