Blender V5.0
state_flow.h
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
 *
 * SPDX-License-Identifier: Apache-2.0 */

#pragma once

#include "kernel/globals.h"
#include "kernel/types.h"

#include "kernel/film/write.h"

#include "kernel/integrator/state.h"

#ifdef __KERNEL_GPU__
# include "util/atomic.h"
#endif

CCL_NAMESPACE_BEGIN

/* Control Flow
 *
 * Utilities for control flow between kernels. The implementation differs between CPU and
 * GPU devices. For the latter, part of the logic is handled on the host side with wavefronts.
 *
 * There is a main path for regular path tracing from the camera. Shadow rays for next
 * event estimation branch off from this into their own path, which may be computed in
 * parallel while the main path continues. Additionally, shading kernels are sorted using
 * a key for coherence.
 *
 * Each kernel on the main path must call exactly one of these functions; they may not be
 * called multiple times from the same kernel:
 *
 * integrator_path_init(state, next_kernel)
 * integrator_path_next(state, current_kernel, next_kernel)
 * integrator_path_terminate(kg, state, render_buffer, current_kernel)
 *
 * For the shadow path, similar functions are used, and again each shadow kernel must call
 * exactly one of them, only once.
 */

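/* Illustrative sketch of the convention above (the kernel enum values are examples from
 * DeviceKernel, not mandated by this header): a shade-surface kernel that continues
 * tracing would end with
 *
 *   integrator_path_next(state,
 *                        DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE,
 *                        DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST);
 *
 * while a kernel that ends the path would instead call
 *
 *   integrator_path_terminate(kg, state, render_buffer, DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE);
 */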

ccl_device_forceinline bool integrator_path_is_terminated(ConstIntegratorState state)
{
  return INTEGRATOR_STATE(state, path, queued_kernel) == 0;
}

ccl_device_forceinline bool integrator_shadow_path_is_terminated(ConstIntegratorShadowState state)
{
  return INTEGRATOR_STATE(state, shadow_path, queued_kernel) == 0;
}

/* Write accumulated optical depth to the volume majorant render passes, for paths that
 * did a primary volume transmission. */
ccl_device_inline void write_optical_depth(KernelGlobals kg,
                                           IntegratorState state,
                                           ccl_global float *ccl_restrict render_buffer)
{
  if (!render_buffer) {
    return;
  }

  if (INTEGRATOR_STATE(state, path, flag) & PATH_RAY_VOLUME_PRIMARY_TRANSMIT) {
    kernel_assert(kernel_data.film.pass_volume_majorant != PASS_UNUSED);

    const float optical_depth = INTEGRATOR_STATE(state, path, optical_depth);
    ccl_global float *buffer = film_pass_pixel_render_buffer(kg, state, render_buffer);
    film_write_pass_float(buffer + kernel_data.film.pass_volume_majorant, optical_depth);
    film_write_pass_float(buffer + kernel_data.film.pass_volume_majorant_sample_count, 1.0f);
  }
}

#ifdef __KERNEL_GPU__

ccl_device_forceinline void integrator_path_init(IntegratorState state,
                                                 const DeviceKernel next_kernel)
{
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
}

ccl_device_forceinline void integrator_path_next(IntegratorState state,
                                                 const DeviceKernel current_kernel,
                                                 const DeviceKernel next_kernel)
{
  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
}

ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg,
                                                      IntegratorState state,
                                                      ccl_global float *ccl_restrict render_buffer,
                                                      const DeviceKernel current_kernel)
{
  write_optical_depth(kg, state, render_buffer);

  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
}

ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
    KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
{
  IntegratorShadowState shadow_state = atomic_fetch_and_add_uint32(
      &kernel_integrator_state.next_shadow_path_index[0], 1);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
# if defined(__PATH_GUIDING__)
  if ((kernel_data.kernel_features & KERNEL_FEATURE_PATH_GUIDING)) {
    INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = nullptr;
  }
# endif
  return shadow_state;
}
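
/* Illustrative sketch (kernel enum value assumed from DeviceKernel, not defined here):
 * during next event estimation a shading kernel could branch off a shadow path with
 *
 *   IntegratorShadowState shadow_state = integrator_shadow_path_init(
 *       kg, state, DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW, false);
 *
 * after which the shadow path is traced independently while the main path continues via
 * integrator_path_next(). */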

ccl_device_forceinline void integrator_shadow_path_next(IntegratorShadowState state,
                                                        const DeviceKernel current_kernel,
                                                        const DeviceKernel next_kernel)
{
  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
}

ccl_device_forceinline void integrator_shadow_path_terminate(IntegratorShadowState state,
                                                             const DeviceKernel current_kernel)
{
  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
}

/* Sort first by truncated state index (for good locality), then by key (for good coherence). */
# define INTEGRATOR_SORT_KEY(key, state) \
  (key + kernel_data.max_shaders * (state / kernel_integrator_state.sort_partition_divisor))
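
/* Worked example with assumed values (not taken from any actual scene): with
 * max_shaders = 64 and sort_partition_divisor = 1024, a path at state index 2500 with
 * shader key 3 maps to 3 + 64 * (2500 / 1024) = 131. States are thus grouped into
 * partitions of 1024 for locality, and ordered by shader key within each partition for
 * coherent shading work. */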

ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals /*kg*/,
                                                        IntegratorState state,
                                                        const DeviceKernel next_kernel,
                                                        const uint32_t key)
{
  const int key_ = INTEGRATOR_SORT_KEY(key, state);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;

# if defined(__KERNEL_LOCAL_ATOMIC_SORT__)
  if (!kernel_integrator_state.sort_key_counter[next_kernel]) {
    return;
  }
# endif

  atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
}

ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals /*kg*/,
                                                        IntegratorState state,
                                                        const DeviceKernel current_kernel,
                                                        const DeviceKernel next_kernel,
                                                        const uint32_t key)
{
  const int key_ = INTEGRATOR_SORT_KEY(key, state);
  atomic_fetch_and_sub_uint32(&kernel_integrator_state.queue_counter->num_queued[current_kernel],
                              1);
  atomic_fetch_and_add_uint32(&kernel_integrator_state.queue_counter->num_queued[next_kernel], 1);
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  INTEGRATOR_STATE_WRITE(state, path, shader_sort_key) = key_;

# if defined(__KERNEL_LOCAL_ATOMIC_SORT__)
  if (!kernel_integrator_state.sort_key_counter[next_kernel]) {
    return;
  }
# endif

  atomic_fetch_and_add_uint32(&kernel_integrator_state.sort_key_counter[next_kernel][key_], 1);
}

#else

ccl_device_forceinline void integrator_path_init(IntegratorState state,
                                                 const DeviceKernel next_kernel)
{
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
}

ccl_device_forceinline void integrator_path_init_sorted(KernelGlobals /*kg*/,
                                                        IntegratorState state,
                                                        const DeviceKernel next_kernel,
                                                        const uint32_t key)
{
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  (void)key;
}

ccl_device_forceinline void integrator_path_next(IntegratorState state,
                                                 const DeviceKernel current_kernel,
                                                 const DeviceKernel next_kernel)
{
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  (void)current_kernel;
}

ccl_device_forceinline void integrator_path_terminate(KernelGlobals kg,
                                                      IntegratorState state,
                                                      ccl_global float *ccl_restrict render_buffer,
                                                      const DeviceKernel current_kernel)
{
  write_optical_depth(kg, state, render_buffer);

  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = 0;
  (void)current_kernel;
}

ccl_device_forceinline void integrator_path_next_sorted(KernelGlobals /*kg*/,
                                                        IntegratorState state,
                                                        const DeviceKernel current_kernel,
                                                        const DeviceKernel next_kernel,
                                                        const uint32_t key)
{
  INTEGRATOR_STATE_WRITE(state, path, queued_kernel) = next_kernel;
  (void)key;
  (void)current_kernel;
}

ccl_device_forceinline IntegratorShadowState integrator_shadow_path_init(
    KernelGlobals kg, IntegratorState state, const DeviceKernel next_kernel, const bool is_ao)
{
  IntegratorShadowState shadow_state = (is_ao) ? &state->ao : &state->shadow;
  INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, queued_kernel) = next_kernel;
# if defined(__PATH_GUIDING__)
  if ((kernel_data.kernel_features & KERNEL_FEATURE_PATH_GUIDING)) {
    INTEGRATOR_STATE_WRITE(shadow_state, shadow_path, path_segment) = nullptr;
  }
# endif
  return shadow_state;
}

ccl_device_forceinline void integrator_shadow_path_next(IntegratorShadowState state,
                                                        const DeviceKernel current_kernel,
                                                        const DeviceKernel next_kernel)
{
  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = next_kernel;
  (void)current_kernel;
}

ccl_device_forceinline void integrator_shadow_path_terminate(IntegratorShadowState state,
                                                             const DeviceKernel current_kernel)
{
  INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
  (void)current_kernel;
}

#endif

CCL_NAMESPACE_END