/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
 *
 * SPDX-License-Identifier: Apache-2.0 */

/* Integrator State
 *
 * This file defines the data structures that define the state of a path. Any state that is
 * preserved and passed between kernel executions is part of this.
 *
 * The size of this state must be kept as small as possible, to reduce cache misses and keep memory
 * usage under control on GPUs that may execute millions of kernels.
 *
 * Memory may be allocated and passed along in different ways depending on the device. There may
 * be a scalar layout, or AoS or SoA layout for batches. The state may be passed along as a pointer
 * to every kernel, or the pointer may exist at program scope or in constant memory. To abstract
 * these differences between devices and experiment with different layouts, macros are used.
 *
 * Use IntegratorState to pass a reference to the integrator state for the current path. These are
 * defined differently on the CPU and GPU. Use ConstIntegratorState instead of const
 * IntegratorState for passing state as read-only, to avoid oddities in typedef behavior.
 *
 * INTEGRATOR_STATE(state, x, y): read nested struct member x.y of IntegratorState
 * INTEGRATOR_STATE_WRITE(state, x, y): write to nested struct member x.y of IntegratorState
 *
 * INTEGRATOR_STATE_ARRAY(state, x, index, y): read x[index].y
 * INTEGRATOR_STATE_ARRAY_WRITE(state, x, index, y): write x[index].y
 *
 * INTEGRATOR_STATE_NULL: use to pass empty state to other functions.
 */

#include "kernel/types.h"

#include "util/types.h"

#ifdef __PATH_GUIDING__
#  include "util/guiding.h"
#endif

/* NOTE(review): `#pragma once` appears after the includes in the upstream source; kept as-is. */
#pragma once

CCL_NAMESPACE_BEGIN
43/* Data structures */
44
45/* Integrator State
46 *
47 * CPU rendering path state with AoS layout. */
49#define KERNEL_STRUCT_BEGIN(name) struct {
50#define KERNEL_STRUCT_BEGIN_PACKED(parent_struct, feature) struct {
51#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) type name;
52#define KERNEL_STRUCT_MEMBER_PACKED KERNEL_STRUCT_MEMBER
53#define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER
54#define KERNEL_STRUCT_END(name) \
55 } \
56 name;
57#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \
58 } \
59 name[cpu_size];
60#define KERNEL_STRUCT_VOLUME_STACK_SIZE MAX_VOLUME_STACK_SIZE
62#undef KERNEL_STRUCT_BEGIN
63#undef KERNEL_STRUCT_BEGIN_PACKED
64#undef KERNEL_STRUCT_MEMBER
65#undef KERNEL_STRUCT_MEMBER_PACKED
66#undef KERNEL_STRUCT_ARRAY_MEMBER
67#undef KERNEL_STRUCT_END
68#undef KERNEL_STRUCT_END_ARRAY
70
71typedef struct IntegratorStateCPU {
72#define KERNEL_STRUCT_BEGIN(name) struct {
73#define KERNEL_STRUCT_BEGIN_PACKED(parent_struct, feature) struct {
74#define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) type name;
75#define KERNEL_STRUCT_MEMBER_PACKED KERNEL_STRUCT_MEMBER
76#define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER
77#define KERNEL_STRUCT_END(name) \
78 } \
79 name;
80#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \
81 } \
82 name[cpu_size];
83#define KERNEL_STRUCT_VOLUME_STACK_SIZE MAX_VOLUME_STACK_SIZE
85#undef KERNEL_STRUCT_BEGIN
86#undef KERNEL_STRUCT_BEGIN_PACKED
87#undef KERNEL_STRUCT_MEMBER
88#undef KERNEL_STRUCT_MEMBER_PACKED
89#undef KERNEL_STRUCT_ARRAY_MEMBER
90#undef KERNEL_STRUCT_END
91#undef KERNEL_STRUCT_END_ARRAY
92#undef KERNEL_STRUCT_VOLUME_STACK_SIZE
93
97
98/* Path Queue
99 *
100 * Keep track of which kernels are queued to be executed next in the path
101 * for GPU rendering. */
105
#if defined(__INTEGRATOR_GPU_PACKED_STATE__) && defined(__KERNEL_GPU__)

/* Generate wrapper structs for all integrator state fields. This allows us to access state
 * uniformly, regardless of whether it is stored in a packed struct or separate arrays.
 *
 * Expanding the templates below emits one `Wrapped_<parent>_<name>` struct per state field,
 * holding just that field; the GPU SoA struct later returns pointers to these wrappers. */
#  define KERNEL_STRUCT_BEGIN(name)
#  define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) \
    struct Wrapped_##parent_struct##_##name { \
      type name; \
    };
#  define KERNEL_STRUCT_MEMBER_PACKED KERNEL_STRUCT_MEMBER
/* A packed struct contributes a single `packed` member of the generated packed type. */
#  define KERNEL_STRUCT_BEGIN_PACKED(parent_struct, feature) \
    KERNEL_STRUCT_BEGIN(parent_struct) \
    KERNEL_STRUCT_MEMBER(parent_struct, packed_##parent_struct, packed, feature)
#  define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER
#  define KERNEL_STRUCT_END(name)
#  define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size)
#  define KERNEL_STRUCT_VOLUME_STACK_SIZE MAX_VOLUME_STACK_SIZE

#  include "kernel/integrator/state_template.h"
#  include "kernel/integrator/shadow_state_template.h"

#  undef KERNEL_STRUCT_BEGIN
#  undef KERNEL_STRUCT_BEGIN_PACKED
#  undef KERNEL_STRUCT_MEMBER
#  undef KERNEL_STRUCT_MEMBER_PACKED
#  undef KERNEL_STRUCT_ARRAY_MEMBER
#  undef KERNEL_STRUCT_END
#  undef KERNEL_STRUCT_END_ARRAY
#  undef KERNEL_STRUCT_VOLUME_STACK_SIZE

#endif
137
138/* Integrator State GPU
139 *
140 * GPU rendering path state with SoA layout. */
141typedef struct IntegratorStateGPU {
142#define KERNEL_STRUCT_BEGIN(name) struct {
143
144#ifdef __INTEGRATOR_GPU_PACKED_STATE__
145
146# ifdef __KERNEL_GPU__
147
148/* If we've opted in to packed layouts, generate member functions that return a pointer to a
149 * wrapper type so we can access state using uniform syntax. */
150# define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) \
151 ccl_global Wrapped_##parent_struct##_##name *name; \
152 ccl_device_inline ccl_global Wrapped_##parent_struct##_##name *name##_fn() ccl_constant \
153 { \
154 return (ccl_global Wrapped_##parent_struct##_##name *)name; \
155 }
156# define KERNEL_STRUCT_MEMBER_PACKED(parent_struct, type, name, feature) \
157 ccl_device_inline ccl_global packed_##parent_struct *name##_fn() ccl_constant \
158 { \
159 return (ccl_global packed_##parent_struct *)packed; \
160 }
161# else
162# define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) ccl_global type *name;
163# define KERNEL_STRUCT_MEMBER_PACKED(parent_struct, type, name, feature)
164# endif
165
166# define KERNEL_STRUCT_BEGIN_PACKED(parent_struct, feature) \
167 KERNEL_STRUCT_BEGIN(parent_struct) \
168 KERNEL_STRUCT_MEMBER(parent_struct, packed_##parent_struct, packed, feature)
169
170#else
171# define KERNEL_STRUCT_MEMBER(parent_struct, type, name, feature) ccl_global type *name;
172# define KERNEL_STRUCT_MEMBER_PACKED KERNEL_STRUCT_MEMBER
173# define KERNEL_STRUCT_BEGIN_PACKED(parent_struct, feature) KERNEL_STRUCT_BEGIN(parent_struct)
174#endif
175#define KERNEL_STRUCT_ARRAY_MEMBER KERNEL_STRUCT_MEMBER
176#define KERNEL_STRUCT_END(name) \
177 } \
178 name;
179#define KERNEL_STRUCT_END_ARRAY(name, cpu_size, gpu_size) \
180 } \
181 name[gpu_size];
182#define KERNEL_STRUCT_VOLUME_STACK_SIZE MAX_VOLUME_STACK_SIZE
183
185
187
188#undef KERNEL_STRUCT_BEGIN
189#undef KERNEL_STRUCT_BEGIN_PACKED
190#undef KERNEL_STRUCT_MEMBER
191#undef KERNEL_STRUCT_MEMBER_PACKED
192#undef KERNEL_STRUCT_ARRAY_MEMBER
193#undef KERNEL_STRUCT_END
194#undef KERNEL_STRUCT_END_ARRAY
195#undef KERNEL_STRUCT_VOLUME_STACK_SIZE
196
197 /* Count number of queued kernels. */
199
200 /* Count number of kernels queued for specific shaders. */
202
203 /* Index of shadow path which will be used by a next shadow path. */
205
206 /* Index of main path which will be used by a next shadow catcher split. */
208
209 /* Partition/key offsets used when writing sorted active indices. */
211
212 /* Divisor used to partition active indices by locality when sorting by material. */
215
216/* Abstraction
217 *
218 * Macros to access data structures on different devices.
219 *
220 * Note that there is a special access function for the shadow catcher state. This access is to
221 * happen from a kernel which operates on a "main" path. Attempt to use shadow catcher accessors
222 * from a kernel which operates on a shadow catcher state will cause bad memory access. */
223
224#ifndef __KERNEL_GPU__
225
226/* Scalar access on CPU. */
227
232
233# define INTEGRATOR_STATE_NULL nullptr
234
235# define INTEGRATOR_STATE(state, nested_struct, member) ((state)->nested_struct.member)
236# define INTEGRATOR_STATE_WRITE(state, nested_struct, member) ((state)->nested_struct.member)
237
238# define INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) \
239 ((state)->nested_struct[array_index].member)
240# define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \
241 ((state)->nested_struct[array_index].member)
242
243#else /* !__KERNEL_GPU__ */
244
245/* Array access on GPU with Structure-of-Arrays. */
246
247typedef int IntegratorState;
248typedef int ConstIntegratorState;
249typedef int IntegratorShadowState;
251
252# define INTEGRATOR_STATE_NULL -1
253
254# ifdef __INTEGRATOR_GPU_PACKED_STATE__
255
256/* If we've opted in to packed layouts, we use the generated accessor functions (member##_fn) to
257 * resolve different layouts (packed vs separate). */
258# define INTEGRATOR_STATE(state, nested_struct, member) \
259 kernel_integrator_state.nested_struct.member##_fn()[state].member
260# define INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) \
261 kernel_integrator_state.nested_struct[array_index].member##_fn()[state].member
262# else
263# define INTEGRATOR_STATE(state, nested_struct, member) \
264 kernel_integrator_state.nested_struct.member[state]
265# define INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member) \
266 kernel_integrator_state.nested_struct[array_index].member[state]
267# endif
268
269# define INTEGRATOR_STATE_WRITE(state, nested_struct, member) \
270 INTEGRATOR_STATE(state, nested_struct, member)
271# define INTEGRATOR_STATE_ARRAY_WRITE(state, nested_struct, array_index, member) \
272 INTEGRATOR_STATE_ARRAY(state, nested_struct, array_index, member)
273
274#endif /* !__KERNEL_GPU__ */
275
CCL_NAMESPACE_END