Blender V4.3
kernel_arch_impl.h
Go to the documentation of this file.
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
 *
 * SPDX-License-Identifier: Apache-2.0 */

/* Templated common implementation part of all CPU kernels.
 *
 * The idea is that particular .cpp files set the needed optimization flags and
 * simply include this file without worrying about copying the actual implementation over.
 */
10
11#pragma once
12
13// clang-format off
15
16#ifndef KERNEL_STUB
19
23
36
39# include "kernel/film/read.h"
40
41# include "kernel/bake/bake.h"
42
43#else
44# define STUB_ASSERT(arch, name) \
45 assert(!(#name " kernel stub for architecture " #arch " was called!"))
46#endif /* KERNEL_STUB */
47// clang-format on
48
50
51/* --------------------------------------------------------------------
52 * Integrator.
53 */
54
55#ifdef KERNEL_STUB
56# define KERNEL_INVOKE(name, ...) (STUB_ASSERT(KERNEL_ARCH, name), 0)
57#else
58# define KERNEL_INVOKE(name, ...) integrator_##name(__VA_ARGS__)
59#endif
60
61/* TODO: Either use something like get_work_pixel(), or simplify tile which is passed here, so
62 * that it does not contain unused fields. */
63#define DEFINE_INTEGRATOR_INIT_KERNEL(name) \
64 bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \
65 IntegratorStateCPU *state, \
66 KernelWorkTile *tile, \
67 ccl_global float *render_buffer) \
68 { \
69 return KERNEL_INVOKE( \
70 name, kg, state, tile, render_buffer, tile->x, tile->y, tile->start_sample); \
71 }
72
73#define DEFINE_INTEGRATOR_KERNEL(name) \
74 void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \
75 IntegratorStateCPU *state) \
76 { \
77 KERNEL_INVOKE(name, kg, state); \
78 }
79
80#define DEFINE_INTEGRATOR_SHADE_KERNEL(name) \
81 void KERNEL_FUNCTION_FULL_NAME(integrator_##name)( \
82 const KernelGlobalsCPU *kg, IntegratorStateCPU *state, ccl_global float *render_buffer) \
83 { \
84 KERNEL_INVOKE(name, kg, state, render_buffer); \
85 }
86
87#define DEFINE_INTEGRATOR_SHADOW_KERNEL(name) \
88 void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const KernelGlobalsCPU *kg, \
89 IntegratorStateCPU *state) \
90 { \
91 KERNEL_INVOKE(name, kg, &state->shadow); \
92 }
93
94#define DEFINE_INTEGRATOR_SHADOW_SHADE_KERNEL(name) \
95 void KERNEL_FUNCTION_FULL_NAME(integrator_##name)( \
96 const KernelGlobalsCPU *kg, IntegratorStateCPU *state, ccl_global float *render_buffer) \
97 { \
98 KERNEL_INVOKE(name, kg, &state->shadow, render_buffer); \
99 }
100
101DEFINE_INTEGRATOR_INIT_KERNEL(init_from_camera)
102DEFINE_INTEGRATOR_INIT_KERNEL(init_from_bake)
103DEFINE_INTEGRATOR_SHADE_KERNEL(intersect_closest)
104DEFINE_INTEGRATOR_KERNEL(intersect_subsurface)
105DEFINE_INTEGRATOR_KERNEL(intersect_volume_stack)
106DEFINE_INTEGRATOR_KERNEL(intersect_dedicated_light)
107DEFINE_INTEGRATOR_SHADE_KERNEL(shade_background)
111DEFINE_INTEGRATOR_SHADE_KERNEL(shade_dedicated_light)
113DEFINE_INTEGRATOR_SHADOW_KERNEL(intersect_shadow)
115
116/* --------------------------------------------------------------------
117 * Shader evaluation.
118 */
119
121 const KernelShaderEvalInput *input,
122 float *output,
123 const int offset)
124{
125#ifdef KERNEL_STUB
126 STUB_ASSERT(KERNEL_ARCH, shader_eval_displace);
127#else
128 kernel_displace_evaluate(kg, input, output, offset);
129#endif
130}
131
133 const KernelShaderEvalInput *input,
134 float *output,
135 const int offset)
136{
137#ifdef KERNEL_STUB
139#else
140 kernel_background_evaluate(kg, input, output, offset);
141#endif
142}
143
145 const KernelGlobalsCPU *kg,
146 const KernelShaderEvalInput *input,
147 float *output,
148 const int offset)
149{
150#ifdef KERNEL_STUB
152#else
153 kernel_curve_shadow_transparency_evaluate(kg, input, output, offset);
154#endif
155}
156
157/* --------------------------------------------------------------------
158 * Adaptive sampling.
159 */
160
162 const KernelGlobalsCPU *kg,
164 int x,
165 int y,
166 float threshold,
167 int reset,
168 int offset,
169 int stride)
170{
171#ifdef KERNEL_STUB
173 return false;
174#else
176 kg, render_buffer, x, y, threshold, reset, offset, stride);
177#endif
178}
179
182 int y,
183 int start_x,
184 int width,
185 int offset,
186 int stride)
187{
188#ifdef KERNEL_STUB
190#else
191 film_adaptive_sampling_filter_x(kg, render_buffer, y, start_x, width, offset, stride);
192#endif
193}
194
197 int x,
198 int start_y,
199 int height,
200 int offset,
201 int stride)
202{
203#ifdef KERNEL_STUB
205#else
206 film_adaptive_sampling_filter_y(kg, render_buffer, x, start_y, height, offset, stride);
207#endif
208}
209
210/* --------------------------------------------------------------------
211 * Cryptomatte.
212 */
213
216 int pixel_index)
217{
218#ifdef KERNEL_STUB
220#else
221 film_cryptomatte_post(kg, render_buffer, pixel_index);
222#endif
223}
224
225/* --------------------------------------------------------------------
226 * Film Convert.
227 */
228
229#ifdef KERNEL_STUB
230
231# define KERNEL_FILM_CONVERT_FUNCTION(name, is_float) \
232 void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \
233 const float *buffer, \
234 float *pixel, \
235 const int width, \
236 const int buffer_stride, \
237 const int pixel_stride) \
238 { \
239 STUB_ASSERT(KERNEL_ARCH, film_convert_##name); \
240 } \
241 void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \
242 const KernelFilmConvert *kfilm_convert, \
243 const float *buffer, \
244 half4 *pixel, \
245 const int width, \
246 const int buffer_stride) \
247 { \
248 STUB_ASSERT(KERNEL_ARCH, film_convert_##name); \
249 }
250
251#else
252
253# define KERNEL_FILM_CONVERT_FUNCTION(name, is_float) \
254 void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \
255 const float *buffer, \
256 float *pixel, \
257 const int width, \
258 const int buffer_stride, \
259 const int pixel_stride) \
260 { \
261 for (int i = 0; i < width; i++, buffer += buffer_stride, pixel += pixel_stride) { \
262 film_get_pass_pixel_##name(kfilm_convert, buffer, pixel); \
263 } \
264 } \
265 void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \
266 const KernelFilmConvert *kfilm_convert, \
267 const float *buffer, \
268 half4 *pixel, \
269 const int width, \
270 const int buffer_stride) \
271 { \
272 for (int i = 0; i < width; i++, buffer += buffer_stride, pixel++) { \
273 float pixel_rgba[4] = {0.0f, 0.0f, 0.0f, 1.0f}; \
274 film_get_pass_pixel_##name(kfilm_convert, buffer, pixel_rgba); \
275 if (is_float) { \
276 pixel_rgba[1] = pixel_rgba[0]; \
277 pixel_rgba[2] = pixel_rgba[0]; \
278 } \
279 film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel_rgba); \
280 *pixel = float4_to_half4_display( \
281 make_float4(pixel_rgba[0], pixel_rgba[1], pixel_rgba[2], pixel_rgba[3])); \
282 } \
283 }
284
285#endif
286
291
294
297KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher, false)
298KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow, false)
301
302#undef KERNEL_FILM_CONVERT_FUNCTION
303
304#undef KERNEL_INVOKE
305#undef DEFINE_INTEGRATOR_KERNEL
306#undef DEFINE_INTEGRATOR_SHADE_KERNEL
307#undef DEFINE_INTEGRATOR_INIT_KERNEL
308
309#undef KERNEL_STUB
310#undef STUB_ASSERT
311#undef KERNEL_ARCH
312
#define output
void reset()
clear internal cached data and reset random seed
ccl_device_inline void film_cryptomatte_post(KernelGlobals kg, ccl_global float *render_buffer, int pixel_index)
#define ccl_global
#define CCL_NAMESPACE_END
CCL_NAMESPACE_BEGIN ccl_device void kernel_displace_evaluate(KernelGlobals kg, ccl_global const KernelShaderEvalInput *input, ccl_global float *output, const int offset)
ccl_device void kernel_curve_shadow_transparency_evaluate(KernelGlobals kg, ccl_global const KernelShaderEvalInput *input, ccl_global float *output, const int offset)
ccl_device void kernel_background_evaluate(KernelGlobals kg, ccl_global const KernelShaderEvalInput *input, ccl_global float *output, const int offset)
#define KERNEL_ARCH
#define KERNEL_FUNCTION_FULL_NAME(name)
ccl_gpu_kernel_postfix ccl_global KernelWorkTile const int ccl_global float * render_buffer
ccl_device bool film_adaptive_sampling_convergence_check(KernelGlobals kg, ccl_global float *render_buffer, int x, int y, float threshold, int reset, int offset, int stride)
ccl_device void film_adaptive_sampling_filter_x(KernelGlobals kg, ccl_global float *render_buffer, int y, int start_x, int width, int offset, int stride)
ccl_device void film_adaptive_sampling_filter_y(KernelGlobals kg, ccl_global float *render_buffer, int x, int start_y, int height, int offset, int stride)
#define DEFINE_INTEGRATOR_INIT_KERNEL(name)
#define KERNEL_FILM_CONVERT_FUNCTION(name, is_float)
void KERNEL_FUNCTION_FULL_NAME adaptive_sampling_filter_x(const KernelGlobalsCPU *kg, ccl_global float *render_buffer, int y, int start_x, int width, int offset, int stride)
#define DEFINE_INTEGRATOR_SHADE_KERNEL(name)
#define DEFINE_INTEGRATOR_KERNEL(name)
void KERNEL_FUNCTION_FULL_NAME cryptomatte_postprocess(const KernelGlobalsCPU *kg, ccl_global float *render_buffer, int pixel_index)
void KERNEL_FUNCTION_FULL_NAME shader_eval_background(const KernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset)
void KERNEL_FUNCTION_FULL_NAME shader_eval_displace(const KernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset)
#define DEFINE_INTEGRATOR_SHADOW_SHADE_KERNEL(name)
#define DEFINE_INTEGRATOR_SHADOW_KERNEL(name)
bool KERNEL_FUNCTION_FULL_NAME adaptive_sampling_convergence_check(const KernelGlobalsCPU *kg, ccl_global float *render_buffer, int x, int y, float threshold, int reset, int offset, int stride)
void KERNEL_FUNCTION_FULL_NAME adaptive_sampling_filter_y(const KernelGlobalsCPU *kg, ccl_global float *render_buffer, int x, int start_y, int height, int offset, int stride)
void KERNEL_FUNCTION_FULL_NAME shader_eval_curve_shadow_transparency(const KernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset)