Blender V4.5
kernel_arch_impl.h
Go to the documentation of this file.
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
 *
 * SPDX-License-Identifier: Apache-2.0 */

/* Templated common implementation part of all CPU kernels.
 *
 * The idea is that particular `.cpp` files set the needed optimization flags
 * and simply include this file, without worrying about copying the actual
 * implementation over. */

11#pragma once
12
13// clang-format off
15
16#ifndef KERNEL_STUB
17# include "kernel/globals.h"
18
20
24
28
31# include "kernel/film/read.h"
32
33# include "kernel/bake/bake.h"
34
35#else
36# define STUB_ASSERT(arch, name) \
37 assert(!(#name " kernel stub for architecture " #arch " was called!"))
38#endif /* KERNEL_STUB */
39// clang-format on
40

/* --------------------------------------------------------------------
 * Integrator.
 */

/* Dispatch helper for the integrator kernel wrappers below.
 *
 * In a stub build (KERNEL_STUB defined) the invocation asserts with the
 * architecture and kernel name and evaluates to 0, so stubbed kernels that
 * return `bool` yield false. Otherwise it forwards all arguments to the
 * templated `integrator_<name>()` implementation. */
#ifdef KERNEL_STUB
#  define KERNEL_INVOKE(name, ...) (STUB_ASSERT(KERNEL_ARCH, name), 0)
#else
#  define KERNEL_INVOKE(name, ...) integrator_##name(__VA_ARGS__)
#endif

/* TODO: Either use something like get_work_pixel(), or simplify tile which is passed here, so
 * that it does not contain unused fields. */

/* Define the exported entry point `integrator_<name>()` for a path
 * initialization kernel. The wrapper passes the tile's pixel coordinates and
 * start sample to the implementation and forwards its boolean result. */
#define DEFINE_INTEGRATOR_INIT_KERNEL(name) \
  bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const ThreadKernelGlobalsCPU *kg, \
                                                    IntegratorStateCPU *state, \
                                                    KernelWorkTile *tile, \
                                                    ccl_global float *render_buffer) \
  { \
    return KERNEL_INVOKE( \
        name, kg, state, tile, render_buffer, tile->x, tile->y, tile->start_sample); \
  }

/* Define the exported entry point `integrator_<name>()` for a kernel that
 * takes only the thread globals and the path state. */
#define DEFINE_INTEGRATOR_KERNEL(name) \
  void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const ThreadKernelGlobalsCPU *kg, \
                                                    IntegratorStateCPU *state) \
  { \
    KERNEL_INVOKE(name, kg, state); \
  }

/* Define the exported entry point `integrator_<name>()` for a shading kernel
 * that additionally receives the render buffer. */
#define DEFINE_INTEGRATOR_SHADE_KERNEL(name) \
  void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const ThreadKernelGlobalsCPU *kg, \
                                                    IntegratorStateCPU *state, \
                                                    ccl_global float *render_buffer) \
  { \
    KERNEL_INVOKE(name, kg, state, render_buffer); \
  }

/* Define the exported entry point `integrator_<name>()` for a shadow kernel:
 * the implementation is invoked on the `shadow` sub-state of the path state. */
#define DEFINE_INTEGRATOR_SHADOW_KERNEL(name) \
  void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const ThreadKernelGlobalsCPU *kg, \
                                                    IntegratorStateCPU *state) \
  { \
    KERNEL_INVOKE(name, kg, &state->shadow); \
  }

/* Define the exported entry point `integrator_<name>()` for a shadow shading
 * kernel: invoked on the `shadow` sub-state, with the render buffer. */
#define DEFINE_INTEGRATOR_SHADOW_SHADE_KERNEL(name) \
  void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const ThreadKernelGlobalsCPU *kg, \
                                                    IntegratorStateCPU *state, \
                                                    ccl_global float *render_buffer) \
  { \
    KERNEL_INVOKE(name, kg, &state->shadow, render_buffer); \
  }

95DEFINE_INTEGRATOR_INIT_KERNEL(init_from_camera)
98
/* --------------------------------------------------------------------
 * Shader evaluation.
 */
102
105 float *output,
106 const int offset)
107{
108#ifdef KERNEL_STUB
109 STUB_ASSERT(KERNEL_ARCH, shader_eval_displace);
110#else
112#endif
113}
114
117 float *output,
118 const int offset)
119{
120#ifdef KERNEL_STUB
122#else
124#endif
125}
126
128 const ThreadKernelGlobalsCPU *kg,
130 float *output,
131 const int offset)
132{
133#ifdef KERNEL_STUB
135#else
137#endif
138}

/* --------------------------------------------------------------------
 * Adaptive sampling.
 */
143
145 const ThreadKernelGlobalsCPU *kg,
147 const int x,
148 const int y,
149 const float threshold,
150 const int reset,
151 const int offset,
152 const int stride)
153{
154#ifdef KERNEL_STUB
156 return false;
157#else
159 kg, render_buffer, x, y, threshold, reset, offset, stride);
160#endif
161}
162
165 const int y,
166 const int start_x,
167 const int width,
168 const int offset,
169 const int stride)
170{
171#ifdef KERNEL_STUB
173#else
174 film_adaptive_sampling_filter_x(kg, render_buffer, y, start_x, width, offset, stride);
175#endif
176}
177
180 const int x,
181 const int start_y,
182 const int height,
183 const int offset,
184 const int stride)
185{
186#ifdef KERNEL_STUB
188#else
189 film_adaptive_sampling_filter_y(kg, render_buffer, x, start_y, height, offset, stride);
190#endif
191}

/* --------------------------------------------------------------------
 * Cryptomatte.
 */
196
199 const int pixel_index)
200{
201#ifdef KERNEL_STUB
203#else
204 film_cryptomatte_post(kg, render_buffer, pixel_index);
205#endif
206}

/* --------------------------------------------------------------------
 * Film Convert.
 */

/* For each pass type this macro instantiates two exported functions:
 *
 * - film_convert_<name>: writes float pixels, `pixel_stride` floats apart.
 * - film_convert_half_rgba_<name>: writes display half4 RGBA pixels, applying
 *   pass overlays and converting through float4_to_half4_display().
 *
 * `is_float` marks scalar passes whose single channel is replicated into the
 * G and B channels of the RGBA output. In stub builds both functions only
 * assert. */
#ifdef KERNEL_STUB

#  define KERNEL_FILM_CONVERT_FUNCTION(name, is_float) \
    void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \
                                                        const float *buffer, \
                                                        float *pixel, \
                                                        const int width, \
                                                        const int buffer_stride, \
                                                        const int pixel_stride) \
    { \
      STUB_ASSERT(KERNEL_ARCH, film_convert_##name); \
    } \
    void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \
        const KernelFilmConvert *kfilm_convert, \
        const float *buffer, \
        half4 *pixel, \
        const int width, \
        const int buffer_stride) \
    { \
      /* Report the actual stubbed kernel, not the float variant. */ \
      STUB_ASSERT(KERNEL_ARCH, film_convert_half_rgba_##name); \
    }

#else

#  define KERNEL_FILM_CONVERT_FUNCTION(name, is_float) \
    void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \
                                                        const float *buffer, \
                                                        float *pixel, \
                                                        const int width, \
                                                        const int buffer_stride, \
                                                        const int pixel_stride) \
    { \
      for (int i = 0; i < width; i++, buffer += buffer_stride, pixel += pixel_stride) { \
        film_get_pass_pixel_##name(kfilm_convert, buffer, pixel); \
      } \
    } \
    void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \
        const KernelFilmConvert *kfilm_convert, \
        const float *buffer, \
        half4 *pixel, \
        const int width, \
        const int buffer_stride) \
    { \
      for (int i = 0; i < width; i++, buffer += buffer_stride, pixel++) { \
        float pixel_rgba[4] = {0.0f, 0.0f, 0.0f, 1.0f}; \
        film_get_pass_pixel_##name(kfilm_convert, buffer, pixel_rgba); \
        if (is_float) { \
          /* Scalar pass: replicate the single channel into G and B. */ \
          pixel_rgba[1] = pixel_rgba[0]; \
          pixel_rgba[2] = pixel_rgba[0]; \
        } \
        film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel_rgba); \
        *pixel = float4_to_half4_display( \
            make_float4(pixel_rgba[0], pixel_rgba[1], pixel_rgba[2], pixel_rgba[3])); \
      } \
    }

#endif
269
274
277
280KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher, false)
281KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow, false)
284
285#undef KERNEL_FILM_CONVERT_FUNCTION

/* This header is included once per CPU architecture translation unit; undef
 * every helper macro so repeated inclusion starts from a clean slate. */
#undef KERNEL_INVOKE
#undef DEFINE_INTEGRATOR_KERNEL
#undef DEFINE_INTEGRATOR_SHADE_KERNEL
#undef DEFINE_INTEGRATOR_INIT_KERNEL
/* These two were previously left defined after use; undef them as well for
 * symmetry with the macros above. */
#undef DEFINE_INTEGRATOR_SHADOW_KERNEL
#undef DEFINE_INTEGRATOR_SHADOW_SHADE_KERNEL

#undef KERNEL_STUB
#undef STUB_ASSERT
#undef KERNEL_ARCH

return true
void reset()
clear internal cached data and reset random seed
ccl_device_inline void film_cryptomatte_post(KernelGlobals kg, ccl_global float *render_buffer, const int pixel_index)
#define ccl_global
#define KERNEL_FILM_CONVERT_FUNCTION(name)
#define CCL_NAMESPACE_END
#define input
VecBase< float, 4 > float4
#define output
ccl_device void kernel_curve_shadow_transparency_evaluate(KernelGlobals kg, const ccl_global KernelShaderEvalInput *input, ccl_global float *output, const int offset)
CCL_NAMESPACE_BEGIN ccl_device void kernel_displace_evaluate(KernelGlobals kg, const ccl_global KernelShaderEvalInput *input, ccl_global float *output, const int offset)
ccl_device void kernel_background_evaluate(KernelGlobals kg, const ccl_global KernelShaderEvalInput *input, ccl_global float *output, const int offset)
#define KERNEL_ARCH
#define KERNEL_FUNCTION_FULL_NAME(name)
ccl_gpu_kernel_postfix ccl_global KernelWorkTile const int ccl_global float * render_buffer
ccl_device void film_adaptive_sampling_filter_y(KernelGlobals kg, ccl_global float *render_buffer, const int x, const int start_y, const int height, const int offset, const int stride)
ccl_device void film_adaptive_sampling_filter_x(KernelGlobals kg, ccl_global float *render_buffer, const int y, const int start_x, const int width, const int offset, const int stride)
ccl_device bool film_adaptive_sampling_convergence_check(KernelGlobals kg, ccl_global float *render_buffer, const int x, const int y, const float threshold, const int reset, const int offset, const int stride)
void KERNEL_FUNCTION_FULL_NAME shader_eval_background(const ThreadKernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset)
bool KERNEL_FUNCTION_FULL_NAME adaptive_sampling_convergence_check(const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, const int x, const int y, const float threshold, const int reset, const int offset, int stride)
void KERNEL_FUNCTION_FULL_NAME cryptomatte_postprocess(const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, int pixel_index)
void KERNEL_FUNCTION_FULL_NAME shader_eval_displace(const ThreadKernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset)
void KERNEL_FUNCTION_FULL_NAME adaptive_sampling_filter_x(const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, const int y, const int start_x, const int width, const int offset, int stride)
void KERNEL_FUNCTION_FULL_NAME adaptive_sampling_filter_y(const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, const int x, const int start_y, const int height, const int offset, int stride)
void KERNEL_FUNCTION_FULL_NAME shader_eval_curve_shadow_transparency(const ThreadKernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset)
#define DEFINE_INTEGRATOR_INIT_KERNEL(name)
#define KERNEL_FILM_CONVERT_FUNCTION(name, is_float)
#define DEFINE_INTEGRATOR_SHADE_KERNEL(name)