Blender V5.0
kernel_arch_impl.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5/* Templated common implementation part of all CPU kernels.
6 *
7 * The idea is that each particular `.cpp` file sets the needed optimization flags and
8 * simply includes this file, without having to copy the actual implementation over.
9 */
10
11#pragma once
12
13// clang-format off
15
16#ifndef KERNEL_STUB
17# include "kernel/globals.h"
18
20
24
28
31# include "kernel/film/read.h"
33
34# include "kernel/bake/bake.h"
35
36#else
/* Stub builds only: fail an assert whose expression text names the kernel and architecture.
 *
 * `arch` is passed through a two-level stringify helper so that a macro argument such as
 * `KERNEL_ARCH` is expanded to its value before being turned into a string. Applying `#`
 * to it directly would always produce the literal text "KERNEL_ARCH". `name` is stringified
 * directly, without macro expansion.
 *
 * A string literal is never null, so the negated expression is always false and the assert
 * fires whenever the statement is reached (in builds where `assert` is enabled).
 *
 * NOTE: the `STUB_STRINGIFY*` and `STUB_MESSAGE` helpers are only defined in stub builds. */
# define STUB_STRINGIFY_IMPL(token) #token
# define STUB_STRINGIFY(token) STUB_STRINGIFY_IMPL(token)
# define STUB_MESSAGE(arch, name_str) \
    name_str " kernel stub for architecture " STUB_STRINGIFY(arch) " was called!"
# define STUB_ASSERT(arch, name) assert(!(STUB_MESSAGE(arch, #name)))
39#endif /* KERNEL_STUB */
40// clang-format on
41
43
44/* --------------------------------------------------------------------
45 * Integrator.
46 */
47
/* Dispatch helper used by the integrator wrapper macros below.
 *
 * - Regular build: forwards the variadic arguments to `integrator_<name>()`.
 * - Stub build: drops the arguments, triggers `STUB_ASSERT` and evaluates to 0 through the
 *   comma operator, so the expansion can still be used as a return value. */
#ifndef KERNEL_STUB
#  define KERNEL_INVOKE(name, ...) integrator_##name(__VA_ARGS__)
#else
#  define KERNEL_INVOKE(name, ...) (STUB_ASSERT(KERNEL_ARCH, name), 0)
#endif
53
54/* TODO: Either use something like get_work_pixel(), or simplify tile which is passed here, so
55 * that it does not contain unused fields. */
/* Defines `KERNEL_FUNCTION_FULL_NAME(integrator_<name>)`, an init-kernel entry point.
 *
 * The wrapper passes its four parameters to `KERNEL_INVOKE`, followed by the tile's `x`, `y`
 * and `start_sample` fields, and returns the result as a bool.
 *
 * Every parameter is explicitly cast to void because the stub form of `KERNEL_INVOKE`
 * discards its arguments, which would otherwise leave the parameters unreferenced. */
#define DEFINE_INTEGRATOR_INIT_KERNEL(name) \
  bool KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const ThreadKernelGlobalsCPU *kg, \
                                                    IntegratorStateCPU *state, \
                                                    KernelWorkTile *tile, \
                                                    ccl_global float *render_buffer) \
  { \
    (void)kg, (void)state, (void)tile, (void)render_buffer; \
    const bool invoke_result = KERNEL_INVOKE( \
        name, kg, state, tile, render_buffer, tile->x, tile->y, tile->start_sample); \
    return invoke_result; \
  }
69
/* Defines `KERNEL_FUNCTION_FULL_NAME(integrator_<name>)`, a kernel entry point without a
 * return value.
 *
 * The wrapper passes `kg`, `state` and `render_buffer` straight to `KERNEL_INVOKE`. The
 * void casts keep the parameters referenced when the stub form of `KERNEL_INVOKE` discards
 * its arguments. */
#define DEFINE_INTEGRATOR_SHADE_KERNEL(name) \
  void KERNEL_FUNCTION_FULL_NAME(integrator_##name)(const ThreadKernelGlobalsCPU *kg, \
                                                    IntegratorStateCPU *state, \
                                                    ccl_global float *render_buffer) \
  { \
    (void)kg, (void)state, (void)render_buffer; \
    KERNEL_INVOKE(name, kg, state, render_buffer); \
  }
80
/* Instantiate the init-kernel wrapper for `init_from_camera`; the macro defines
 * `KERNEL_FUNCTION_FULL_NAME(integrator_init_from_camera)`. */
81DEFINE_INTEGRATOR_INIT_KERNEL(init_from_camera)
84
85/* --------------------------------------------------------------------
86 * Shader evaluation.
87 */
88
91 float *output,
92 const int offset)
93{
94#ifdef KERNEL_STUB
96 (void)kg;
97 (void)input;
98 (void)output;
99 (void)offset;
100#else
102#endif
103}
104
107 float *output,
108 const int offset)
109{
110#ifdef KERNEL_STUB
112 (void)kg;
113 (void)input;
114 (void)output;
115 (void)offset;
116#else
118#endif
119}
120
122 const ThreadKernelGlobalsCPU *kg,
124 float *output,
125 const int offset)
126{
127#ifdef KERNEL_STUB
129 (void)kg;
130 (void)input;
131 (void)output;
132 (void)offset;
133#else
135#endif
136}
137
140 float *output,
141 const int offset)
142{
143#ifdef KERNEL_STUB
145 (void)kg;
146 (void)input;
147 (void)output;
148 (void)offset;
149#else
151#endif
152}
153
154/* --------------------------------------------------------------------
155 * Adaptive sampling.
156 */
157
159 const ThreadKernelGlobalsCPU *kg,
161 const int x,
162 const int y,
163 const float threshold,
164 const int reset,
165 const int offset,
166 const int stride)
167{
168#ifdef KERNEL_STUB
170 (void)kg;
171 (void)render_buffer;
172 (void)x;
173 (void)y;
174 (void)threshold;
175 (void)reset;
176 (void)offset;
177 (void)stride;
178 return false;
179#else
181 kg, render_buffer, x, y, threshold, reset, offset, stride);
182#endif
183}
184
187 const int y,
188 const int start_x,
189 const int width,
190 const int offset,
191 const int stride)
192{
193#ifdef KERNEL_STUB
195 (void)kg;
196 (void)render_buffer;
197 (void)y;
198 (void)start_x;
199 (void)width;
200 (void)offset;
201 (void)stride;
202#else
203 film_adaptive_sampling_filter_x(kg, render_buffer, y, start_x, width, offset, stride);
204#endif
205}
206
209 const int x,
210 const int start_y,
211 const int height,
212 const int offset,
213 const int stride)
214{
215#ifdef KERNEL_STUB
217 (void)kg;
218 (void)render_buffer;
219 (void)x;
220 (void)start_y;
221 (void)height;
222 (void)offset;
223 (void)stride;
224#else
225 film_adaptive_sampling_filter_y(kg, render_buffer, x, start_y, height, offset, stride);
226#endif
227}
228
229/* --------------------------------------------------------------------
230 * Cryptomatte.
231 */
232
235 const int pixel_index)
236{
237#ifdef KERNEL_STUB
239 (void)kg;
240 (void)render_buffer;
241 (void)pixel_index;
242#else
243 film_cryptomatte_post(kg, render_buffer, pixel_index);
244#endif
245}
246
247/* --------------------------------------------------------------------
248 * Volume Scattering Probability Guiding.
249 */
250
253 const int y,
254 const int center_x,
255 const int min_x,
256 const int max_x,
257 const int offset,
258 const int stride)
259{
260#ifdef KERNEL_STUB
262 (void)kg;
263 (void)render_buffer;
264 (void)y;
265 (void)center_x;
266 (void)min_x;
267 (void)max_x;
268 (void)offset;
269 (void)stride;
270#else
271 volume_guiding_filter_x(kg, render_buffer, y, center_x, min_x, max_x, offset, stride);
272#endif
273}
274
277 const int x,
278 const int min_y,
279 const int max_y,
280 const int offset,
281 const int stride)
282{
283#ifdef KERNEL_STUB
285 (void)kg;
286 (void)render_buffer;
287 (void)x;
288 (void)min_y;
289 (void)max_y;
290 (void)offset;
291 (void)stride;
292#else
293 volume_guiding_filter_y(kg, render_buffer, x, min_y, max_y, offset, stride);
294#endif
295}
296
297/* --------------------------------------------------------------------
298 * Film Convert.
299 */
300
301#ifdef KERNEL_STUB
302
/* Stub variant: defines both film-convert entry points for pass `name` with bodies that only
 * trigger `STUB_ASSERT`. `is_float` is accepted for signature parity with the non-stub macro
 * and is unused here.
 *
 * Each stub reports its own function name, so the assert text distinguishes the float
 * variant (`film_convert_<name>`) from the half-RGBA variant
 * (`film_convert_half_rgba_<name>`).
 *
 * The void casts keep the parameters referenced, since the stubs never read them. */
# define KERNEL_FILM_CONVERT_FUNCTION(name, is_float) \
    void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \
                                                        const float *buffer, \
                                                        float *pixel, \
                                                        const int width, \
                                                        const int buffer_stride, \
                                                        const int pixel_stride) \
    { \
      STUB_ASSERT(KERNEL_ARCH, film_convert_##name); \
      (void)kfilm_convert; \
      (void)buffer; \
      (void)pixel; \
      (void)width; \
      (void)buffer_stride; \
      (void)pixel_stride; \
    } \
    void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \
        const KernelFilmConvert *kfilm_convert, \
        const float *buffer, \
        half4 *pixel, \
        const int width, \
        const int buffer_stride) \
    { \
      STUB_ASSERT(KERNEL_ARCH, film_convert_half_rgba_##name); \
      (void)kfilm_convert; \
      (void)buffer; \
      (void)pixel; \
      (void)width; \
      (void)buffer_stride; \
    }
333
334#else
335
/* Non-stub variant: defines both film-convert entry points for pass `name`.
 *
 * - `film_convert_<name>`: calls `film_get_pass_pixel_<name>` once per pixel for `width`
 *   pixels, advancing `buffer` by `buffer_stride` and `pixel` by `pixel_stride` each step.
 * - `film_convert_half_rgba_<name>`: reads each pixel into a temporary RGBA array that
 *   starts as (0, 0, 0, 1). When `is_float` is set, channel 0 is copied into channels 1
 *   and 2. It then applies `film_apply_pass_pixel_overlays_rgba` and stores the result
 *   through `float4_to_half4_display`, advancing `pixel` by one element per step. */
# define KERNEL_FILM_CONVERT_FUNCTION(name, is_float) \
    void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \
                                                        const float *buffer, \
                                                        float *pixel, \
                                                        const int width, \
                                                        const int buffer_stride, \
                                                        const int pixel_stride) \
    { \
      int i = 0; \
      while (i < width) { \
        film_get_pass_pixel_##name(kfilm_convert, buffer, pixel); \
        buffer += buffer_stride; \
        pixel += pixel_stride; \
        i++; \
      } \
    } \
    void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \
        const KernelFilmConvert *kfilm_convert, \
        const float *buffer, \
        half4 *pixel, \
        const int width, \
        const int buffer_stride) \
    { \
      int i = 0; \
      while (i < width) { \
        float rgba[4] = {0.0f, 0.0f, 0.0f, 1.0f}; \
        film_get_pass_pixel_##name(kfilm_convert, buffer, rgba); \
        if (is_float) { \
          rgba[1] = rgba[0]; \
          rgba[2] = rgba[0]; \
        } \
        film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, rgba); \
        *pixel = float4_to_half4_display(make_float4(rgba[0], rgba[1], rgba[2], rgba[3])); \
        buffer += buffer_stride; \
        pixel++; \
        i++; \
      } \
    }
367
368#endif
369
/* `is_float` is true: in the non-stub build the half-RGBA variant copies channel 0 into
 * channels 1 and 2 before conversion. */
373KERNEL_FILM_CONVERT_FUNCTION(volume_majorant, true)
375
379
/* `is_float` is false: in the non-stub build the half-RGBA variants do not replicate
 * channel 0 into channels 1 and 2. */
382KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher, false)
383KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow, false)
386
/* Undefine the helper macros defined in this file so they are not visible after it.
 * `KERNEL_STUB` and `KERNEL_ARCH` are read but not defined in the visible part of this file
 * (presumably set by the including `.cpp` - confirm against the includers); they are
 * undefined here as well. */
387#undef KERNEL_FILM_CONVERT_FUNCTION
388
389#undef KERNEL_INVOKE
390#undef DEFINE_INTEGRATOR_SHADE_KERNEL
391#undef DEFINE_INTEGRATOR_INIT_KERNEL
392
393#undef KERNEL_STUB
394#undef STUB_ASSERT
395#undef KERNEL_ARCH
396
return true
void reset()
clear internal cached data and reset random seed
ccl_device_inline void film_cryptomatte_post(KernelGlobals kg, ccl_global float *render_buffer, const int pixel_index)
#define ccl_global
#define KERNEL_FILM_CONVERT_FUNCTION(name)
#define CCL_NAMESPACE_END
#define input
#define output
ccl_device void kernel_volume_density_evaluate(KernelGlobals kg, ccl_global const KernelShaderEvalInput *input, ccl_global float *output, const int offset)
ccl_device void kernel_curve_shadow_transparency_evaluate(KernelGlobals kg, const ccl_global KernelShaderEvalInput *input, ccl_global float *output, const int offset)
CCL_NAMESPACE_BEGIN ccl_device void kernel_displace_evaluate(KernelGlobals kg, const ccl_global KernelShaderEvalInput *input, ccl_global float *output, const int offset)
ccl_device void kernel_background_evaluate(KernelGlobals kg, const ccl_global KernelShaderEvalInput *input, ccl_global float *output, const int offset)
#define KERNEL_ARCH
#define KERNEL_FUNCTION_FULL_NAME(name)
ccl_gpu_kernel_postfix ccl_global KernelWorkTile const int ccl_global float * render_buffer
ccl_device void film_adaptive_sampling_filter_y(KernelGlobals kg, ccl_global float *render_buffer, const int x, const int start_y, const int height, const int offset, const int stride)
ccl_device void film_adaptive_sampling_filter_x(KernelGlobals kg, ccl_global float *render_buffer, const int y, const int start_x, const int width, const int offset, const int stride)
ccl_device bool film_adaptive_sampling_convergence_check(KernelGlobals kg, ccl_global float *render_buffer, const int x, const int y, const float threshold, const int reset, const int offset, const int stride)
void KERNEL_FUNCTION_FULL_NAME volume_guiding_filter_x(const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, const int y, const int center_x, const int min_x, const int max_x, const int offset, int stride)
void KERNEL_FUNCTION_FULL_NAME volume_guiding_filter_y(const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, const int x, const int center_y, const int height, const int offset, int stride)
void KERNEL_FUNCTION_FULL_NAME shader_eval_background(const ThreadKernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset)
bool KERNEL_FUNCTION_FULL_NAME adaptive_sampling_convergence_check(const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, const int x, const int y, const float threshold, const int reset, const int offset, int stride)
void KERNEL_FUNCTION_FULL_NAME cryptomatte_postprocess(const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, int pixel_index)
void KERNEL_FUNCTION_FULL_NAME shader_eval_displace(const ThreadKernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset)
void KERNEL_FUNCTION_FULL_NAME adaptive_sampling_filter_x(const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, const int y, const int start_x, const int width, const int offset, int stride)
void KERNEL_FUNCTION_FULL_NAME adaptive_sampling_filter_y(const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, const int x, const int start_y, const int height, const int offset, int stride)
void KERNEL_FUNCTION_FULL_NAME shader_eval_curve_shadow_transparency(const ThreadKernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset)
void KERNEL_FUNCTION_FULL_NAME shader_eval_volume_density(const ThreadKernelGlobalsCPU *kg, const KernelShaderEvalInput *input, float *output, const int offset)
#define DEFINE_INTEGRATOR_INIT_KERNEL(name)
#define KERNEL_FILM_CONVERT_FUNCTION(name, is_float)
#define DEFINE_INTEGRATOR_SHADE_KERNEL(name)
void KERNEL_FUNCTION_FULL_NAME volume_guiding_filter_y(const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, const int x, const int min_y, const int max_y, const int offset, const int stride)
void KERNEL_FUNCTION_FULL_NAME volume_guiding_filter_x(const ThreadKernelGlobalsCPU *kg, ccl_global float *render_buffer, const int y, const int center_x, const int min_x, const int max_x, const int offset, const int stride)