Blender V5.0
shader_eval.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
6
8
9#include "device/device.h"
10#include "device/queue.h"
11
12#include "device/cpu/kernel.h"
13
14#include "util/log.h"
15#include "util/progress.h"
16#include "util/tbb.h"
17
19
20ShaderEval::ShaderEval(Device *device, Progress &progress) : device_(device), progress_(progress)
21{
22 DCHECK_NE(device_, nullptr);
23}
24
26 const int max_num_inputs,
27 const int num_channels,
28 const std::function<int(device_vector<KernelShaderEvalInput> &)> &fill_input,
29 const std::function<void(device_vector<float> &)> &read_output)
30{
31 bool first_device = true;
32 bool success = true;
33
34 device_->foreach_device([&](Device *device) {
35 if (!first_device) {
36 LOG_DEBUG << "Multi-devices are not yet fully implemented, will evaluate shader on a "
37 "single device.";
38 return;
39 }
40 first_device = false;
41
42 device_vector<KernelShaderEvalInput> input(device, "ShaderEval input", MEM_READ_ONLY);
43 device_vector<float> output(device, "ShaderEval output", MEM_READ_WRITE);
44
45 /* Allocate and copy device buffers. */
46 DCHECK_EQ(input.device, device);
47 DCHECK_EQ(output.device, device);
48 DCHECK_LE(output.size(), input.size());
49
50 input.alloc(max_num_inputs);
51 int const num_points = fill_input(input);
52 if (num_points == 0) {
53 return;
54 }
55
56 input.copy_to_device();
57 output.alloc(num_points * num_channels);
58 output.zero_to_device();
59
60 /* Evaluate on CPU or GPU. */
61 success = (device->info.type == DEVICE_CPU) ?
62 eval_cpu(device, type, input, output, num_points) :
63 eval_gpu(device, type, input, output, num_points);
64
65 /* Copy data back from device if not canceled. */
66 if (success) {
67 output.copy_from_device(0, 1, output.size());
68 read_output(output);
69 }
70
71 input.free();
72 output.free();
73 });
74
75 return success;
76}
77
79 const ShaderEvalType type,
82 const int64_t work_size)
83{
84 vector<ThreadKernelGlobalsCPU> kernel_thread_globals;
85 device->get_cpu_kernel_thread_globals(kernel_thread_globals);
86
87 /* Find required kernel function. */
88 const CPUKernels &kernels = Device::get_cpu_kernels();
89
90 /* Simple parallel_for over all work items. */
91 KernelShaderEvalInput *input_data = input.data();
92 float *output_data = output.data();
93 bool success = true;
94
95 tbb::task_arena local_arena(device->info.cpu_threads);
96 local_arena.execute([&]() {
97 parallel_for(int64_t(0), work_size, [&](int64_t work_index) {
98 /* TODO: is this fast enough? */
99 if (progress_.get_cancel()) {
100 success = false;
101 return;
102 }
103
104 const int thread_index = tbb::this_task_arena::current_thread_index();
105 const ThreadKernelGlobalsCPU *kg = &kernel_thread_globals[thread_index];
106
107 switch (type) {
109 kernels.shader_eval_displace(kg, input_data, output_data, work_index);
110 break;
112 kernels.shader_eval_background(kg, input_data, output_data, work_index);
113 break;
115 kernels.shader_eval_curve_shadow_transparency(kg, input_data, output_data, work_index);
116 break;
118 kernels.shader_eval_volume_density(kg, input_data, output_data, work_index);
119 break;
120 }
121 });
122 });
123
124 return success;
125}
126
128 const ShaderEvalType type,
131 const int64_t work_size)
132{
133 /* Find required kernel function. */
134 DeviceKernel kernel;
135 switch (type) {
138 break;
141 break;
144 break;
147 };
148
149 /* Create device queue. */
151 queue->init_execution();
152
153 /* Execute work on GPU in chunk, so we can cancel.
154 * TODO: query appropriate size from device. */
155 const int32_t chunk_size = 65536;
156
157 const device_ptr d_input = input.device_pointer;
158 device_ptr d_output = output.device_pointer;
159
160 assert(work_size <= 0x7fffffff);
161 for (int32_t d_offset = 0; d_offset < int32_t(work_size); d_offset += chunk_size) {
162 int32_t d_work_size = std::min(chunk_size, int32_t(work_size) - d_offset);
163
164 const DeviceKernelArguments args(&d_input, &d_output, &d_offset, &d_work_size);
165
166 queue->enqueue(kernel, d_work_size, args);
167 queue->synchronize();
168
169 if (progress_.get_cancel()) {
170 return false;
171 }
172 }
173
174 return true;
175}
176
long long int int64_t
ShaderEvalFunction shader_eval_background
ShaderEvalFunction shader_eval_volume_density
ShaderEvalFunction shader_eval_displace
ShaderEvalFunction shader_eval_curve_shadow_transparency
DeviceType type
static const CPUKernels & get_cpu_kernels()
virtual unique_ptr< DeviceQueue > gpu_queue_create()
virtual void get_cpu_kernel_thread_globals(vector< ThreadKernelGlobalsCPU > &)
DeviceInfo info
ShaderEval(Device *device, Progress &progress)
bool eval_cpu(Device *device, const ShaderEvalType type, device_vector< KernelShaderEvalInput > &input, device_vector< float > &output, const int64_t work_size)
bool eval(const ShaderEvalType type, const int max_num_inputs, const int num_channels, const std::function< int(device_vector< KernelShaderEvalInput > &)> &fill_input, const std::function< void(device_vector< float > &)> &read_output)
Progress & progress_
Definition shader_eval.h:51
Device * device_
Definition shader_eval.h:50
bool eval_gpu(Device *device, const ShaderEvalType type, device_vector< KernelShaderEvalInput > &input, device_vector< float > &output, const int64_t work_size)
@ MEM_READ_WRITE
@ MEM_READ_ONLY
#define CCL_NAMESPACE_END
@ DEVICE_CPU
#define input
#define assert(assertion)
#define output
ccl_gpu_kernel_postfix const ccl_global int ccl_global float const int work_size
DeviceKernel
@ DEVICE_KERNEL_SHADER_EVAL_DISPLACE
@ DEVICE_KERNEL_SHADER_EVAL_VOLUME_DENSITY
@ DEVICE_KERNEL_SHADER_EVAL_BACKGROUND
@ DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY
#define DCHECK_EQ(a, b)
Definition log.h:144
#define LOG_DEBUG
Definition log.h:107
#define DCHECK_LE(a, b)
Definition log.h:147
#define DCHECK_NE(a, b)
Definition log.h:143
ShaderEvalType
Definition shader_eval.h:18
@ SHADER_EVAL_VOLUME_DENSITY
Definition shader_eval.h:22
@ SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY
Definition shader_eval.h:21
@ SHADER_EVAL_BACKGROUND
Definition shader_eval.h:20
@ SHADER_EVAL_DISPLACE
Definition shader_eval.h:19
uint64_t device_ptr
Definition types_base.h:44