Blender V4.3
shader_eval.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
6
#include <atomic>

#include "device/device.h"
#include "device/queue.h"

#include "device/cpu/kernel.h"

#include "util/log.h"
#include "util/progress.h"
#include "util/tbb.h"
15#include "util/tbb.h"
16
18
19ShaderEval::ShaderEval(Device *device, Progress &progress) : device_(device), progress_(progress)
20{
21 DCHECK_NE(device_, nullptr);
22}
23
25 const int max_num_inputs,
26 const int num_channels,
27 const function<int(device_vector<KernelShaderEvalInput> &)> &fill_input,
28 const function<void(device_vector<float> &)> &read_output)
29{
30 bool first_device = true;
31 bool success = true;
32
33 device_->foreach_device([&](Device *device) {
34 if (!first_device) {
35 VLOG_WORK << "Multi-devices are not yet fully implemented, will evaluate shader on a "
36 "single device.";
37 return;
38 }
39 first_device = false;
40
41 device_vector<KernelShaderEvalInput> input(device, "ShaderEval input", MEM_READ_ONLY);
42 device_vector<float> output(device, "ShaderEval output", MEM_READ_WRITE);
43
44 /* Allocate and copy device buffers. */
45 DCHECK_EQ(input.device, device);
46 DCHECK_EQ(output.device, device);
47 DCHECK_LE(output.size(), input.size());
48
49 input.alloc(max_num_inputs);
50 int num_points = fill_input(input);
51 if (num_points == 0) {
52 return;
53 }
54
55 input.copy_to_device();
56 output.alloc(num_points * num_channels);
57 output.zero_to_device();
58
59 /* Evaluate on CPU or GPU. */
60 success = (device->info.type == DEVICE_CPU) ?
61 eval_cpu(device, type, input, output, num_points) :
62 eval_gpu(device, type, input, output, num_points);
63
64 /* Copy data back from device if not canceled. */
65 if (success) {
66 output.copy_from_device(0, 1, output.size());
67 read_output(output);
68 }
69
70 input.free();
71 output.free();
72 });
73
74 return success;
75}
76
78 const ShaderEvalType type,
81 const int64_t work_size)
82{
83 vector<CPUKernelThreadGlobals> kernel_thread_globals;
84 device->get_cpu_kernel_thread_globals(kernel_thread_globals);
85
86 /* Find required kernel function. */
87 const CPUKernels &kernels = Device::get_cpu_kernels();
88
89 /* Simple parallel_for over all work items. */
90 KernelShaderEvalInput *input_data = input.data();
91 float *output_data = output.data();
92 bool success = true;
93
94 tbb::task_arena local_arena(device->info.cpu_threads);
95 local_arena.execute([&]() {
96 parallel_for(int64_t(0), work_size, [&](int64_t work_index) {
97 /* TODO: is this fast enough? */
98 if (progress_.get_cancel()) {
99 success = false;
100 return;
101 }
102
103 const int thread_index = tbb::this_task_arena::current_thread_index();
104 const KernelGlobalsCPU *kg = &kernel_thread_globals[thread_index];
105
106 switch (type) {
108 kernels.shader_eval_displace(kg, input_data, output_data, work_index);
109 break;
111 kernels.shader_eval_background(kg, input_data, output_data, work_index);
112 break;
114 kernels.shader_eval_curve_shadow_transparency(kg, input_data, output_data, work_index);
115 break;
116 }
117 });
118 });
119
120 return success;
121}
122
124 const ShaderEvalType type,
126 device_vector<float> &output,
127 const int64_t work_size)
128{
129 /* Find required kernel function. */
130 DeviceKernel kernel;
131 switch (type) {
134 break;
137 break;
140 break;
141 };
142
143 /* Create device queue. */
144 unique_ptr<DeviceQueue> queue = device->gpu_queue_create();
145 queue->init_execution();
146
147 /* Execute work on GPU in chunk, so we can cancel.
148 * TODO: query appropriate size from device. */
149 const int32_t chunk_size = 65536;
150
151 device_ptr d_input = input.device_pointer;
152 device_ptr d_output = output.device_pointer;
153
154 assert(work_size <= 0x7fffffff);
155 for (int32_t d_offset = 0; d_offset < int32_t(work_size); d_offset += chunk_size) {
156 int32_t d_work_size = std::min(chunk_size, int32_t(work_size) - d_offset);
157
158 DeviceKernelArguments args(&d_input, &d_output, &d_offset, &d_work_size);
159
160 queue->enqueue(kernel, d_work_size, args);
161 queue->synchronize();
162
163 if (progress_.get_cancel()) {
164 return false;
165 }
166 }
167
168 return true;
169}
170
#define output
ShaderEvalFunction shader_eval_background
ShaderEvalFunction shader_eval_displace
ShaderEvalFunction shader_eval_curve_shadow_transparency
DeviceType type
static const CPUKernels & get_cpu_kernels()
virtual void get_cpu_kernel_thread_globals(vector< CPUKernelThreadGlobals > &)
virtual unique_ptr< DeviceQueue > gpu_queue_create()
virtual void foreach_device(const function< void(Device *)> &callback)
DeviceInfo info
bool get_cancel() const
Definition progress.h:93
bool eval(const ShaderEvalType type, const int max_num_inputs, const int num_channels, const function< int(device_vector< KernelShaderEvalInput > &)> &fill_input, const function< void(device_vector< float > &)> &read_output)
ShaderEval(Device *device, Progress &progress)
bool eval_cpu(Device *device, const ShaderEvalType type, device_vector< KernelShaderEvalInput > &input, device_vector< float > &output, const int64_t work_size)
Progress & progress_
Definition shader_eval.h:50
Device * device_
Definition shader_eval.h:49
bool eval_gpu(Device *device, const ShaderEvalType type, device_vector< KernelShaderEvalInput > &input, device_vector< float > &output, const int64_t work_size)
@ MEM_READ_WRITE
@ MEM_READ_ONLY
#define CCL_NAMESPACE_END
@ DEVICE_CPU
ccl_gpu_kernel_postfix ccl_global const int ccl_global float const int work_size
DeviceKernel
@ DEVICE_KERNEL_SHADER_EVAL_DISPLACE
@ DEVICE_KERNEL_SHADER_EVAL_BACKGROUND
@ DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY
#define DCHECK_EQ(a, b)
Definition log.h:59
#define VLOG_WORK
Definition log.h:75
#define DCHECK_LE(a, b)
Definition log.h:62
#define DCHECK_NE(a, b)
Definition log.h:58
ShaderEvalType
Definition shader_eval.h:18
@ SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY
Definition shader_eval.h:21
@ SHADER_EVAL_BACKGROUND
Definition shader_eval.h:20
@ SHADER_EVAL_DISPLACE
Definition shader_eval.h:19
__int64 int64_t
Definition stdint.h:89
signed int int32_t
Definition stdint.h:77
uint64_t device_ptr
Definition util/types.h:45