Blender V4.3
cpu/device_impl.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
6
7#include <stdlib.h>
8#include <string.h>
9
10/* So ImathMath is included before our kernel_cpu_compat. */
11#ifdef WITH_OSL
12/* So no context pollution happens from indirectly included windows.h */
13# include "util/windows.h"
14# include <OSL/oslexec.h>
15#endif
16
17#ifdef WITH_EMBREE
18# if EMBREE_MAJOR_VERSION >= 4
19# include <embree4/rtcore.h>
20# else
21# include <embree3/rtcore.h>
22# endif
23#endif
24
25#include "device/cpu/kernel.h"
27
28#include "device/device.h"
29
30// clang-format off
34#include "kernel/types.h"
35
36#include "kernel/osl/globals.h"
37// clang-format on
38
39#include "bvh/embree.h"
40
41#include "session/buffers.h"
42
43#include "util/debug.h"
44#include "util/foreach.h"
45#include "util/function.h"
46#include "util/guiding.h"
47#include "util/log.h"
48#include "util/map.h"
50#include "util/optimization.h"
51#include "util/progress.h"
52#include "util/system.h"
53#include "util/task.h"
54#include "util/thread.h"
55
57
/* Construct the CPU device.
 *
 * Forwards all arguments to the #Device base class and creates `texture_info` as a
 * MEM_GLOBAL allocation named "texture_info". The body then hooks up the optional OSL
 * globals, creates the optional Embree device and clears the pending texture-info flag.
 *
 * NOTE(review): this listing lacks the beginning of the log statement that ends in
 * " CPU kernels." and the body of the `info.cpu_threads == 0` branch. Confirm both against
 * the original file before editing around them. */
58CPUDevice::CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
59 : Device(info_, stats_, profiler_, headless_), texture_info(this, "texture_info", MEM_GLOBAL)
60{
61 /* Pick any kernel, all of them are supposed to have same level of microarchitecture
62 * optimization. */
64 << " CPU kernels.";
65
/* A thread count of zero is special-cased here; the replacement value is not visible in
 * this listing (presumably a default thread count is chosen -- TODO confirm). */
66 if (info.cpu_threads == 0) {
68 }
69
70#ifdef WITH_OSL
/* Let the kernel globals reach this device's OSL globals. */
71 kernel_globals.osl = &osl_globals;
72#endif
73#ifdef WITH_EMBREE
/* Create the Embree device with the "verbose=0" configuration string. */
74 embree_device = rtcNewDevice("verbose=0");
75#endif
/* No texture info update is pending on a freshly constructed device. */
76 need_texture_info = false;
77}
78
80{
81#ifdef WITH_EMBREE
82 rtcReleaseDevice(embree_device);
83#endif
84
86}
87
89{
90 BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
91#ifdef WITH_EMBREE
92 bvh_layout_mask |= BVH_LAYOUT_EMBREE;
93#endif /* WITH_EMBREE */
94 return bvh_layout_mask;
95}
96
98{
99 if (!need_texture_info) {
100 return false;
101 }
102
104 need_texture_info = false;
105
106 return true;
107}
108
110{
111 if (mem.type == MEM_TEXTURE) {
112 assert(!"mem_alloc not supported for textures.");
113 }
114 else if (mem.type == MEM_GLOBAL) {
115 assert(!"mem_alloc not supported for global memory.");
116 }
117 else {
118 if (mem.name) {
119 VLOG_WORK << "Buffer allocate: " << mem.name << ", "
120 << string_human_readable_number(mem.memory_size()) << " bytes. ("
122 }
123
124 if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
125 size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES;
126 void *data = util_aligned_malloc(mem.memory_size(), alignment);
127 mem.device_pointer = (device_ptr)data;
128 }
129 else {
131 }
132
133 mem.device_size = mem.memory_size();
135 }
136}
137
139{
140 if (mem.type == MEM_GLOBAL) {
141 global_free(mem);
142 global_alloc(mem);
143 }
144 else if (mem.type == MEM_TEXTURE) {
145 tex_free((device_texture &)mem);
147 }
148 else {
149 if (!mem.device_pointer) {
150 mem_alloc(mem);
151 }
152
153 /* copy is no-op */
154 }
155}
156
158 device_memory & /*mem*/, size_t /*y*/, size_t /*w*/, size_t /*h*/, size_t /*elem*/)
159{
160 /* no-op */
161}
162
164{
165 if (!mem.device_pointer) {
166 mem_alloc(mem);
167 }
168
169 if (mem.device_pointer) {
170 memset((void *)mem.device_pointer, 0, mem.memory_size());
171 }
172}
173
175{
176 if (mem.type == MEM_GLOBAL) {
177 global_free(mem);
178 }
179 else if (mem.type == MEM_TEXTURE) {
180 tex_free((device_texture &)mem);
181 }
182 else if (mem.device_pointer) {
183 if (mem.type == MEM_DEVICE_ONLY || !mem.host_pointer) {
185 }
186 mem.device_pointer = 0;
188 mem.device_size = 0;
189 }
190}
191
192device_ptr CPUDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
193{
194 return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
195}
196
197void CPUDevice::const_copy_to(const char *name, void *host, size_t size)
198{
199#ifdef WITH_EMBREE
200 if (strcmp(name, "data") == 0) {
201 assert(size <= sizeof(KernelData));
202
203 // Update scene handle (since it is different for each device on multi devices)
204 KernelData *const data = (KernelData *)host;
205 data->device_bvh = embree_scene;
206 }
207#endif
208 kernel_const_copy(&kernel_globals, name, host, size);
209}
210
212{
213 VLOG_WORK << "Global memory allocate: " << mem.name << ", "
214 << string_human_readable_number(mem.memory_size()) << " bytes. ("
216
218
220 mem.device_size = mem.memory_size();
222}
223
225{
226 if (mem.device_pointer) {
227 mem.device_pointer = 0;
229 mem.device_size = 0;
230 }
231}
232
234{
235 VLOG_WORK << "Texture allocate: " << mem.name << ", "
236 << string_human_readable_number(mem.memory_size()) << " bytes. ("
238
240 mem.device_size = mem.memory_size();
242
243 const uint slot = mem.slot;
244 if (slot >= texture_info.size()) {
245 /* Allocate some slots in advance, to reduce amount of re-allocations. */
246 texture_info.resize(slot + 128);
247 }
248
249 texture_info[slot] = mem.info;
251 need_texture_info = true;
252}
253
255{
256 if (mem.device_pointer) {
257 mem.device_pointer = 0;
259 mem.device_size = 0;
260 need_texture_info = true;
261 }
262}
263
/* Build or refit `bvh` for this device.
 *
 * With Embree enabled, a BVH that passes the layout check below is treated as a #BVHEmbree:
 * it is refit when `refit` is set and otherwise built on `embree_device`, passing `stats`
 * along. For a top-level BVH the resulting scene is stored in `embree_scene`. Every other
 * BVH is forwarded to Device::build_bvh().
 *
 * NOTE(review): only the first operand of the layout check (BVH_LAYOUT_EMBREE) is visible in
 * this listing; the remaining operands are presumably the multi-device Embree layouts --
 * TODO confirm against the original file. */
264void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
265{
266#ifdef WITH_EMBREE
267 if (bvh->params.bvh_layout == BVH_LAYOUT_EMBREE ||
272 {
273 BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
274 if (refit) {
275 bvh_embree->refit(progress);
276 }
277 else {
278 bvh_embree->build(progress, &stats, embree_device);
279 }
280
/* Remember the top-level scene; const_copy_to() writes it into the kernel data. */
281 if (bvh->params.top_level) {
282 embree_scene = bvh_embree->scene;
283 }
284 }
285 else
286#endif
/* This call is the `else` branch when Embree is compiled in, and the only statement of the
 * function when it is not. */
287 Device::build_bvh(bvh, progress, refit);
288}
289
291{
292#ifdef WITH_PATH_GUIDING
293 if (!guiding_device) {
294 if (guiding_device_type() == 8) {
295 guiding_device = make_unique<openpgl::cpp::Device>(PGL_DEVICE_TYPE_CPU_8);
296 }
297 else if (guiding_device_type() == 4) {
298 guiding_device = make_unique<openpgl::cpp::Device>(PGL_DEVICE_TYPE_CPU_4);
299 }
300 }
301 return guiding_device.get();
302#else
303 return nullptr;
304#endif
305}
306
308 vector<CPUKernelThreadGlobals> &kernel_thread_globals)
309{
310 /* Ensure latest texture info is loaded into kernel globals before returning. */
312
313 kernel_thread_globals.clear();
314 void *osl_memory = get_cpu_osl_memory();
315 for (int i = 0; i < info.cpu_threads; i++) {
316 kernel_thread_globals.emplace_back(kernel_globals, osl_memory, profiler, i);
317 }
318}
319
321{
322#ifdef WITH_OSL
323 return &osl_globals;
324#else
325 return NULL;
326#endif
327}
328
329bool CPUDevice::load_kernels(const uint /*kernel_features*/)
330{
331 return true;
332}
333
unsigned int uint
void util_aligned_free(void *ptr)
CCL_NAMESPACE_BEGIN void * util_aligned_malloc(size_t size, int alignment)
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
BVHLayout bvh_layout
Definition params.h:84
bool top_level
Definition params.h:81
Definition bvh/bvh.h:66
BVHParams params
Definition bvh/bvh.h:68
virtual void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override
virtual bool load_kernels(uint) override
bool need_texture_info
CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
bool load_texture_info()
virtual void mem_free(device_memory &mem) override
virtual void const_copy_to(const char *name, void *host, size_t size) override
virtual void mem_alloc(device_memory &mem) override
virtual void * get_cpu_osl_memory() override
void tex_alloc(device_texture &mem)
virtual BVHLayoutMask get_bvh_layout_mask(uint) const override
virtual device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t) override
void tex_free(device_texture &mem)
void global_alloc(device_memory &mem)
device_vector< TextureInfo > texture_info
virtual void mem_zero(device_memory &mem) override
KernelGlobalsCPU kernel_globals
void * get_guiding_device() const override
virtual void get_cpu_kernel_thread_globals(vector< CPUKernelThreadGlobals > &kernel_thread_globals) override
void build_bvh(BVH *bvh, Progress &progress, bool refit) override
virtual void mem_copy_to(device_memory &mem) override
void global_free(device_memory &mem)
const char * get_uarch_name() const
IntegratorInitFunction integrator_init_from_camera
static const CPUKernels & get_cpu_kernels()
Profiler & profiler
Stats & stats
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit)
DeviceInfo info
void mem_free(size_t size)
Definition util/stats.h:26
void mem_alloc(size_t size)
Definition util/stats.h:20
static int max_concurrency()
Definition task.cpp:98
size_t memory_elements_size(int elements)
size_t size() const
T * resize(size_t width, size_t height=0, size_t depth=0)
@ MEM_TEXTURE
@ MEM_DEVICE_ONLY
#define MIN_ALIGNMENT_CPU_DATA_TYPES
#define CCL_NAMESPACE_END
#define NULL
void kernel_global_memory_copy(KernelGlobalsCPU *kg, const char *name, void *mem, size_t size)
CCL_NAMESPACE_BEGIN void kernel_const_copy(KernelGlobalsCPU *kg, const char *name, void *host, size_t)
KernelData
@ BVH_LAYOUT_MULTI_HIPRT_EMBREE
@ BVH_LAYOUT_EMBREE
@ BVH_LAYOUT_BVH2
@ BVH_LAYOUT_MULTI_METAL_EMBREE
@ BVH_LAYOUT_MULTI_EMBREEGPU_EMBREE
@ BVH_LAYOUT_MULTI_OPTIX_EMBREE
#define VLOG_INFO
Definition log.h:72
#define VLOG_WORK
Definition log.h:75
int BVHLayoutMask
Definition params.h:51
unsigned __int64 uint64_t
Definition stdint.h:90
string string_human_readable_size(size_t size)
Definition string.cpp:234
string string_human_readable_number(size_t num)
Definition string.cpp:255
static CCL_NAMESPACE_BEGIN int guiding_device_type()
uint64_t device_ptr
Definition util/types.h:45