Blender V5.0
cpu/device_impl.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
6
7#include <cstdlib>
8#include <cstring>
9
10/* So ImathMath is included before our kernel_cpu_compat. */
11#ifdef WITH_OSL
12/* So no context pollution happens from indirectly included windows.h */
13# ifdef _WIN32
14# include "util/windows.h"
15# endif
16# include <OSL/oslexec.h>
17#endif
18
19#ifdef WITH_EMBREE
20# include <embree4/rtcore.h>
21#endif
22
23#include "device/cpu/kernel.h"
24
25#include "device/device.h"
26
28#include "kernel/globals.h"
29#include "kernel/types.h"
30
31#include "bvh/embree.h"
32
33#include "session/buffers.h"
34
35#include "util/guiding.h"
36#include "util/log.h"
37#include "util/progress.h"
38#include "util/task.h"
39
41
42CPUDevice::CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
43 : Device(info_, stats_, profiler_, headless_), texture_info(this, "texture_info", MEM_GLOBAL)
44{
45 /* Pick any kernel, all of them are supposed to have same level of microarchitecture
46 * optimization. */
48 << " CPU kernels.";
49
50 if (info.cpu_threads == 0) {
52 }
53
54#ifdef WITH_EMBREE
55 embree_device = rtcNewDevice("verbose=0");
56#endif
57 need_texture_info = false;
58}
59
61{
62#ifdef WITH_EMBREE
63 rtcReleaseDevice(embree_device);
64#endif
65
66 texture_info.free();
67}
68
70{
71 BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
72#ifdef WITH_EMBREE
73 bvh_layout_mask |= BVH_LAYOUT_EMBREE;
74#endif /* WITH_EMBREE */
75 return bvh_layout_mask;
76}
77
79{
80 if (!need_texture_info) {
81 return false;
82 }
83
84 texture_info.copy_to_device();
85 need_texture_info = false;
86
87 return true;
88}
89
91{
92 if (mem.type == MEM_TEXTURE) {
93 assert(!"mem_alloc not supported for textures.");
94 }
95 else if (mem.type == MEM_GLOBAL) {
96 assert(!"mem_alloc not supported for global memory.");
97 }
98 else {
99 if (mem.name) {
100 LOG_DEBUG << "Buffer allocate: " << mem.name << ", "
101 << string_human_readable_number(mem.memory_size()) << " bytes. ("
103 }
104
105 if (mem.type == MEM_DEVICE_ONLY) {
106 size_t alignment = MIN_ALIGNMENT_DEVICE_MEMORY;
107 void *data = util_aligned_malloc(mem.memory_size(), alignment);
109 }
110 else {
111 assert(!(mem.host_pointer == nullptr && mem.memory_size() > 0));
113 }
114
115 mem.device_size = mem.memory_size();
116 stats.mem_alloc(mem.device_size);
117 }
118}
119
121{
122 if (mem.type == MEM_GLOBAL) {
123 global_free(mem);
124 global_alloc(mem);
125 }
126 else if (mem.type == MEM_TEXTURE) {
127 tex_free((device_texture &)mem);
129 }
130 else {
131 if (!mem.device_pointer) {
132 mem_alloc(mem);
133 }
134
135 /* copy is no-op */
136 }
137}
138
140{
141 /* no-op */
142}
143
145 device_memory & /*mem*/, size_t /*y*/, size_t /*w*/, size_t /*h*/, size_t /*elem*/)
146{
147 /* no-op */
148}
149
151{
152 if (!mem.device_pointer) {
153 mem_alloc(mem);
154 }
155
156 if (mem.device_pointer) {
157 memset((void *)mem.device_pointer, 0, mem.memory_size());
158 }
159}
160
162{
163 if (mem.type == MEM_GLOBAL) {
164 global_free(mem);
165 }
166 else if (mem.type == MEM_TEXTURE) {
167 tex_free((device_texture &)mem);
168 }
169 else if (mem.device_pointer) {
170 if (mem.type == MEM_DEVICE_ONLY) {
171 util_aligned_free((void *)mem.device_pointer, mem.memory_size());
172 }
173 mem.device_pointer = 0;
174 stats.mem_free(mem.device_size);
175 mem.device_size = 0;
176 }
177}
178
179device_ptr CPUDevice::mem_alloc_sub_ptr(device_memory &mem, const size_t offset, size_t /*size*/)
180{
181 return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
182}
183
184void CPUDevice::const_copy_to(const char *name, void *host, const size_t size)
185{
186#ifdef WITH_EMBREE
187 if (strcmp(name, "data") == 0) {
188 assert(size <= sizeof(KernelData));
189
190 /* Update scene handle (since it is different for each device on multi devices).
191 * This must be a raw pointer copy since at some points during scene update this
192 * pointer may be invalid. */
193 KernelData *const data = (KernelData *)host;
194 data->device_bvh = embree_traversable;
195 }
196#endif
198}
199
201{
202 LOG_DEBUG << "Global memory allocate: " << mem.name << ", "
203 << string_human_readable_number(mem.memory_size()) << " bytes. ("
205
207
209 mem.device_size = mem.memory_size();
210 stats.mem_alloc(mem.device_size);
211}
212
214{
215 if (mem.device_pointer) {
216 mem.device_pointer = 0;
217 stats.mem_free(mem.device_size);
218 mem.device_size = 0;
219 }
220}
221
223{
224 LOG_DEBUG << "Texture allocate: " << mem.name << ", "
225 << string_human_readable_number(mem.memory_size()) << " bytes. ("
227
229 mem.device_size = mem.memory_size();
230 stats.mem_alloc(mem.device_size);
231
232 const uint slot = mem.slot;
233 if (slot >= texture_info.size()) {
234 /* Allocate some slots in advance, to reduce amount of re-allocations. */
235 texture_info.resize(slot + 128);
236 }
237
238 texture_info[slot] = mem.info;
239 texture_info[slot].data = (uint64_t)mem.host_pointer;
240 need_texture_info = true;
241}
242
244{
245 if (mem.device_pointer) {
246 mem.device_pointer = 0;
247 stats.mem_free(mem.device_size);
248 mem.device_size = 0;
249 need_texture_info = true;
250 }
251}
252
253void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
254{
255#ifdef WITH_EMBREE
256 if (bvh->params.bvh_layout == BVH_LAYOUT_EMBREE ||
261 {
262 BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
263 if (refit) {
264 bvh_embree->refit(progress);
265 }
266 else {
267 bvh_embree->build(progress, &stats, embree_device);
268 }
269
270 if (bvh->params.top_level) {
271# if RTC_VERSION >= 40400
272 embree_traversable = rtcGetSceneTraversable(bvh_embree->scene);
273# else
274 embree_traversable = bvh_embree->scene;
275# endif
276 }
277 }
278 else
279#endif
280 {
281 Device::build_bvh(bvh, progress, refit);
282 }
283}
284
286{
287#if defined(WITH_PATH_GUIDING)
288 if (!guiding_device) {
289 if (guiding_device_type() == 8) {
290 guiding_device = make_unique<openpgl::cpp::Device>(PGL_DEVICE_TYPE_CPU_8);
291 }
292 else if (guiding_device_type() == 4) {
293 guiding_device = make_unique<openpgl::cpp::Device>(PGL_DEVICE_TYPE_CPU_4);
294 }
295 }
296 return guiding_device.get();
297#else
298 return nullptr;
299#endif
300}
301
303 vector<ThreadKernelGlobalsCPU> &kernel_thread_globals)
304{
305 /* Ensure latest texture info is loaded into kernel globals before returning. */
307
308 kernel_thread_globals.clear();
309 OSLGlobals *osl_globals = get_cpu_osl_memory();
310 for (int i = 0; i < info.cpu_threads; i++) {
311 kernel_thread_globals.emplace_back(kernel_globals, osl_globals, profiler, i);
312 }
313}
314
316{
317#ifdef WITH_OSL
318 return &osl_globals;
319#else
320 return nullptr;
321#endif
322}
323
324bool CPUDevice::load_kernels(const uint /*kernel_features*/)
325{
326 return true;
327}
328
unsigned int uint
CCL_NAMESPACE_BEGIN void * util_aligned_malloc(const size_t size, const int alignment)
void util_aligned_free(void *ptr, const size_t size)
BMesh const char void * data
unsigned long long int uint64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
BVHLayout bvh_layout
Definition params.h:83
bool top_level
Definition params.h:80
Definition bvh/bvh.h:67
BVHParams params
Definition bvh/bvh.h:69
bool load_kernels(uint) override
bool need_texture_info
CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
device_ptr mem_alloc_sub_ptr(device_memory &mem, const size_t offset, size_t) override
bool load_texture_info()
void mem_free(device_memory &mem) override
void mem_alloc(device_memory &mem) override
void tex_alloc(device_texture &mem)
BVHLayoutMask get_bvh_layout_mask(uint) const override
void tex_free(device_texture &mem)
void const_copy_to(const char *name, void *host, const size_t size) override
void global_alloc(device_memory &mem)
~CPUDevice() override
void mem_copy_from(device_memory &mem, const size_t y, size_t w, const size_t h, size_t elem) override
void mem_move_to_host(device_memory &mem) override
device_vector< TextureInfo > texture_info
void mem_zero(device_memory &mem) override
KernelGlobalsCPU kernel_globals
void * get_guiding_device() const override
void build_bvh(BVH *bvh, Progress &progress, bool refit) override
OSLGlobals * get_cpu_osl_memory() override
void mem_copy_to(device_memory &mem) override
void get_cpu_kernel_thread_globals(vector< ThreadKernelGlobalsCPU > &kernel_thread_globals) override
void global_free(device_memory &mem)
const char * get_uarch_name() const
IntegratorInitFunction integrator_init_from_camera
Device(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool headless_)
static const CPUKernels & get_cpu_kernels()
friend class device_memory
Profiler & profiler
Stats & stats
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit)
DeviceInfo info
static int max_concurrency()
Definition task.cpp:96
size_t memory_elements_size(const int elements)
@ MEM_TEXTURE
@ MEM_DEVICE_ONLY
#define MIN_ALIGNMENT_DEVICE_MEMORY
#define CCL_NAMESPACE_END
#define assert(assertion)
CCL_NAMESPACE_BEGIN void kernel_const_copy(KernelGlobalsCPU *kg, const char *name, void *host, size_t)
void kernel_global_memory_copy(KernelGlobalsCPU *kg, const char *name, void *mem, const size_t size)
@ BVH_LAYOUT_MULTI_HIPRT_EMBREE
@ BVH_LAYOUT_EMBREE
@ BVH_LAYOUT_BVH2
@ BVH_LAYOUT_MULTI_METAL_EMBREE
@ BVH_LAYOUT_MULTI_EMBREEGPU_EMBREE
@ BVH_LAYOUT_MULTI_OPTIX_EMBREE
#define LOG_DEBUG
Definition log.h:107
#define LOG_INFO
Definition log.h:106
int BVHLayoutMask
Definition params.h:50
const char * name
string string_human_readable_size(size_t size)
Definition string.cpp:257
string string_human_readable_number(size_t num)
Definition string.cpp:276
i
Definition text_draw.cc:230
uint64_t device_ptr
Definition types_base.h:44
static CCL_NAMESPACE_BEGIN int guiding_device_type()