Blender V5.0
optix/device_impl.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2019 NVIDIA Corporation
2 * SPDX-FileCopyrightText: 2019-2022 Blender Foundation
3 *
4 * SPDX-License-Identifier: Apache-2.0 */
5
6#ifdef WITH_OPTIX
7
9# include "device/optix/queue.h"
10
11# include "bvh/bvh.h"
12# include "bvh/optix.h"
13
14# include "scene/hair.h"
15# include "scene/mesh.h"
16# include "scene/object.h"
17# include "scene/pointcloud.h"
18# include "scene/scene.h"
19
20# include "util/debug.h"
21# include "util/log.h"
22# include "util/path.h"
23# include "util/progress.h"
24# include "util/task.h"
25
26# define __KERNEL_OPTIX__
28
30
31static void execute_optix_task(TaskPool &pool, OptixTask task, OptixResult &failure_reason)
32{
33 OptixTask additional_tasks[16];
34 unsigned int num_additional_tasks = 0;
35
36 const OptixResult result = optixTaskExecute(task, additional_tasks, 16, &num_additional_tasks);
37 if (result == OPTIX_SUCCESS) {
38 for (unsigned int i = 0; i < num_additional_tasks; ++i) {
39 pool.push([&pool, additional_task = additional_tasks[i], &failure_reason] {
40 execute_optix_task(pool, additional_task, failure_reason);
41 });
42 }
43 }
44 else {
45 failure_reason = result;
46 }
47}
48
/* Construct the OptiX device on top of the CUDA device: creates the OptiX
 * device context, wires its log callback into the Cycles logger, and
 * allocates the launch-parameter buffer used by every kernel launch. */
OptiXDevice::OptiXDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
    : CUDADevice(info, stats, profiler, headless),
# ifdef WITH_OSL
      osl_colorsystem(this, "osl_colorsystem", MEM_READ_ONLY),
# endif
      sbt_data(this, "__sbt", MEM_READ_ONLY),
      launch_params(this, "kernel_params", false)
{
  /* Make the CUDA context current. */
  if (!cuContext) {
    /* Do not initialize if CUDA context creation failed already. */
    return;
  }
  const CUDAContextScope scope(this);

  /* Create OptiX context for this device. */
  OptixDeviceContextOptions options = {};
  options.logCallbackLevel = 4; /* Fatal = 1, Error = 2, Warning = 3, Print = 4. */
  /* Forward OptiX runtime messages to the corresponding Cycles log levels. */
  options.logCallbackFunction = [](unsigned int level, const char *, const char *message, void *) {
    switch (level) {
      case 1:
        LOG_FATAL << message;
        break;
      case 2:
        LOG_ERROR << message;
        break;
      case 3:
        LOG_WARNING << message;
        break;
      case 4:
        LOG_DEBUG << message;
        break;
      default:
        break;
    }
  };
  if (DebugFlags().optix.use_debug) {
    LOG_INFO << "Using OptiX debug mode.";
    options.validationMode = OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL;
  }
  optix_assert(optixDeviceContextCreate(cuContext, &options, &context));
  optix_assert(optixDeviceContextSetLogCallback(
      context, options.logCallbackFunction, options.logCallbackData, options.logCallbackLevel));

  /* Fix weird compiler bug that assigns wrong size. */
  launch_params.data_elements = sizeof(KernelParamsOptiX);

  /* Allocate launch parameter buffer memory on device. */
  launch_params.alloc_to_device(1);
}
99
100OptiXDevice::~OptiXDevice()
101{
102 /* Make CUDA context current. */
103 const CUDAContextScope scope(this);
104
105 free_bvh_memory_delayed();
106
107 sbt_data.free();
108 texture_info.free();
109 launch_params.free();
110
111 /* Unload modules. */
112 if (optix_module != nullptr) {
113 optixModuleDestroy(optix_module);
114 }
115 for (int i = 0; i < 2; ++i) {
116 if (builtin_modules[i] != nullptr) {
117 optixModuleDestroy(builtin_modules[i]);
118 }
119 }
120 for (int i = 0; i < NUM_PIPELINES; ++i) {
121 if (pipelines[i] != nullptr) {
122 optixPipelineDestroy(pipelines[i]);
123 }
124 }
125 for (int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
126 if (groups[i] != nullptr) {
127 optixProgramGroupDestroy(groups[i]);
128 }
129 }
130
131# ifdef WITH_OSL
132 if (osl_camera_module != nullptr) {
133 optixModuleDestroy(osl_camera_module);
134 }
135 for (const OptixModule &module : osl_modules) {
136 if (module != nullptr) {
137 optixModuleDestroy(module);
138 }
139 }
140 for (const OptixProgramGroup &group : osl_groups) {
141 if (group != nullptr) {
142 optixProgramGroupDestroy(group);
143 }
144 }
145 osl_colorsystem.free();
146# endif
147
148 optixDeviceContextDestroy(context);
149}
150
151unique_ptr<DeviceQueue> OptiXDevice::gpu_queue_create()
152{
153 return make_unique<OptiXDeviceQueue>(this);
154}
155
156BVHLayoutMask OptiXDevice::get_bvh_layout_mask(uint /*kernel_features*/) const
157{
158 /* OptiX has its own internal acceleration structure format. */
159 return BVH_LAYOUT_OPTIX;
160}
161
162static string get_optix_include_dir()
163{
164 const char *env_dir = getenv("OPTIX_ROOT_DIR");
165 const char *default_dir = CYCLES_RUNTIME_OPTIX_ROOT_DIR;
166
167 if (env_dir && env_dir[0]) {
168 const string env_include_dir = path_join(env_dir, "include");
169 return env_include_dir;
170 }
171 if (default_dir[0]) {
172 const string default_include_dir = path_join(default_dir, "include");
173 return default_include_dir;
174 }
175
176 return string();
177}
178
179string OptiXDevice::compile_kernel_get_common_cflags(const uint kernel_features)
180{
181 string common_cflags = CUDADevice::compile_kernel_get_common_cflags(kernel_features);
182
183 /* Add OptiX SDK include directory to include paths. */
184 common_cflags += string_printf(" -I\"%s\"", get_optix_include_dir().c_str());
185
186 /* Specialization for shader ray-tracing. */
187 if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
188 common_cflags += " --keep-device-functions";
189 }
190
191 return common_cflags;
192}
193
194void OptiXDevice::create_optix_module(TaskPool &pool,
195 OptixModuleCompileOptions &module_options,
196 string &ptx_data,
197 OptixModule &module,
198 OptixResult &result)
199{
200 OptixTask task = nullptr;
201 result = optixModuleCreateWithTasks(context,
202 &module_options,
203 &pipeline_options,
204 ptx_data.data(),
205 ptx_data.size(),
206 nullptr,
207 nullptr,
208 &module,
209 &task);
210 if (result == OPTIX_SUCCESS) {
211 execute_optix_task(pool, task, result);
212 }
213}
214
215bool OptiXDevice::load_kernels(const uint kernel_features)
216{
217 if (have_error()) {
218 /* Abort early if context creation failed already. */
219 return false;
220 }
221
222# ifdef WITH_OSL
223 /* TODO: Consider splitting kernels into an OSL-camera-only and a full-OSL variant. */
224 const bool use_osl_shading = (kernel_features & KERNEL_FEATURE_OSL_SHADING);
225 const bool use_osl_camera = (kernel_features & KERNEL_FEATURE_OSL_CAMERA);
226# else
227 const bool use_osl_shading = false;
228 const bool use_osl_camera = false;
229# endif
230
231 /* Skip creating OptiX module if only doing denoising. */
232 const bool need_optix_kernels = (kernel_features &
234
235 /* Detect existence of OptiX kernel and SDK here early. So we can error out
236 * before compiling the CUDA kernels, to avoid failing right after when
237 * compiling the OptiX kernel. */
238 string suffix = use_osl_shading ? "_osl" :
239 (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) ?
240 "_shader_raytrace" :
241 "";
242 string ptx_filename;
243 if (need_optix_kernels) {
244 ptx_filename = path_get("lib/kernel_optix" + suffix + ".ptx.zst");
245 if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) {
246 std::string optix_include_dir = get_optix_include_dir();
247 if (optix_include_dir.empty()) {
248 set_error(
249 "Unable to compile OptiX kernels at runtime. Set OPTIX_ROOT_DIR environment variable "
250 "to a directory containing the OptiX SDK.");
251 return false;
252 }
253 if (!path_is_directory(optix_include_dir)) {
254 set_error(string_printf(
255 "OptiX headers not found at %s, unable to compile OptiX kernels at runtime. Install "
256 "OptiX SDK in the specified location, or set OPTIX_ROOT_DIR environment variable to a "
257 "directory containing the OptiX SDK.",
258 optix_include_dir.c_str()));
259 return false;
260 }
261 }
262 }
263
264 /* Load CUDA modules because we need some of the utility kernels. */
265 if (!CUDADevice::load_kernels(kernel_features)) {
266 return false;
267 }
268
269 if (!need_optix_kernels) {
270 return true;
271 }
272
273 const CUDAContextScope scope(this);
274
275 /* Unload existing OptiX module and pipelines first. */
276 if (optix_module != nullptr) {
277 optixModuleDestroy(optix_module);
278 optix_module = nullptr;
279 }
280 for (int i = 0; i < 2; ++i) {
281 if (builtin_modules[i] != nullptr) {
282 optixModuleDestroy(builtin_modules[i]);
283 builtin_modules[i] = nullptr;
284 }
285 }
286 for (int i = 0; i < NUM_PIPELINES; ++i) {
287 if (pipelines[i] != nullptr) {
288 optixPipelineDestroy(pipelines[i]);
289 pipelines[i] = nullptr;
290 }
291 }
292 for (int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
293 if (groups[i] != nullptr) {
294 optixProgramGroupDestroy(groups[i]);
295 groups[i] = nullptr;
296 }
297 }
298
299# ifdef WITH_OSL
300 if (osl_camera_module != nullptr) {
301 optixModuleDestroy(osl_camera_module);
302 osl_camera_module = nullptr;
303 }
304
305 /* Recreating base OptiX module invalidates all OSL modules too, since they link against it. */
306 for (const OptixModule &module : osl_modules) {
307 if (module != nullptr) {
308 optixModuleDestroy(module);
309 }
310 }
311 osl_modules.clear();
312
313 for (const OptixProgramGroup &group : osl_groups) {
314 if (group != nullptr) {
315 optixProgramGroupDestroy(group);
316 }
317 }
318 osl_groups.clear();
319# endif
320
321 OptixModuleCompileOptions module_options = {};
322 module_options.maxRegisterCount = 0; /* Do not set an explicit register limit. */
323
324 if (DebugFlags().optix.use_debug) {
325 module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0;
326 module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
327 }
328 else {
329 module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
330 module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
331 }
332
333 module_options.boundValues = nullptr;
334 module_options.numBoundValues = 0;
335 module_options.payloadTypes = nullptr;
336 module_options.numPayloadTypes = 0;
337
338 /* Default to no motion blur and two-level graph, since it is the fastest option. */
339 pipeline_options.usesMotionBlur = false;
340 pipeline_options.traversableGraphFlags =
341 OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING;
342 pipeline_options.numPayloadValues = 8;
343 pipeline_options.numAttributeValues = 2; /* u, v */
344 pipeline_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE;
345 pipeline_options.pipelineLaunchParamsVariableName = "kernel_params"; /* See globals.h */
346
347 pipeline_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE;
348 if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
349 pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR |
350 OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM;
351 }
352 if (kernel_features & (KERNEL_FEATURE_HAIR_RIBBON | KERNEL_FEATURE_POINTCLOUD)) {
353 pipeline_options.usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM;
354 }
355
356 /* Keep track of whether motion blur is enabled, so to enable/disable motion in BVH builds
357 * This is necessary since objects may be reported to have motion if the Vector pass is
358 * active, but may still need to be rendered without motion blur if that isn't active as well. */
359 if (kernel_features & KERNEL_FEATURE_OBJECT_MOTION) {
360 pipeline_options.usesMotionBlur = true;
361 /* Motion blur can insert motion transforms into the traversal graph.
362 * It is no longer a two-level graph then, so need to set flags to allow any configuration. */
363 pipeline_options.traversableGraphFlags = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY;
364 }
365
366 { /* Load and compile PTX module with OptiX kernels. */
367 string ptx_data;
368 if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) {
369 string cflags = compile_kernel_get_common_cflags(kernel_features);
370 ptx_filename = compile_kernel(cflags, ("kernel" + suffix).c_str(), "optix", true);
371 }
372 if (ptx_filename.empty() || !path_read_compressed_text(ptx_filename, ptx_data)) {
373 set_error(string_printf("Failed to load OptiX kernel from '%s'", ptx_filename.c_str()));
374 return false;
375 }
376
377 TaskPool pool;
378 OptixResult result;
379 create_optix_module(pool, module_options, ptx_data, optix_module, result);
380 pool.wait_work();
381 if (result != OPTIX_SUCCESS) {
382 set_error(string_printf("Failed to load OptiX kernel from '%s' (%s)",
383 ptx_filename.c_str(),
384 optixGetErrorName(result)));
385 return false;
386 }
387 }
388
389 /* Create program groups. */
390 OptixProgramGroupDesc group_descs[NUM_PROGRAM_GROUPS] = {};
391 OptixProgramGroupOptions group_options = {}; /* There are no options currently. */
392 group_descs[PG_RGEN_INTERSECT_CLOSEST].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
393 group_descs[PG_RGEN_INTERSECT_CLOSEST].raygen.module = optix_module;
394 group_descs[PG_RGEN_INTERSECT_CLOSEST].raygen.entryFunctionName =
395 "__raygen__kernel_optix_integrator_intersect_closest";
396 group_descs[PG_RGEN_INTERSECT_SHADOW].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
397 group_descs[PG_RGEN_INTERSECT_SHADOW].raygen.module = optix_module;
398 group_descs[PG_RGEN_INTERSECT_SHADOW].raygen.entryFunctionName =
399 "__raygen__kernel_optix_integrator_intersect_shadow";
400 group_descs[PG_RGEN_INTERSECT_SUBSURFACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
401 group_descs[PG_RGEN_INTERSECT_SUBSURFACE].raygen.module = optix_module;
402 group_descs[PG_RGEN_INTERSECT_SUBSURFACE].raygen.entryFunctionName =
403 "__raygen__kernel_optix_integrator_intersect_subsurface";
404 group_descs[PG_RGEN_INTERSECT_VOLUME_STACK].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
405 group_descs[PG_RGEN_INTERSECT_VOLUME_STACK].raygen.module = optix_module;
406 group_descs[PG_RGEN_INTERSECT_VOLUME_STACK].raygen.entryFunctionName =
407 "__raygen__kernel_optix_integrator_intersect_volume_stack";
408 group_descs[PG_RGEN_INTERSECT_DEDICATED_LIGHT].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
409 group_descs[PG_RGEN_INTERSECT_DEDICATED_LIGHT].raygen.module = optix_module;
410 group_descs[PG_RGEN_INTERSECT_DEDICATED_LIGHT].raygen.entryFunctionName =
411 "__raygen__kernel_optix_integrator_intersect_dedicated_light";
412 group_descs[PG_MISS].kind = OPTIX_PROGRAM_GROUP_KIND_MISS;
413 group_descs[PG_MISS].miss.module = optix_module;
414 group_descs[PG_MISS].miss.entryFunctionName = "__miss__kernel_optix_miss";
415 group_descs[PG_HITD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
416 group_descs[PG_HITD].hitgroup.moduleCH = optix_module;
417 group_descs[PG_HITD].hitgroup.entryFunctionNameCH = "__closesthit__kernel_optix_hit";
418 group_descs[PG_HITD].hitgroup.moduleAH = optix_module;
419 group_descs[PG_HITD].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_visibility_test";
420 group_descs[PG_HITS].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
421 group_descs[PG_HITS].hitgroup.moduleAH = optix_module;
422 group_descs[PG_HITS].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_shadow_all_hit";
423 group_descs[PG_HITV].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
424 group_descs[PG_HITV].hitgroup.moduleCH = optix_module;
425 group_descs[PG_HITV].hitgroup.entryFunctionNameCH = "__closesthit__kernel_optix_hit";
426 group_descs[PG_HITV].hitgroup.moduleAH = optix_module;
427 group_descs[PG_HITV].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_volume_test";
428
429 OptixProgramGroupDesc ignore_desc = {};
430 ignore_desc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
431 ignore_desc.hitgroup.moduleCH = optix_module;
432 ignore_desc.hitgroup.entryFunctionNameCH = "__closesthit__kernel_optix_ignore";
433 ignore_desc.hitgroup.moduleAH = optix_module;
434 ignore_desc.hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_ignore";
435
436 if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
437 /* Built-in thick curve intersection. */
438 OptixBuiltinISOptions builtin_options = {};
439 builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
440 builtin_options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE |
441 OPTIX_BUILD_FLAG_ALLOW_COMPACTION | OPTIX_BUILD_FLAG_ALLOW_UPDATE;
442 builtin_options.curveEndcapFlags = OPTIX_CURVE_ENDCAP_DEFAULT; /* Disable end-caps. */
443 builtin_options.usesMotionBlur = false;
444
445 optix_assert(optixBuiltinISModuleGet(
446 context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[0]));
447
448 group_descs[PG_HITD].hitgroup.moduleIS = builtin_modules[0];
449 group_descs[PG_HITD].hitgroup.entryFunctionNameIS = nullptr;
450 group_descs[PG_HITS].hitgroup.moduleIS = builtin_modules[0];
451 group_descs[PG_HITS].hitgroup.entryFunctionNameIS = nullptr;
452
453 if (pipeline_options.usesMotionBlur) {
454 builtin_options.usesMotionBlur = true;
455
456 optix_assert(optixBuiltinISModuleGet(
457 context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[1]));
458
459 group_descs[PG_HITD_MOTION] = group_descs[PG_HITD];
460 group_descs[PG_HITD_MOTION].hitgroup.moduleIS = builtin_modules[1];
461 group_descs[PG_HITS_MOTION] = group_descs[PG_HITS];
462 group_descs[PG_HITS_MOTION].hitgroup.moduleIS = builtin_modules[1];
463 }
464
465 builtin_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR;
466 builtin_options.usesMotionBlur = false;
467
468 optix_assert(optixBuiltinISModuleGet(
469 context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[2]));
470
471 group_descs[PG_HITD_CURVE_LINEAR] = group_descs[PG_HITD];
472 group_descs[PG_HITD_CURVE_LINEAR].hitgroup.moduleIS = builtin_modules[2];
473 group_descs[PG_HITS_CURVE_LINEAR] = group_descs[PG_HITS];
474 group_descs[PG_HITS_CURVE_LINEAR].hitgroup.moduleIS = builtin_modules[2];
475 group_descs[PG_HITV_CURVE_LINEAR] = ignore_desc;
476 group_descs[PG_HITL_CURVE_LINEAR] = ignore_desc;
477
478 if (pipeline_options.usesMotionBlur) {
479 builtin_options.usesMotionBlur = true;
480
481 optix_assert(optixBuiltinISModuleGet(
482 context, &module_options, &pipeline_options, &builtin_options, &builtin_modules[3]));
483
484 group_descs[PG_HITD_CURVE_LINEAR_MOTION] = group_descs[PG_HITD_CURVE_LINEAR];
485 group_descs[PG_HITD_CURVE_LINEAR_MOTION].hitgroup.moduleIS = builtin_modules[3];
486 group_descs[PG_HITS_CURVE_LINEAR_MOTION] = group_descs[PG_HITS_CURVE_LINEAR];
487 group_descs[PG_HITS_CURVE_LINEAR_MOTION].hitgroup.moduleIS = builtin_modules[3];
488 group_descs[PG_HITV_CURVE_LINEAR_MOTION] = ignore_desc;
489 group_descs[PG_HITL_CURVE_LINEAR_MOTION] = ignore_desc;
490 }
491 }
492 if (kernel_features & KERNEL_FEATURE_HAIR_RIBBON) {
493 /* Custom ribbon intersection. */
494 group_descs[PG_HITD_CURVE_RIBBON] = group_descs[PG_HITD];
495 group_descs[PG_HITD_CURVE_RIBBON].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
496 group_descs[PG_HITD_CURVE_RIBBON].hitgroup.moduleIS = optix_module;
497 group_descs[PG_HITD_CURVE_RIBBON].hitgroup.entryFunctionNameIS =
498 "__intersection__curve_ribbon";
499 group_descs[PG_HITS_CURVE_RIBBON] = group_descs[PG_HITS];
500 group_descs[PG_HITS_CURVE_RIBBON].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
501 group_descs[PG_HITS_CURVE_RIBBON].hitgroup.moduleIS = optix_module;
502 group_descs[PG_HITS_CURVE_RIBBON].hitgroup.entryFunctionNameIS =
503 "__intersection__curve_ribbon";
504 group_descs[PG_HITV_CURVE_RIBBON] = ignore_desc;
505 group_descs[PG_HITL_CURVE_RIBBON] = ignore_desc;
506 }
507
508 if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
509 group_descs[PG_HITD_POINTCLOUD] = group_descs[PG_HITD];
510 group_descs[PG_HITD_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
511 group_descs[PG_HITD_POINTCLOUD].hitgroup.moduleIS = optix_module;
512 group_descs[PG_HITD_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point";
513 group_descs[PG_HITS_POINTCLOUD] = group_descs[PG_HITS];
514 group_descs[PG_HITS_POINTCLOUD].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
515 group_descs[PG_HITS_POINTCLOUD].hitgroup.moduleIS = optix_module;
516 group_descs[PG_HITS_POINTCLOUD].hitgroup.entryFunctionNameIS = "__intersection__point";
517 group_descs[PG_HITV_POINTCLOUD] = ignore_desc;
518 group_descs[PG_HITL_POINTCLOUD] = ignore_desc;
519 }
520
521 /* Add hit group for local intersections. */
523 group_descs[PG_HITL].kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
524 group_descs[PG_HITL].hitgroup.moduleAH = optix_module;
525 group_descs[PG_HITL].hitgroup.entryFunctionNameAH = "__anyhit__kernel_optix_local_hit";
526 }
527
528 /* Shader ray-tracing replaces some functions with direct callables. */
529 if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
530 group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
531 group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].raygen.module = optix_module;
532 group_descs[PG_RGEN_SHADE_SURFACE_RAYTRACE].raygen.entryFunctionName =
533 "__raygen__kernel_optix_integrator_shade_surface_raytrace";
534
535 /* Kernels with OSL shading support are built without SVM, so can skip those direct callables
536 * there. */
537 if (!use_osl_shading) {
538 group_descs[PG_CALL_SVM_AO].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
539 group_descs[PG_CALL_SVM_AO].callables.moduleDC = optix_module;
540 group_descs[PG_CALL_SVM_AO].callables.entryFunctionNameDC = "__direct_callable__svm_node_ao";
541 group_descs[PG_CALL_SVM_BEVEL].kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
542 group_descs[PG_CALL_SVM_BEVEL].callables.moduleDC = optix_module;
543 group_descs[PG_CALL_SVM_BEVEL].callables.entryFunctionNameDC =
544 "__direct_callable__svm_node_bevel";
545 }
546 }
547
548 if (kernel_features & KERNEL_FEATURE_MNEE) {
549 group_descs[PG_RGEN_SHADE_SURFACE_MNEE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
550 group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.module = optix_module;
551 group_descs[PG_RGEN_SHADE_SURFACE_MNEE].raygen.entryFunctionName =
552 "__raygen__kernel_optix_integrator_shade_surface_mnee";
553 }
554
555 /* OSL uses direct callables to execute, so shading needs to be done in OptiX if OSL is used. */
556 if (use_osl_shading) {
557 group_descs[PG_RGEN_SHADE_BACKGROUND].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
558 group_descs[PG_RGEN_SHADE_BACKGROUND].raygen.module = optix_module;
559 group_descs[PG_RGEN_SHADE_BACKGROUND].raygen.entryFunctionName =
560 "__raygen__kernel_optix_integrator_shade_background";
561 group_descs[PG_RGEN_SHADE_LIGHT].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
562 group_descs[PG_RGEN_SHADE_LIGHT].raygen.module = optix_module;
563 group_descs[PG_RGEN_SHADE_LIGHT].raygen.entryFunctionName =
564 "__raygen__kernel_optix_integrator_shade_light";
565 group_descs[PG_RGEN_SHADE_SURFACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
566 group_descs[PG_RGEN_SHADE_SURFACE].raygen.module = optix_module;
567 group_descs[PG_RGEN_SHADE_SURFACE].raygen.entryFunctionName =
568 "__raygen__kernel_optix_integrator_shade_surface";
569 group_descs[PG_RGEN_SHADE_VOLUME].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
570 group_descs[PG_RGEN_SHADE_VOLUME].raygen.module = optix_module;
571 group_descs[PG_RGEN_SHADE_VOLUME].raygen.entryFunctionName =
572 "__raygen__kernel_optix_integrator_shade_volume";
573 group_descs[PG_RGEN_SHADE_VOLUME_RAY_MARCHING].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
574 group_descs[PG_RGEN_SHADE_VOLUME_RAY_MARCHING].raygen.module = optix_module;
575 group_descs[PG_RGEN_SHADE_VOLUME_RAY_MARCHING].raygen.entryFunctionName =
576 "__raygen__kernel_optix_integrator_shade_volume_ray_marching";
577 group_descs[PG_RGEN_SHADE_SHADOW].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
578 group_descs[PG_RGEN_SHADE_SHADOW].raygen.module = optix_module;
579 group_descs[PG_RGEN_SHADE_SHADOW].raygen.entryFunctionName =
580 "__raygen__kernel_optix_integrator_shade_shadow";
581 group_descs[PG_RGEN_SHADE_DEDICATED_LIGHT].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
582 group_descs[PG_RGEN_SHADE_DEDICATED_LIGHT].raygen.module = optix_module;
583 group_descs[PG_RGEN_SHADE_DEDICATED_LIGHT].raygen.entryFunctionName =
584 "__raygen__kernel_optix_integrator_shade_dedicated_light";
585 group_descs[PG_RGEN_EVAL_DISPLACE].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
586 group_descs[PG_RGEN_EVAL_DISPLACE].raygen.module = optix_module;
587 group_descs[PG_RGEN_EVAL_DISPLACE].raygen.entryFunctionName =
588 "__raygen__kernel_optix_shader_eval_displace";
589 group_descs[PG_RGEN_EVAL_BACKGROUND].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
590 group_descs[PG_RGEN_EVAL_BACKGROUND].raygen.module = optix_module;
591 group_descs[PG_RGEN_EVAL_BACKGROUND].raygen.entryFunctionName =
592 "__raygen__kernel_optix_shader_eval_background";
593 group_descs[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
594 group_descs[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY].raygen.module = optix_module;
595 group_descs[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY].raygen.entryFunctionName =
596 "__raygen__kernel_optix_shader_eval_curve_shadow_transparency";
597 group_descs[PG_RGEN_EVAL_VOLUME_DENSITY].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
598 group_descs[PG_RGEN_EVAL_VOLUME_DENSITY].raygen.module = optix_module;
599 group_descs[PG_RGEN_EVAL_VOLUME_DENSITY].raygen.entryFunctionName =
600 "__raygen__kernel_optix_shader_eval_volume_density";
601 }
602
603# ifdef WITH_OSL
604 /* When using custom OSL cameras, integrator_init_from_camera is its own specialized module. */
605 if (use_osl_camera) {
606 /* Load and compile the OSL camera PTX module. */
607 string ptx_data, ptx_filename = path_get("lib/kernel_optix_osl_camera.ptx.zst");
608 if (!path_read_compressed_text(ptx_filename, ptx_data)) {
609 set_error(
610 string_printf("Failed to load OptiX OSL camera kernel from '%s'", ptx_filename.c_str()));
611 return false;
612 }
613
614 TaskPool pool;
615 OptixResult result;
616 create_optix_module(pool, module_options, ptx_data, osl_camera_module, result);
617 pool.wait_work();
618 if (result != OPTIX_SUCCESS) {
619 set_error(string_printf("Failed to load OptiX kernel from '%s' (%s)",
620 ptx_filename.c_str(),
621 optixGetErrorName(result)));
622 return false;
623 }
624
625 group_descs[PG_RGEN_INIT_FROM_CAMERA].kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
626 group_descs[PG_RGEN_INIT_FROM_CAMERA].raygen.module = osl_camera_module;
627 group_descs[PG_RGEN_INIT_FROM_CAMERA].raygen.entryFunctionName =
628 "__raygen__kernel_optix_integrator_init_from_camera";
629 }
630# endif
631
632 optix_assert(optixProgramGroupCreate(
633 context, group_descs, NUM_PROGRAM_GROUPS, &group_options, nullptr, nullptr, groups));
634
635 /* Get program stack sizes. */
636 OptixStackSizes stack_size[NUM_PROGRAM_GROUPS] = {};
637 /* Set up SBT, which in this case is used only to select between different programs. */
638 sbt_data.alloc(NUM_PROGRAM_GROUPS);
639 memset(sbt_data.host_pointer, 0, sizeof(SbtRecord) * NUM_PROGRAM_GROUPS);
640 for (int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
641 optix_assert(optixSbtRecordPackHeader(groups[i], &sbt_data[i]));
642 optix_assert(optixProgramGroupGetStackSize(groups[i], &stack_size[i], nullptr));
643 }
644 sbt_data.copy_to_device(); /* Upload SBT to device. */
645
646 /* Calculate maximum trace continuation stack size. */
647 unsigned int trace_css = stack_size[PG_HITD].cssCH;
648 /* This is based on the maximum of closest-hit and any-hit/intersection programs. */
649 trace_css = std::max(trace_css, stack_size[PG_HITD].cssIS + stack_size[PG_HITD].cssAH);
650 trace_css = std::max(trace_css, stack_size[PG_HITS].cssIS + stack_size[PG_HITS].cssAH);
651 trace_css = std::max(trace_css, stack_size[PG_HITL].cssIS + stack_size[PG_HITL].cssAH);
652 trace_css = std::max(trace_css, stack_size[PG_HITV].cssIS + stack_size[PG_HITV].cssAH);
653 trace_css = std::max(trace_css,
654 stack_size[PG_HITD_MOTION].cssIS + stack_size[PG_HITD_MOTION].cssAH);
655 trace_css = std::max(trace_css,
656 stack_size[PG_HITS_MOTION].cssIS + stack_size[PG_HITS_MOTION].cssAH);
657 trace_css = std::max(
658 trace_css, stack_size[PG_HITD_CURVE_LINEAR].cssIS + stack_size[PG_HITD_CURVE_LINEAR].cssAH);
659 trace_css = std::max(
660 trace_css, stack_size[PG_HITS_CURVE_LINEAR].cssIS + stack_size[PG_HITS_CURVE_LINEAR].cssAH);
661 trace_css = std::max(trace_css,
662 stack_size[PG_HITD_CURVE_LINEAR_MOTION].cssIS +
663 stack_size[PG_HITD_CURVE_LINEAR_MOTION].cssAH);
664 trace_css = std::max(trace_css,
665 stack_size[PG_HITS_CURVE_LINEAR_MOTION].cssIS +
666 stack_size[PG_HITS_CURVE_LINEAR_MOTION].cssAH);
667 trace_css = std::max(
668 trace_css, stack_size[PG_HITD_CURVE_RIBBON].cssIS + stack_size[PG_HITD_CURVE_RIBBON].cssAH);
669 trace_css = std::max(
670 trace_css, stack_size[PG_HITS_CURVE_RIBBON].cssIS + stack_size[PG_HITS_CURVE_RIBBON].cssAH);
671 trace_css = std::max(
672 trace_css, stack_size[PG_HITD_POINTCLOUD].cssIS + stack_size[PG_HITD_POINTCLOUD].cssAH);
673 trace_css = std::max(
674 trace_css, stack_size[PG_HITS_POINTCLOUD].cssIS + stack_size[PG_HITS_POINTCLOUD].cssAH);
675
676 OptixPipelineLinkOptions link_options = {};
677 link_options.maxTraceDepth = 1;
678
679 if (use_osl_shading || use_osl_camera) {
680 /* OSL kernels will be (re)created on by OSL manager. */
681 }
682 else if (kernel_features & (KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_MNEE)) {
683 /* Create shader ray-tracing and MNEE pipeline. */
684 vector<OptixProgramGroup> pipeline_groups;
685 pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
686 if (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
687 pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_RAYTRACE]);
688 pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
689 pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
690 }
691 if (kernel_features & KERNEL_FEATURE_MNEE) {
692 pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]);
693 }
694 pipeline_groups.push_back(groups[PG_MISS]);
695 pipeline_groups.push_back(groups[PG_HITD]);
696 pipeline_groups.push_back(groups[PG_HITS]);
697 pipeline_groups.push_back(groups[PG_HITL]);
698 pipeline_groups.push_back(groups[PG_HITV]);
699 if (pipeline_options.usesMotionBlur) {
700 pipeline_groups.push_back(groups[PG_HITD_MOTION]);
701 pipeline_groups.push_back(groups[PG_HITS_MOTION]);
702 pipeline_groups.push_back(groups[PG_HITV_MOTION]);
703 pipeline_groups.push_back(groups[PG_HITL_MOTION]);
704 }
705 if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
706 pipeline_groups.push_back(groups[PG_HITD_CURVE_LINEAR]);
707 pipeline_groups.push_back(groups[PG_HITS_CURVE_LINEAR]);
708 pipeline_groups.push_back(groups[PG_HITV_CURVE_LINEAR]);
709 pipeline_groups.push_back(groups[PG_HITL_CURVE_LINEAR]);
710 if (pipeline_options.usesMotionBlur) {
711 pipeline_groups.push_back(groups[PG_HITD_CURVE_LINEAR_MOTION]);
712 pipeline_groups.push_back(groups[PG_HITS_CURVE_LINEAR_MOTION]);
713 pipeline_groups.push_back(groups[PG_HITV_CURVE_LINEAR_MOTION]);
714 pipeline_groups.push_back(groups[PG_HITL_CURVE_LINEAR_MOTION]);
715 }
716 }
717 if (kernel_features & KERNEL_FEATURE_HAIR_RIBBON) {
718 pipeline_groups.push_back(groups[PG_HITD_CURVE_RIBBON]);
719 pipeline_groups.push_back(groups[PG_HITS_CURVE_RIBBON]);
720 pipeline_groups.push_back(groups[PG_HITV_CURVE_RIBBON]);
721 pipeline_groups.push_back(groups[PG_HITL_CURVE_RIBBON]);
722 }
723 if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
724 pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
725 pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
726 pipeline_groups.push_back(groups[PG_HITV_POINTCLOUD]);
727 pipeline_groups.push_back(groups[PG_HITL_POINTCLOUD]);
728 }
729
730 optix_assert(optixPipelineCreate(context,
731 &pipeline_options,
732 &link_options,
733 pipeline_groups.data(),
734 pipeline_groups.size(),
735 nullptr,
736 nullptr,
737 &pipelines[PIP_SHADE]));
738
739 /* Combine ray generation and trace continuation stack size. */
740 const unsigned int css = std::max(stack_size[PG_RGEN_SHADE_SURFACE_RAYTRACE].cssRG,
741 stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG) +
742 link_options.maxTraceDepth * trace_css;
743 const unsigned int dss = std::max(stack_size[PG_CALL_SVM_AO].dssDC,
744 stack_size[PG_CALL_SVM_BEVEL].dssDC);
745
746 /* Set stack size depending on pipeline options. */
747 optix_assert(optixPipelineSetStackSize(
748 pipelines[PIP_SHADE], 0, dss, css, pipeline_options.usesMotionBlur ? 3 : 2));
749 }
750
751 { /* Create intersection-only pipeline. */
752 vector<OptixProgramGroup> pipeline_groups;
753 pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
754 pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_CLOSEST]);
755 pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_SHADOW]);
756 pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_SUBSURFACE]);
757 pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_VOLUME_STACK]);
758 pipeline_groups.push_back(groups[PG_RGEN_INTERSECT_DEDICATED_LIGHT]);
759 pipeline_groups.push_back(groups[PG_MISS]);
760 pipeline_groups.push_back(groups[PG_HITD]);
761 pipeline_groups.push_back(groups[PG_HITS]);
762 pipeline_groups.push_back(groups[PG_HITL]);
763 pipeline_groups.push_back(groups[PG_HITV]);
764 if (pipeline_options.usesMotionBlur) {
765 pipeline_groups.push_back(groups[PG_HITD_MOTION]);
766 pipeline_groups.push_back(groups[PG_HITS_MOTION]);
767 }
768 if (kernel_features & KERNEL_FEATURE_HAIR_THICK) {
769 pipeline_groups.push_back(groups[PG_HITD_CURVE_LINEAR]);
770 pipeline_groups.push_back(groups[PG_HITS_CURVE_LINEAR]);
771 if (pipeline_options.usesMotionBlur) {
772 pipeline_groups.push_back(groups[PG_HITD_CURVE_LINEAR_MOTION]);
773 pipeline_groups.push_back(groups[PG_HITS_CURVE_LINEAR_MOTION]);
774 }
775 }
776 if (kernel_features & KERNEL_FEATURE_HAIR_RIBBON) {
777 pipeline_groups.push_back(groups[PG_HITD_CURVE_RIBBON]);
778 pipeline_groups.push_back(groups[PG_HITS_CURVE_RIBBON]);
779 }
780 if (kernel_features & KERNEL_FEATURE_POINTCLOUD) {
781 pipeline_groups.push_back(groups[PG_HITD_POINTCLOUD]);
782 pipeline_groups.push_back(groups[PG_HITS_POINTCLOUD]);
783 }
784
785 optix_assert(optixPipelineCreate(context,
786 &pipeline_options,
787 &link_options,
788 pipeline_groups.data(),
789 pipeline_groups.size(),
790 nullptr,
791 nullptr,
792 &pipelines[PIP_INTERSECT]));
793
794 /* Calculate continuation stack size based on the maximum of all ray generation stack sizes. */
795 const unsigned int css =
796 std::max(stack_size[PG_RGEN_INTERSECT_CLOSEST].cssRG,
797 std::max(stack_size[PG_RGEN_INTERSECT_SHADOW].cssRG,
798 std::max(stack_size[PG_RGEN_INTERSECT_SUBSURFACE].cssRG,
799 stack_size[PG_RGEN_INTERSECT_VOLUME_STACK].cssRG))) +
800 link_options.maxTraceDepth * trace_css;
801
802 optix_assert(optixPipelineSetStackSize(
803 pipelines[PIP_INTERSECT], 0, 0, css, pipeline_options.usesMotionBlur ? 3 : 2));
804 }
805
806 return !have_error();
807}
808
809bool OptiXDevice::load_osl_kernels()
810{
811# ifdef WITH_OSL
812 if (have_error()) {
813 return false;
814 }
815
816 struct OSLKernel {
817 string ptx;
818 string fused_entry;
819 };
820
821 auto get_osl_kernel = [&](const OSL::ShaderGroupRef &group) {
822 if (!group) {
823 return OSLKernel{};
824 }
825 string osl_ptx, fused_name;
826 osl_globals.ss->getattribute(group.get(), "group_fused_name", fused_name);
827 osl_globals.ss->getattribute(
828 group.get(), "ptx_compiled_version", OSL::TypeDesc::PTR, &osl_ptx);
829
830 int groupdata_size = 0;
831 osl_globals.ss->getattribute(group.get(), "llvm_groupdata_size", groupdata_size);
832 if (groupdata_size == 0) {
833 // Old attribute name from our patched OSL version as fallback.
834 osl_globals.ss->getattribute(group.get(), "groupdata_size", groupdata_size);
835 }
836 if (groupdata_size > 2048) { /* See 'group_data' array in kernel/osl/osl.h */
837 set_error(
838 string_printf("Requested OSL group data size (%d) is greater than the maximum "
839 "supported with OptiX (2048)",
840 groupdata_size));
841 return OSLKernel{};
842 }
843
844 return OSLKernel{std::move(osl_ptx), std::move(fused_name)};
845 };
846
847 /* This has to be in the same order as the ShaderType enum, so that the index calculation in
848 * osl_eval_nodes checks out */
849 vector<OSLKernel> osl_kernels;
850 osl_kernels.emplace_back(get_osl_kernel(osl_globals.camera_state));
851 for (const OSL::ShaderGroupRef &group : osl_globals.surface_state) {
852 osl_kernels.emplace_back(get_osl_kernel(group));
853 }
854 for (const OSL::ShaderGroupRef &group : osl_globals.volume_state) {
855 osl_kernels.emplace_back(get_osl_kernel(group));
856 }
857 for (const OSL::ShaderGroupRef &group : osl_globals.displacement_state) {
858 osl_kernels.emplace_back(get_osl_kernel(group));
859 }
860 for (const OSL::ShaderGroupRef &group : osl_globals.bump_state) {
861 osl_kernels.emplace_back(get_osl_kernel(group));
862 }
863
864 if (have_error()) {
865 return false;
866 }
867
868 const CUDAContextScope scope(this);
869
870 if (pipelines[PIP_SHADE]) {
871 optixPipelineDestroy(pipelines[PIP_SHADE]);
872 }
873
874 for (OptixModule &module : osl_modules) {
875 if (module != nullptr) {
876 optixModuleDestroy(module);
877 module = nullptr;
878 }
879 }
880 for (OptixProgramGroup &group : osl_groups) {
881 if (group != nullptr) {
882 optixProgramGroupDestroy(group);
883 group = nullptr;
884 }
885 }
886
887 /* We always need to reserve a spot for the camera shader group, but if it's unused
888 * and there are no other shader groups, we can skip creating the pipeline. */
889 if (osl_kernels.size() == 1 && osl_kernels[0].ptx.empty()) {
890 return true;
891 }
892
893 OptixProgramGroupOptions group_options = {}; /* There are no options currently. */
894 OptixModuleCompileOptions module_options = {};
895 module_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3;
896 module_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE;
897
898 /* In addition to the modules for each OSL group, we need to load our own osl_services.ptx
899 * as well as the shadeops.ptx that's embedded in OSL. */
900 size_t id_osl_services = osl_kernels.size();
901 size_t id_osl_shadeops = osl_kernels.size() + 1;
902 osl_groups.resize(osl_kernels.size() + 2);
903 osl_modules.resize(osl_kernels.size() + 2);
904
905 { /* Load and compile PTX module with OSL services. */
906 string osl_services_ptx, ptx_filename = path_get("lib/kernel_optix_osl_services.ptx.zst");
907 if (!path_read_compressed_text(ptx_filename, osl_services_ptx)) {
908 set_error(string_printf("Failed to load OptiX OSL services kernel from '%s'",
909 ptx_filename.c_str()));
910 return false;
911 }
912
913 const char *shadeops_ptx_ptr = nullptr;
914 osl_globals.ss->getattribute("shadeops_cuda_ptx", OSL::TypeDesc::PTR, &shadeops_ptx_ptr);
915 int shadeops_ptx_size = 0;
916 osl_globals.ss->getattribute("shadeops_cuda_ptx_size", OSL::TypeDesc::INT, &shadeops_ptx_size);
917 string shadeops_ptx(shadeops_ptx_ptr, shadeops_ptx_size);
918
919 TaskPool pool;
920 OptixResult services_result, shadeops_result;
921 create_optix_module(
922 pool, module_options, osl_services_ptx, osl_modules[id_osl_services], services_result);
923 create_optix_module(
924 pool, module_options, shadeops_ptx, osl_modules[id_osl_shadeops], shadeops_result);
925 pool.wait_work();
926
927 {
928 if (services_result != OPTIX_SUCCESS) {
929 set_error(string_printf("Failed to load OptiX OSL services kernel from '%s' (%s)",
930 ptx_filename.c_str(),
931 optixGetErrorName(services_result)));
932 return false;
933 }
934 OptixProgramGroupDesc group_desc = {};
935 group_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
936 group_desc.callables.entryFunctionNameDC = "__direct_callable__dummy_services";
937 group_desc.callables.moduleDC = osl_modules[id_osl_services];
938
939 optix_assert(optixProgramGroupCreate(context,
940 &group_desc,
941 1,
942 &group_options,
943 nullptr,
944 nullptr,
945 &osl_groups[id_osl_services]));
946 }
947
948 {
949 if (shadeops_result != OPTIX_SUCCESS) {
950 set_error(string_printf("Failed to load OptiX OSL shadeops kernel (%s)",
951 optixGetErrorName(shadeops_result)));
952 return false;
953 }
954 OptixProgramGroupDesc group_desc = {};
955 group_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
956 group_desc.callables.entryFunctionNameDC = "__direct_callable__dummy_shadeops";
957 group_desc.callables.moduleDC = osl_modules[id_osl_shadeops];
958
959 optix_assert(optixProgramGroupCreate(context,
960 &group_desc,
961 1,
962 &group_options,
963 nullptr,
964 nullptr,
965 &osl_groups[id_osl_shadeops]));
966 }
967 }
968
969 TaskPool pool;
970 vector<OptixResult> results(osl_kernels.size(), OPTIX_SUCCESS);
971
972 for (size_t i = 0; i < osl_kernels.size(); ++i) {
973 if (osl_kernels[i].ptx.empty()) {
974 continue;
975 }
976
977 create_optix_module(pool, module_options, osl_kernels[i].ptx, osl_modules[i], results[i]);
978 }
979
980 pool.wait_work();
981
982 for (size_t i = 0; i < osl_kernels.size(); ++i) {
983 if (osl_kernels[i].ptx.empty()) {
984 continue;
985 }
986
987 if (results[i] != OPTIX_SUCCESS) {
988 set_error(string_printf("Failed to load OptiX OSL kernel for %s (%s)",
989 osl_kernels[i].fused_entry.c_str(),
990 optixGetErrorName(results[i])));
991 return false;
992 }
993
994 OptixProgramGroupDesc group_desc = {};
995 group_desc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES;
996 group_desc.callables.entryFunctionNameDC = osl_kernels[i].fused_entry.c_str();
997 group_desc.callables.moduleDC = osl_modules[i];
998
999 optix_assert(optixProgramGroupCreate(
1000 context, &group_desc, 1, &group_options, nullptr, nullptr, &osl_groups[i]));
1001 }
1002
1003 /* Update SBT with new entries. */
1004 sbt_data.alloc(NUM_PROGRAM_GROUPS + osl_groups.size());
1005 for (int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
1006 optix_assert(optixSbtRecordPackHeader(groups[i], &sbt_data[i]));
1007 }
1008 for (size_t i = 0; i < osl_groups.size(); ++i) {
1009 if (osl_groups[i] != nullptr) {
1010 optix_assert(optixSbtRecordPackHeader(osl_groups[i], &sbt_data[NUM_PROGRAM_GROUPS + i]));
1011 }
1012 else {
1013 /* Default to "__direct_callable__dummy_services", so that OSL evaluation for empty
1014 * materials has direct callables to call and does not crash. */
1015 optix_assert(optixSbtRecordPackHeader(osl_groups[id_osl_services],
1016 &sbt_data[NUM_PROGRAM_GROUPS + i]));
1017 }
1018 }
1019 sbt_data.copy_to_device(); /* Upload updated SBT to device. */
1020
1021 OptixPipelineLinkOptions link_options = {};
1022 link_options.maxTraceDepth = 0;
1023
1024 {
1025 vector<OptixProgramGroup> pipeline_groups;
1026 pipeline_groups.reserve(NUM_PROGRAM_GROUPS);
1027 pipeline_groups.push_back(groups[PG_RGEN_SHADE_BACKGROUND]);
1028 pipeline_groups.push_back(groups[PG_RGEN_SHADE_LIGHT]);
1029 pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE]);
1030 pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_RAYTRACE]);
1031 pipeline_groups.push_back(groups[PG_CALL_SVM_AO]);
1032 pipeline_groups.push_back(groups[PG_CALL_SVM_BEVEL]);
1033 pipeline_groups.push_back(groups[PG_RGEN_SHADE_SURFACE_MNEE]);
1034 pipeline_groups.push_back(groups[PG_RGEN_SHADE_VOLUME]);
1035 pipeline_groups.push_back(groups[PG_RGEN_SHADE_SHADOW]);
1036 pipeline_groups.push_back(groups[PG_RGEN_SHADE_DEDICATED_LIGHT]);
1037 pipeline_groups.push_back(groups[PG_RGEN_EVAL_DISPLACE]);
1038 pipeline_groups.push_back(groups[PG_RGEN_EVAL_BACKGROUND]);
1039 pipeline_groups.push_back(groups[PG_RGEN_EVAL_CURVE_SHADOW_TRANSPARENCY]);
1040 pipeline_groups.push_back(groups[PG_RGEN_INIT_FROM_CAMERA]);
1041 pipeline_groups.push_back(groups[PG_RGEN_EVAL_VOLUME_DENSITY]);
1042
1043 for (const OptixProgramGroup &group : osl_groups) {
1044 if (group != nullptr) {
1045 pipeline_groups.push_back(group);
1046 }
1047 }
1048
1049 optix_assert(optixPipelineCreate(context,
1050 &pipeline_options,
1051 &link_options,
1052 pipeline_groups.data(),
1053 pipeline_groups.size(),
1054 nullptr,
1055 nullptr,
1056 &pipelines[PIP_SHADE]));
1057
1058 /* Get program stack sizes. */
1059 OptixStackSizes stack_size[NUM_PROGRAM_GROUPS] = {};
1060 vector<OptixStackSizes> osl_stack_size(osl_groups.size());
1061
1062 for (int i = 0; i < NUM_PROGRAM_GROUPS; ++i) {
1063 optix_assert(optixProgramGroupGetStackSize(groups[i], &stack_size[i], nullptr));
1064 }
1065 for (size_t i = 0; i < osl_groups.size(); ++i) {
1066 if (osl_groups[i] != nullptr) {
1067 optix_assert(optixProgramGroupGetStackSize(
1068 osl_groups[i], &osl_stack_size[i], pipelines[PIP_SHADE]));
1069 }
1070 }
1071
1072 const unsigned int css = std::max(stack_size[PG_RGEN_SHADE_SURFACE_RAYTRACE].cssRG,
1073 stack_size[PG_RGEN_SHADE_SURFACE_MNEE].cssRG);
1074 unsigned int dss = std::max(stack_size[PG_CALL_SVM_AO].dssDC,
1075 stack_size[PG_CALL_SVM_BEVEL].dssDC);
1076 for (unsigned int i = 0; i < osl_stack_size.size(); ++i) {
1077 dss = std::max(dss, osl_stack_size[i].dssDC);
1078 }
1079
1080 optix_assert(optixPipelineSetStackSize(
1081 pipelines[PIP_SHADE], 0, dss, css, pipeline_options.usesMotionBlur ? 3 : 2));
1082 }
1083
1084 /* Copy colorsystem data from OSL to the device. */
1085 {
1086 /* The interface here is somewhat complex, since the colorsystem contains strings whose
1087 * representation is different between CPU and GPU.
1088 * OSL's ColorSystem type therefore consists of two parts: First the "fixed data" (e.g. floats)
1089 * that is identical between both, and then the strings.
1090 * To perform this conversion, in addition to the pointer to the CPU data, we query two sizes:
1091 * The total size of the CPU data and the number of strings. */
1092 uint8_t *cpu_data = nullptr;
1093 size_t cpu_data_sizes[2] = {0, 0};
1094 osl_globals.ss->getattribute("colorsystem", OSL::TypeDesc::PTR, &cpu_data);
1095 osl_globals.ss->getattribute(
1096 "colorsystem:sizes", TypeDesc(TypeDesc::LONGLONG, 2), (void *)cpu_data_sizes);
1097
1098 size_t cpu_full_size = cpu_data_sizes[0];
1099 size_t num_strings = cpu_data_sizes[1];
1100 size_t fixed_data_size = cpu_full_size - sizeof(ustringhash) * num_strings;
1101
1102 /* Allocate a buffer to fit the fixed data, as well as all the strings in GPU form. */
1103 uint8_t *gpu_data = osl_colorsystem.alloc(fixed_data_size + sizeof(size_t) * num_strings);
1104
1105 /* Copy the fixed data as-is. */
1106 memcpy(gpu_data, cpu_data, fixed_data_size);
1107
1108 /* Convert each string to GPU format. */
1109 ustringhash *cpu_strings = reinterpret_cast<ustringhash *>(cpu_data + fixed_data_size);
1110 size_t *gpu_strings = reinterpret_cast<size_t *>(gpu_data + fixed_data_size);
1111 for (int i = 0; i < num_strings; i++) {
1112 gpu_strings[i] = cpu_strings[i].hash();
1113 }
1114
1115 /* Copy GPU form of the data to the device. */
1116 osl_colorsystem.copy_to_device();
1117
1118 update_launch_params(offsetof(KernelParamsOptiX, osl_colorsystem),
1119 &osl_colorsystem.device_pointer,
1120 sizeof(device_ptr));
1121 }
1122
1123 return !have_error();
1124# else
1125 return false;
1126# endif
1127}
1128
1129OSLGlobals *OptiXDevice::get_cpu_osl_memory()
1130{
1131# ifdef WITH_OSL
1132 return &osl_globals;
1133# else
1134 return nullptr;
1135# endif
1136}
1137
1138bool OptiXDevice::build_optix_bvh(BVHOptiX *bvh,
1139 OptixBuildOperation operation,
1140 const OptixBuildInput &build_input,
1141 const uint16_t num_motion_steps)
1142{
1143 /* Allocate and build acceleration structures only one at a time, to prevent parallel builds
1144 * from running out of memory (since both original and compacted acceleration structure memory
1145 * may be allocated at the same time for the duration of this function). The builds would
1146 * otherwise happen on the same CUDA stream anyway. */
1147 static thread_mutex mutex;
1149
1150 const CUDAContextScope scope(this);
1151
1152 bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC);
1153
1154 /* Compute memory usage. */
1155 OptixAccelBufferSizes sizes = {};
1156 OptixAccelBuildOptions options = {};
1157 options.operation = operation;
1158 if (build_input.type == OPTIX_BUILD_INPUT_TYPE_CURVES) {
1159 /* The build flags have to match the ones used to query the built-in curve intersection
1160 * program (see optixBuiltinISModuleGet above) */
1161 options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | OPTIX_BUILD_FLAG_ALLOW_COMPACTION |
1162 OPTIX_BUILD_FLAG_ALLOW_UPDATE;
1163 use_fast_trace_bvh = true;
1164 }
1165 else if (use_fast_trace_bvh) {
1166 LOG_INFO << "Using fast to trace OptiX BVH";
1167 options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_TRACE | OPTIX_BUILD_FLAG_ALLOW_COMPACTION;
1168 }
1169 else {
1170 LOG_INFO << "Using fast to update OptiX BVH";
1171 options.buildFlags = OPTIX_BUILD_FLAG_PREFER_FAST_BUILD | OPTIX_BUILD_FLAG_ALLOW_UPDATE;
1172 }
1173
1174 options.motionOptions.numKeys = num_motion_steps;
1175 options.motionOptions.flags = OPTIX_MOTION_FLAG_START_VANISH | OPTIX_MOTION_FLAG_END_VANISH;
1176 options.motionOptions.timeBegin = 0.0f;
1177 options.motionOptions.timeEnd = 1.0f;
1178
1179 optix_assert(optixAccelComputeMemoryUsage(context, &options, &build_input, 1, &sizes));
1180
1181 /* Allocate required output buffers. */
1182 device_only_memory<char> temp_mem(this, "optix temp as build mem", true);
1183 temp_mem.alloc_to_device(align_up(sizes.tempSizeInBytes, 8) + 8);
1184 if (!temp_mem.device_pointer) {
1185 /* Make sure temporary memory allocation succeeded. */
1186 return false;
1187 }
1188
1189 /* Acceleration structure memory has to be allocated on the device (not allowed on the host). */
1190 device_only_memory<char> &out_data = *bvh->as_data;
1191 if (operation == OPTIX_BUILD_OPERATION_BUILD) {
1192 assert(out_data.device == this);
1193 out_data.alloc_to_device(sizes.outputSizeInBytes);
1194 if (!out_data.device_pointer) {
1195 return false;
1196 }
1197 }
1198 else {
1199 assert(out_data.device_pointer && out_data.device_size >= sizes.outputSizeInBytes);
1200 }
1201
1202 /* Finally build the acceleration structure. */
1203 OptixAccelEmitDesc compacted_size_prop = {};
1204 compacted_size_prop.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE;
1205 /* A tiny space was allocated for this property at the end of the temporary buffer above.
1206 * Make sure this pointer is 8-byte aligned. */
1207 compacted_size_prop.result = align_up(temp_mem.device_pointer + sizes.tempSizeInBytes, 8);
1208
1209 OptixTraversableHandle out_handle = 0;
1210 optix_assert(optixAccelBuild(context,
1211 nullptr,
1212 &options,
1213 &build_input,
1214 1,
1215 temp_mem.device_pointer,
1216 sizes.tempSizeInBytes,
1217 out_data.device_pointer,
1218 sizes.outputSizeInBytes,
1219 &out_handle,
1220 use_fast_trace_bvh ? &compacted_size_prop : nullptr,
1221 use_fast_trace_bvh ? 1 : 0));
1222 bvh->traversable_handle = static_cast<uint64_t>(out_handle);
1223
1224 /* Wait for all operations to finish. */
1225 cuda_assert(cuStreamSynchronize(nullptr));
1226
1227 /* Compact acceleration structure to save memory (do not do this in viewport for faster builds).
1228 */
1229 if (use_fast_trace_bvh) {
1230 uint64_t compacted_size = sizes.outputSizeInBytes;
1231 cuda_assert(cuMemcpyDtoH(&compacted_size, compacted_size_prop.result, sizeof(compacted_size)));
1232
1233 /* Temporary memory is no longer needed, so free it now to make space. */
1234 temp_mem.free();
1235
1236 /* There is no point compacting if the size does not change. */
1237 if (compacted_size < sizes.outputSizeInBytes) {
1238 device_only_memory<char> compacted_data(this, "optix compacted as", false);
1239 compacted_data.alloc_to_device(compacted_size);
1240 if (!compacted_data.device_pointer) {
1241 /* Do not compact if memory allocation for compacted acceleration structure fails.
1242 * Can just use the uncompacted one then, so succeed here regardless. */
1243 return !have_error();
1244 }
1245
1246 optix_assert(optixAccelCompact(context,
1247 nullptr,
1248 out_handle,
1249 compacted_data.device_pointer,
1250 compacted_size,
1251 &out_handle));
1252 bvh->traversable_handle = static_cast<uint64_t>(out_handle);
1253
1254 /* Wait for compaction to finish. */
1255 cuda_assert(cuStreamSynchronize(nullptr));
1256
1257 std::swap(out_data.device_size, compacted_data.device_size);
1258 std::swap(out_data.device_pointer, compacted_data.device_pointer);
1259 /* Original acceleration structure memory is freed when 'compacted_data' goes out of scope.
1260 */
1261 }
1262 }
1263
1264 return !have_error();
1265}
1266
1267void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
1268{
1269 const bool use_fast_trace_bvh = (bvh->params.bvh_type == BVH_TYPE_STATIC);
1270
1271 free_bvh_memory_delayed();
1272
1273 BVHOptiX *const bvh_optix = static_cast<BVHOptiX *>(bvh);
1274
1275 progress.set_substatus("Building OptiX acceleration structure");
1276
1277 if (!bvh->params.top_level) {
1278 assert(bvh->objects.size() == 1 && bvh->geometry.size() == 1);
1279
1280 /* Refit is only possible in viewport for now (because AS is built with
1281 * OPTIX_BUILD_FLAG_ALLOW_UPDATE only there, see above). */
1282 OptixBuildOperation operation = OPTIX_BUILD_OPERATION_BUILD;
1283 if (refit && !use_fast_trace_bvh) {
1284 assert(bvh_optix->traversable_handle != 0);
1285 operation = OPTIX_BUILD_OPERATION_UPDATE;
1286 }
1287 else {
1288 bvh_optix->as_data->free();
1289 bvh_optix->traversable_handle = 0;
1290 }
1291
1292 /* Build bottom level acceleration structures (BLAS). */
1293 Geometry *const geom = bvh->geometry[0];
1294 if (geom->is_hair()) {
1295 /* Build BLAS for curve primitives. */
1296 Hair *const hair = static_cast<Hair *const>(geom);
1297 if (hair->num_segments() == 0) {
1298 return;
1299 }
1300
1301 const size_t num_segments = hair->num_segments();
1302
1303 size_t num_motion_steps = 1;
1305 if (pipeline_options.usesMotionBlur && hair->get_use_motion_blur() && motion_keys) {
1306 num_motion_steps = hair->get_motion_steps();
1307 }
1308
1309 device_vector<OptixAabb> aabb_data(this, "optix temp aabb data", MEM_READ_ONLY);
1310 device_vector<int> index_data(this, "optix temp index data", MEM_READ_ONLY);
1311 device_vector<float4> vertex_data(this, "optix temp vertex data", MEM_READ_ONLY);
1312 /* Four control points for each curve segment. */
1313 size_t num_vertices = num_segments * 4;
1314 if (hair->curve_shape == CURVE_THICK_LINEAR) {
1315 num_vertices = hair->num_keys();
1316 index_data.alloc(num_segments);
1317 vertex_data.alloc(num_vertices * num_motion_steps);
1318 }
1319 else if (hair->curve_shape == CURVE_THICK) {
1320 num_vertices = hair->num_keys() + 2 * hair->num_curves();
1321 index_data.alloc(num_segments);
1322 vertex_data.alloc(num_vertices * num_motion_steps);
1323 }
1324 else {
1325 aabb_data.alloc(num_segments * num_motion_steps);
1326 }
1327
1328 /* Get AABBs for each motion step. */
1329 for (size_t step = 0; step < num_motion_steps; ++step) {
1330 /* The center step for motion vertices is not stored in the attribute. */
1331 const float3 *keys = hair->get_curve_keys().data();
1332 size_t center_step = (num_motion_steps - 1) / 2;
1333 if (step != center_step) {
1334 size_t attr_offset = (step > center_step) ? step - 1 : step;
1335 /* Technically this is a float4 array, but sizeof(float3) == sizeof(float4). */
1336 keys = motion_keys->data_float3() + attr_offset * hair->get_curve_keys().size();
1337 }
1338
1339 if (hair->curve_shape == CURVE_THICK || hair->curve_shape == CURVE_THICK_LINEAR) {
1340 for (size_t curve_index = 0, segment_index = 0, vertex_index = step * num_vertices;
1341 curve_index < hair->num_curves();
1342 ++curve_index)
1343 {
1344 const Hair::Curve curve = hair->get_curve(curve_index);
1345 const array<float> &curve_radius = hair->get_curve_radius();
1346
1347 if (hair->curve_shape == CURVE_THICK_LINEAR) {
1348 const int first_key_index = curve.first_key;
1349
1350 for (int k = 0; k < curve.num_segments(); ++k) {
1351 if (step == 0) {
1352 index_data[segment_index++] = vertex_index;
1353 }
1354 vertex_data[vertex_index++] = make_float4(keys[first_key_index + k].x,
1355 keys[first_key_index + k].y,
1356 keys[first_key_index + k].z,
1357 curve_radius[first_key_index + k]);
1358 }
1359
1360 const int last_key_index = first_key_index + curve.num_keys - 1;
1361 {
1362 vertex_data[vertex_index++] = make_float4(keys[last_key_index].x,
1363 keys[last_key_index].y,
1364 keys[last_key_index].z,
1365 curve_radius[last_key_index]);
1366 }
1367 }
1368 else {
1369 const int first_key_index = curve.first_key;
1370 {
1371 vertex_data[vertex_index++] = make_float4(keys[first_key_index].x,
1372 keys[first_key_index].y,
1373 keys[first_key_index].z,
1374 curve_radius[first_key_index]);
1375 }
1376
1377 for (int k = 0; k < curve.num_segments(); ++k) {
1378 if (step == 0) {
1379 index_data[segment_index++] = vertex_index - 1;
1380 }
1381 vertex_data[vertex_index++] = make_float4(keys[first_key_index + k].x,
1382 keys[first_key_index + k].y,
1383 keys[first_key_index + k].z,
1384 curve_radius[first_key_index + k]);
1385 }
1386
1387 const int last_key_index = first_key_index + curve.num_keys - 1;
1388 {
1389 vertex_data[vertex_index++] = make_float4(keys[last_key_index].x,
1390 keys[last_key_index].y,
1391 keys[last_key_index].z,
1392 curve_radius[last_key_index]);
1393 vertex_data[vertex_index++] = make_float4(keys[last_key_index].x,
1394 keys[last_key_index].y,
1395 keys[last_key_index].z,
1396 curve_radius[last_key_index]);
1397 }
1398 }
1399 }
1400 }
1401 else {
1402 for (size_t curve_index = 0, i = 0; curve_index < hair->num_curves(); ++curve_index) {
1403 const Hair::Curve curve = hair->get_curve(curve_index);
1404
1405 for (int segment = 0; segment < curve.num_segments(); ++segment, ++i) {
1407 curve.bounds_grow(segment, keys, hair->get_curve_radius().data(), bounds);
1408
1409 const size_t index = step * num_segments + i;
1410 aabb_data[index].minX = bounds.min.x;
1411 aabb_data[index].minY = bounds.min.y;
1412 aabb_data[index].minZ = bounds.min.z;
1413 aabb_data[index].maxX = bounds.max.x;
1414 aabb_data[index].maxY = bounds.max.y;
1415 aabb_data[index].maxZ = bounds.max.z;
1416 }
1417 }
1418 }
1419 }
1420
1421 /* Upload AABB data to GPU. */
1422 aabb_data.copy_to_device();
1423 index_data.copy_to_device();
1424 vertex_data.copy_to_device();
1425
1426 vector<device_ptr> aabb_ptrs;
1427 aabb_ptrs.reserve(num_motion_steps);
1428 vector<device_ptr> width_ptrs;
1429 vector<device_ptr> vertex_ptrs;
1430 width_ptrs.reserve(num_motion_steps);
1431 vertex_ptrs.reserve(num_motion_steps);
1432 for (size_t step = 0; step < num_motion_steps; ++step) {
1433 aabb_ptrs.push_back(aabb_data.device_pointer + step * num_segments * sizeof(OptixAabb));
1434 const device_ptr base_ptr = vertex_data.device_pointer +
1435 step * num_vertices * sizeof(float4);
1436 width_ptrs.push_back(base_ptr + 3 * sizeof(float)); /* Offset by vertex size. */
1437 vertex_ptrs.push_back(base_ptr);
1438 }
1439
1440 /* Force a single any-hit call, so shadow record-all behavior works correctly. */
1441 unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
1442 OptixBuildInput build_input = {};
1443 if (hair->curve_shape != CURVE_RIBBON) {
1444 build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES;
1445 if (hair->curve_shape == CURVE_THICK_LINEAR) {
1446 build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR;
1447 }
1448 else {
1449 build_input.curveArray.curveType = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM;
1450 }
1451 build_input.curveArray.numPrimitives = num_segments;
1452 build_input.curveArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
1453 build_input.curveArray.numVertices = num_vertices;
1454 build_input.curveArray.vertexStrideInBytes = sizeof(float4);
1455 build_input.curveArray.widthBuffers = (CUdeviceptr *)width_ptrs.data();
1456 build_input.curveArray.widthStrideInBytes = sizeof(float4);
1457 build_input.curveArray.indexBuffer = (CUdeviceptr)index_data.device_pointer;
1458 build_input.curveArray.indexStrideInBytes = sizeof(int);
1459 build_input.curveArray.flag = build_flags;
1460 build_input.curveArray.primitiveIndexOffset = hair->curve_segment_offset;
1461 }
1462 else {
1463 /* Disable visibility test any-hit program, since it is already checked during
1464 * intersection. Those trace calls that require any-hit can force it with a ray flag. */
1465 build_flags |= OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT;
1466
1467 build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
1468 build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
1469 build_input.customPrimitiveArray.numPrimitives = num_segments;
1470 build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb);
1471 build_input.customPrimitiveArray.flags = &build_flags;
1472 build_input.customPrimitiveArray.numSbtRecords = 1;
1473 build_input.customPrimitiveArray.primitiveIndexOffset = hair->curve_segment_offset;
1474 }
1475
1476 if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
1477 progress.set_error("Failed to build OptiX acceleration structure");
1478 }
1479 }
1480 else if (geom->is_mesh() || geom->is_volume()) {
1481 /* Build BLAS for triangle primitives. */
1482 Mesh *const mesh = static_cast<Mesh *const>(geom);
1483 if (mesh->num_triangles() == 0) {
1484 return;
1485 }
1486
1487 const size_t num_verts = mesh->get_verts().size();
1488
1489 size_t num_motion_steps = 1;
1491 if (pipeline_options.usesMotionBlur && mesh->get_use_motion_blur() && motion_keys) {
1492 num_motion_steps = mesh->get_motion_steps();
1493 }
1494
1495 device_vector<int> index_data(this, "optix temp index data", MEM_READ_ONLY);
1496 index_data.alloc(mesh->get_triangles().size());
1497 memcpy(index_data.data(),
1498 mesh->get_triangles().data(),
1499 mesh->get_triangles().size() * sizeof(int));
1500 device_vector<float4> vertex_data(this, "optix temp vertex data", MEM_READ_ONLY);
1501 vertex_data.alloc(num_verts * num_motion_steps);
1502
1503 for (size_t step = 0; step < num_motion_steps; ++step) {
1504 const float3 *verts = mesh->get_verts().data();
1505
1506 size_t center_step = (num_motion_steps - 1) / 2;
1507 /* The center step for motion vertices is not stored in the attribute. */
1508 if (step != center_step) {
1509 verts = motion_keys->data_float3() + (step > center_step ? step - 1 : step) * num_verts;
1510 }
1511
1512 memcpy(vertex_data.data() + num_verts * step, verts, num_verts * sizeof(float3));
1513 }
1514
1515 /* Upload triangle data to GPU. */
1516 index_data.copy_to_device();
1517 vertex_data.copy_to_device();
1518
1519 vector<device_ptr> vertex_ptrs;
1520 vertex_ptrs.reserve(num_motion_steps);
1521 for (size_t step = 0; step < num_motion_steps; ++step) {
1522 vertex_ptrs.push_back(vertex_data.device_pointer + num_verts * step * sizeof(float3));
1523 }
1524
1525 /* Force a single any-hit call, so shadow record-all behavior works correctly. */
1526 unsigned int build_flags = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
1527 OptixBuildInput build_input = {};
1528 build_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;
1529 build_input.triangleArray.vertexBuffers = (CUdeviceptr *)vertex_ptrs.data();
1530 build_input.triangleArray.numVertices = num_verts;
1531 build_input.triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3;
1532 build_input.triangleArray.vertexStrideInBytes = sizeof(float4);
1533 build_input.triangleArray.indexBuffer = index_data.device_pointer;
1534 build_input.triangleArray.numIndexTriplets = mesh->num_triangles();
1535 build_input.triangleArray.indexFormat = OPTIX_INDICES_FORMAT_UNSIGNED_INT3;
1536 build_input.triangleArray.indexStrideInBytes = 3 * sizeof(int);
1537 build_input.triangleArray.flags = &build_flags;
1538 /* The SBT does not store per primitive data since Cycles already allocates separate
1539 * buffers for that purpose. OptiX does not allow this to be zero though, so just pass in
1540 * one and rely on that having the same meaning in this case. */
1541 build_input.triangleArray.numSbtRecords = 1;
1542 build_input.triangleArray.primitiveIndexOffset = mesh->prim_offset;
1543
1544 if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
1545 progress.set_error("Failed to build OptiX acceleration structure");
1546 }
1547 }
1548 else if (geom->is_pointcloud()) {
1549 /* Build BLAS for points primitives. */
1550 PointCloud *const pointcloud = static_cast<PointCloud *const>(geom);
1551 const size_t num_points = pointcloud->num_points();
1552 if (num_points == 0) {
1553 return;
1554 }
1555
1556 size_t num_motion_steps = 1;
1557 Attribute *motion_points = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
1558 if (pipeline_options.usesMotionBlur && pointcloud->get_use_motion_blur() && motion_points) {
1559 num_motion_steps = pointcloud->get_motion_steps();
1560 }
1561
1562 device_vector<OptixAabb> aabb_data(this, "optix temp aabb data", MEM_READ_ONLY);
1563 aabb_data.alloc(num_points * num_motion_steps);
1564
1565 /* Get AABBs for each motion step. */
1566 for (size_t step = 0; step < num_motion_steps; ++step) {
1567 /* The center step for motion vertices is not stored in the attribute. */
1568 size_t center_step = (num_motion_steps - 1) / 2;
1569
1570 if (step == center_step) {
1571 const float3 *points = pointcloud->get_points().data();
1572 const float *radius = pointcloud->get_radius().data();
1573
1574 for (size_t i = 0; i < num_points; ++i) {
1575 const PointCloud::Point point = pointcloud->get_point(i);
1577 point.bounds_grow(points, radius, bounds);
1578
1579 const size_t index = step * num_points + i;
1580 aabb_data[index].minX = bounds.min.x;
1581 aabb_data[index].minY = bounds.min.y;
1582 aabb_data[index].minZ = bounds.min.z;
1583 aabb_data[index].maxX = bounds.max.x;
1584 aabb_data[index].maxY = bounds.max.y;
1585 aabb_data[index].maxZ = bounds.max.z;
1586 }
1587 }
1588 else {
1589 size_t attr_offset = (step > center_step) ? step - 1 : step;
1590 const float4 *points = motion_points->data_float4() + attr_offset * num_points;
1591
1592 for (size_t i = 0; i < num_points; ++i) {
1593 const PointCloud::Point point = pointcloud->get_point(i);
1595 point.bounds_grow(points[i], bounds);
1596
1597 const size_t index = step * num_points + i;
1598 aabb_data[index].minX = bounds.min.x;
1599 aabb_data[index].minY = bounds.min.y;
1600 aabb_data[index].minZ = bounds.min.z;
1601 aabb_data[index].maxX = bounds.max.x;
1602 aabb_data[index].maxY = bounds.max.y;
1603 aabb_data[index].maxZ = bounds.max.z;
1604 }
1605 }
1606 }
1607
1608 /* Upload AABB data to GPU. */
1609 aabb_data.copy_to_device();
1610
1611 vector<device_ptr> aabb_ptrs;
1612 aabb_ptrs.reserve(num_motion_steps);
1613 for (size_t step = 0; step < num_motion_steps; ++step) {
1614 aabb_ptrs.push_back(aabb_data.device_pointer + step * num_points * sizeof(OptixAabb));
1615 }
1616
1617 /* Disable visibility test any-hit program, since it is already checked during
1618 * intersection. Those trace calls that require anyhit can force it with a ray flag.
1619 * For those, force a single any-hit call, so shadow record-all behavior works correctly. */
1620 unsigned int build_flags = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT |
1621 OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;
1622 OptixBuildInput build_input = {};
1623 build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES;
1624 build_input.customPrimitiveArray.aabbBuffers = (CUdeviceptr *)aabb_ptrs.data();
1625 build_input.customPrimitiveArray.numPrimitives = num_points;
1626 build_input.customPrimitiveArray.strideInBytes = sizeof(OptixAabb);
1627 build_input.customPrimitiveArray.flags = &build_flags;
1628 build_input.customPrimitiveArray.numSbtRecords = 1;
1629 build_input.customPrimitiveArray.primitiveIndexOffset = pointcloud->prim_offset;
1630
1631 if (!build_optix_bvh(bvh_optix, operation, build_input, num_motion_steps)) {
1632 progress.set_error("Failed to build OptiX acceleration structure");
1633 }
1634 }
1635 }
1636 else {
1637 unsigned int num_instances = 0;
1638 unsigned int max_num_instances = 0xFFFFFFFF;
1639
1640 bvh_optix->as_data->free();
1641 bvh_optix->traversable_handle = 0;
1642 bvh_optix->motion_transform_data->free();
1643
1644 optixDeviceContextGetProperty(context,
1645 OPTIX_DEVICE_PROPERTY_LIMIT_MAX_INSTANCE_ID,
1646 &max_num_instances,
1647 sizeof(max_num_instances));
1648 /* Do not count first bit, which is used to distinguish instanced and non-instanced objects. */
1649 max_num_instances >>= 1;
1650 if (bvh->objects.size() > max_num_instances) {
1651 progress.set_error(
1652 "Failed to build OptiX acceleration structure because there are too many instances");
1653 return;
1654 }
1655
1656 /* Fill instance descriptions. */
1657 device_vector<OptixInstance> instances(this, "optix tlas instances", MEM_READ_ONLY);
1658 instances.alloc(bvh->objects.size());
1659
1660 /* Calculate total motion transform size and allocate memory for them. */
1661 size_t motion_transform_offset = 0;
1662 if (pipeline_options.usesMotionBlur) {
1663 size_t total_motion_transform_size = 0;
1664 for (Object *const ob : bvh->objects) {
1665 if (ob->is_traceable() && ob->use_motion()) {
1666 total_motion_transform_size = align_up(total_motion_transform_size,
1667 OPTIX_TRANSFORM_BYTE_ALIGNMENT);
1668 const size_t motion_keys = max(ob->get_motion().size(), (size_t)2) - 2;
1669 total_motion_transform_size = total_motion_transform_size +
1670 sizeof(OptixSRTMotionTransform) +
1671 motion_keys * sizeof(OptixSRTData);
1672 }
1673 }
1674
1675 assert(bvh_optix->motion_transform_data->device == this);
1676 bvh_optix->motion_transform_data->alloc_to_device(total_motion_transform_size);
1677 }
1678
1679 for (Object *ob : bvh->objects) {
1680 /* Skip non-traceable objects. */
1681 if (!ob->is_traceable()) {
1682 continue;
1683 }
1684
1685 BVHOptiX *const blas = static_cast<BVHOptiX *>(ob->get_geometry()->bvh.get());
1686 OptixTraversableHandle handle = blas->traversable_handle;
1687 if (handle == 0) {
1688 continue;
1689 }
1690
1691 OptixInstance &instance = instances[num_instances++];
1692 memset(&instance, 0, sizeof(instance));
1693
1694 /* Clear transform to identity matrix. */
1695 instance.transform[0] = 1.0f;
1696 instance.transform[5] = 1.0f;
1697 instance.transform[10] = 1.0f;
1698
1699 /* Set user instance ID to object index. */
1700 instance.instanceId = ob->get_device_index();
1701
1702 /* Add some of the object visibility bits to the mask.
1703 * __prim_visibility contains the combined visibility bits of all instances, so is not
1704 * reliable if they differ between instances. But the OptiX visibility mask can only contain
1705 * 8 bits, so have to trade-off here and select just a few important ones.
1706 */
1707 instance.visibilityMask = ob->visibility_for_tracing() & 0xFF;
1708
1709 /* Have to have at least one bit in the mask, or else instance would always be culled. */
1710 if (0 == instance.visibilityMask) {
1711 instance.visibilityMask = 0xFF;
1712 }
1713
1714 if (ob->get_geometry()->is_hair()) {
1715 const Hair *hair = static_cast<const Hair *>(ob->get_geometry());
1716 if (hair->curve_shape == CURVE_RIBBON) {
1717 instance.sbtOffset = PG_HITD_CURVE_RIBBON - PG_HITD;
1718
1719 /* Also skip curve ribbons in local trace calls. */
1720 instance.visibilityMask |= 4;
1721 }
1722 else if (hair->curve_shape == CURVE_THICK_LINEAR) {
1723 instance.sbtOffset = PG_HITD_CURVE_LINEAR - PG_HITD;
1724 if (pipeline_options.usesMotionBlur && hair->has_motion_blur()) {
1725 instance.sbtOffset = PG_HITD_CURVE_LINEAR_MOTION - PG_HITD;
1726 }
1727 }
1728 else {
1729 if (pipeline_options.usesMotionBlur && hair->has_motion_blur()) {
1730 /* Select between motion blur and non-motion blur built-in intersection module. */
1731 instance.sbtOffset = PG_HITD_MOTION - PG_HITD;
1732 }
1733 }
1734 }
1735 else if (ob->get_geometry()->is_pointcloud()) {
1736 /* Use the hit group that has an intersection program for point clouds. */
1737 instance.sbtOffset = PG_HITD_POINTCLOUD - PG_HITD;
1738
1739 /* Also skip point clouds in local trace calls. */
1740 instance.visibilityMask |= 4;
1741 }
1742 {
1743 /* Can disable __anyhit__kernel_optix_visibility_test by default (except for thick curves,
1744 * since it needs to filter out end-caps there).
1745 *
1746 * It is enabled where necessary (visibility mask exceeds 8 bits or the other any-hit
1747 * programs like __anyhit__kernel_optix_shadow_all_hit) via OPTIX_RAY_FLAG_ENFORCE_ANYHIT.
1748 */
1749 instance.flags = OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT;
1750 }
1751
1752 /* Insert motion traversable if object has motion. */
1753 if (pipeline_options.usesMotionBlur && ob->use_motion()) {
1754 size_t motion_keys = max(ob->get_motion().size(), (size_t)2) - 2;
1755 size_t motion_transform_size = sizeof(OptixSRTMotionTransform) +
1756 motion_keys * sizeof(OptixSRTData);
1757
1758 const CUDAContextScope scope(this);
1759
1760 motion_transform_offset = align_up(motion_transform_offset,
1761 OPTIX_TRANSFORM_BYTE_ALIGNMENT);
1762 CUdeviceptr motion_transform_gpu = bvh_optix->motion_transform_data->device_pointer +
1763 motion_transform_offset;
1764 motion_transform_offset += motion_transform_size;
1765
1766 /* Allocate host side memory for motion transform and fill it with transform data. */
1767 array<uint8_t> motion_transform_storage(motion_transform_size);
1768 OptixSRTMotionTransform *motion_transform = reinterpret_cast<OptixSRTMotionTransform *>(
1769 motion_transform_storage.data());
1770 motion_transform->child = handle;
1771 motion_transform->motionOptions.numKeys = ob->get_motion().size();
1772 motion_transform->motionOptions.flags = OPTIX_MOTION_FLAG_NONE;
1773 motion_transform->motionOptions.timeBegin = 0.0f;
1774 motion_transform->motionOptions.timeEnd = 1.0f;
1775
1776 OptixSRTData *const srt_data = motion_transform->srtData;
1777 array<DecomposedTransform> decomp(ob->get_motion().size());
1779 decomp.data(), ob->get_motion().data(), ob->get_motion().size());
1780
1781 for (size_t i = 0; i < ob->get_motion().size(); ++i) {
1782 /* Scale. */
1783 srt_data[i].sx = decomp[i].y.w; /* scale.x.x */
1784 srt_data[i].sy = decomp[i].z.w; /* scale.y.y */
1785 srt_data[i].sz = decomp[i].w.w; /* scale.z.z */
1786
1787 /* Shear. */
1788 srt_data[i].a = decomp[i].z.x; /* scale.x.y */
1789 srt_data[i].b = decomp[i].z.y; /* scale.x.z */
1790 srt_data[i].c = decomp[i].w.x; /* scale.y.z */
1791 assert(decomp[i].z.z == 0.0f); /* scale.y.x */
1792 assert(decomp[i].w.y == 0.0f); /* scale.z.x */
1793 assert(decomp[i].w.z == 0.0f); /* scale.z.y */
1794
1795 /* Pivot point. */
1796 srt_data[i].pvx = 0.0f;
1797 srt_data[i].pvy = 0.0f;
1798 srt_data[i].pvz = 0.0f;
1799
1800 /* Rotation. */
1801 srt_data[i].qx = decomp[i].x.x;
1802 srt_data[i].qy = decomp[i].x.y;
1803 srt_data[i].qz = decomp[i].x.z;
1804 srt_data[i].qw = decomp[i].x.w;
1805
1806 /* Translation. */
1807 srt_data[i].tx = decomp[i].y.x;
1808 srt_data[i].ty = decomp[i].y.y;
1809 srt_data[i].tz = decomp[i].y.z;
1810 }
1811
1812 /* Upload motion transform to GPU. */
1813 cuMemcpyHtoD(motion_transform_gpu, motion_transform, motion_transform_size);
1814 motion_transform = nullptr;
1815 motion_transform_storage.clear();
1816
1817 /* Get traversable handle to motion transform. */
1818 optixConvertPointerToTraversableHandle(context,
1819 motion_transform_gpu,
1820 OPTIX_TRAVERSABLE_TYPE_SRT_MOTION_TRANSFORM,
1821 &instance.traversableHandle);
1822 }
1823 else {
1824 instance.traversableHandle = handle;
1825
1826 if (ob->get_geometry()->is_instanced()) {
1827 /* Set transform matrix. */
1828 memcpy(instance.transform, &ob->get_tfm(), sizeof(instance.transform));
1829 }
1830 }
1831 }
1832
1833 /* Upload instance descriptions. */
1834 instances.resize(num_instances);
1835 instances.copy_to_device();
1836
1837 /* Build top-level acceleration structure (TLAS) */
1838 OptixBuildInput build_input = {};
1839 build_input.type = OPTIX_BUILD_INPUT_TYPE_INSTANCES;
1840 build_input.instanceArray.instances = instances.device_pointer;
1841 build_input.instanceArray.numInstances = num_instances;
1842
1843 if (!build_optix_bvh(bvh_optix, OPTIX_BUILD_OPERATION_BUILD, build_input, 0)) {
1844 progress.set_error("Failed to build OptiX acceleration structure");
1845 }
1846 tlas_handle = bvh_optix->traversable_handle;
1847 }
1848}
1849
1850void OptiXDevice::release_bvh(BVH *bvh)
1851{
1852 thread_scoped_lock lock(delayed_free_bvh_mutex);
1853 /* Do delayed free of BVH memory, since geometry holding BVH might be deleted
1854 * while GPU is still rendering. */
1855 BVHOptiX *const bvh_optix = static_cast<BVHOptiX *>(bvh);
1856
1857 delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->as_data));
1858 delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->motion_transform_data));
1859 bvh_optix->traversable_handle = 0;
1860}
1861
1862void OptiXDevice::free_bvh_memory_delayed()
1863{
1864 thread_scoped_lock lock(delayed_free_bvh_mutex);
1865 delayed_free_bvh_memory.free_memory();
1866}
1867
1868void OptiXDevice::const_copy_to(const char *name, void *host, const size_t size)
1869{
1870 /* Set constant memory for CUDA module. */
1871 CUDADevice::const_copy_to(name, host, size);
1872
1873 if (strcmp(name, "data") == 0) {
1874 assert(size <= sizeof(KernelData));
1875
1876 /* Update traversable handle (since it is different for each device on multi devices). */
1877 KernelData *const data = (KernelData *)host;
1878 *(OptixTraversableHandle *)&data->device_bvh = tlas_handle;
1879
1880 update_launch_params(offsetof(KernelParamsOptiX, data), host, size);
1881 return;
1882 }
1883
1884 /* Update data storage pointers in launch parameters. */
1885# define KERNEL_DATA_ARRAY(data_type, data_name) \
1886 if (strcmp(name, #data_name) == 0) { \
1887 update_launch_params(offsetof(KernelParamsOptiX, data_name), host, size); \
1888 return; \
1889 }
1890 KERNEL_DATA_ARRAY(IntegratorStateGPU, integrator_state)
1891# include "kernel/data_arrays.h"
1892# undef KERNEL_DATA_ARRAY
1893}
1894
1895void OptiXDevice::update_launch_params(const size_t offset, void *data, const size_t data_size)
1896{
1897 const CUDAContextScope scope(this);
1898
1899 cuda_assert(cuMemcpyHtoD(launch_params.device_pointer + offset, data, data_size));
1900}
1901
1903
1904#endif /* WITH_OPTIX */
unsigned int uint
volatile int lock
BMesh const char void * data
unsigned long long int uint64_t
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition btDbvt.cpp:52
static btDbvtVolume bounds(btDbvtNode **leaves, int count)
Definition btDbvt.cpp:299
void refit(btStridingMeshInterface *triangles, const btVector3 &aabbMin, const btVector3 &aabbMax)
SIMD_FORCE_INLINE const btScalar & z() const
Return the z value.
Definition btQuadWord.h:117
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition btQuadWord.h:119
Attribute * find(ustring name) const
bool top_level
Definition params.h:80
int bvh_type
Definition params.h:105
Definition bvh/bvh.h:67
vector< Geometry * > geometry
Definition bvh/bvh.h:70
BVHParams params
Definition bvh/bvh.h:69
vector< Object * > objects
Definition bvh/bvh.h:71
bool is_volume() const
bool is_pointcloud() const
bool is_hair() const
size_t prim_offset
virtual bool has_motion_blur() const
AttributeSet attributes
bool is_mesh() const
Definition hair.h:13
Curve get_curve(const size_t i) const
Definition hair.h:111
size_t curve_segment_offset
Definition hair.h:90
size_t num_curves() const
Definition hair.h:126
size_t num_segments() const
Definition hair.h:131
CurveShapeType curve_shape
Definition hair.h:91
size_t num_keys() const
Definition hair.h:121
void set_substatus(const string &substatus_)
Definition progress.h:259
void set_error(const string &error_message_)
Definition progress.h:98
void alloc_to_device(const size_t num, bool shrink_to_fit=true)
@ MEM_READ_ONLY
CCL_NAMESPACE_BEGIN struct Options options
#define KERNEL_DATA_ARRAY(type, name)
Definition data_arrays.h:8
DebugFlags & DebugFlags()
Definition debug.h:145
#define KERNEL_FEATURE_HAIR_RIBBON
#define KERNEL_FEATURE_OBJECT_MOTION
#define KERNEL_FEATURE_OSL_SHADING
#define KERNEL_FEATURE_SUBSURFACE
#define KERNEL_FEATURE_HAIR_THICK
#define KERNEL_FEATURE_PATH_TRACING
#define KERNEL_FEATURE_OSL_CAMERA
#define KERNEL_FEATURE_NODE_RAYTRACE
#define KERNEL_FEATURE_BAKING
#define KERNEL_FEATURE_MNEE
#define KERNEL_FEATURE_POINTCLOUD
#define CCL_NAMESPACE_END
#define offsetof(t, d)
static float verts[][3]
ThreadMutex mutex
#define assert(assertion)
VecBase< float, D > step(VecOp< float, D >, VecOp< float, D >) RET
@ ATTR_STD_MOTION_VERTEX_POSITION
@ CURVE_THICK
@ CURVE_RIBBON
@ CURVE_THICK_LINEAR
@ BVH_LAYOUT_OPTIX
#define LOG_DEBUG
Definition log.h:107
#define LOG_FATAL
Definition log.h:99
#define LOG_ERROR
Definition log.h:101
#define LOG_WARNING
Definition log.h:103
#define LOG_INFO
Definition log.h:106
Segment< FEdge *, Vec3r > segment
int BVHLayoutMask
Definition params.h:50
@ BVH_TYPE_STATIC
Definition params.h:40
size_t path_file_size(const string &path)
Definition path.cpp:554
bool path_is_directory(const string &path)
Definition path.cpp:582
string path_get(const string &sub)
Definition path.cpp:337
string path_join(const string &dir, const string &file)
Definition path.cpp:415
bool path_read_compressed_text(const string &path, string &text)
Definition path.cpp:754
static struct PyModuleDef module
Definition python.cpp:796
const char * name
#define make_float4
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
Definition string.cpp:23
float3 * data_float3()
float4 * data_float4()
void bounds_grow(const int k, const float3 *curve_keys, const float *curve_radius, BoundBox &bounds) const
Definition hair.cpp:44
int first_key
Definition hair.h:19
int num_segments() const
Definition hair.h:22
int num_keys
Definition hair.h:20
size_t num_triangles() const
Definition scene/mesh.h:77
bool use_motion() const
int get_device_index() const
bool is_traceable() const
uint visibility_for_tracing() const
void bounds_grow(const float3 *points, const float *radius, BoundBox &bounds) const
Point get_point(const int i) const
size_t num_points() const
void push(TaskRunFunction &&task)
Definition task.cpp:21
void wait_work(Summary *stats=nullptr)
Definition task.cpp:27
i
Definition text_draw.cc:230
max
Definition text_draw.cc:251
std::mutex thread_mutex
Definition thread.h:27
std::unique_lock< std::mutex > thread_scoped_lock
Definition thread.h:28
void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, const size_t size)
uint64_t device_ptr
Definition types_base.h:44
ccl_device_inline size_t align_up(const size_t offset, const size_t alignment)
Definition types_base.h:47