/* Map a MetalPipelineType enum value to its printable name, used by the
 * compilation log lines elsewhere in this file.
 * NOTE(review): this chunk is an elided extract -- the fused leading digits
 * are original-file line numbers, and the opening `{`, the `switch`, the
 * PSO_GENERIC case and the closing braces fall in the elided gaps
 * (original lines 19-22 and 27+). Do not treat the fragment as compilable. */
18const char *kernel_type_as_string(MetalPipelineType pso_type)
 23 case PSO_SPECIALIZED_INTERSECT:
 24 return "PSO_SPECIALIZED_INTERSECT";
 25 case PSO_SPECIALIZED_SHADE:
 26 return "PSO_SPECIALIZED_SHADE";
/* Interior fragment of the ShaderCache class declaration (the `class` header
 * line itself is elided from this extract). ShaderCache owns a queue of
 * pipeline-compilation requests and a pool of worker threads per Metal device. */
/* Constructor: keys occupancy tuning off the detected Apple GPU architecture.
 * The switch arms are elided here; only one {64, 64} assignment survives. */
34 ShaderCache(id<MTLDevice> _mtlDevice) : mtlDevice(_mtlDevice)
41 switch (MetalInfo::get_apple_gpu_architecture(mtlDevice)) {
46 occupancy_tuning[i] = {64, 64};
/* Public API: look up the best loaded pipeline for a kernel, enqueue a
 * compile request, and query whether a (kernel, device, PSO type) combination
 * still needs compiling. */
93 MetalKernelPipeline *get_best_pipeline(
DeviceKernel kernel,
const MetalDevice *device);
97 void load_kernel(
DeviceKernel kernel, MetalDevice *device, MetalPipelineType pso_type);
/* should_load_kernel's parameter list (name elided with original line 99). */
100 MetalDevice
const *device,
101 MetalPipelineType pso_type);
105 friend ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice);
/* Worker-thread entry point; drains request_queue (see definition below). */
107 void compile_thread_func();
109 using PipelineCollection = std::vector<unique_ptr<MetalKernelPipeline>>;
/* Per-kernel occupancy overrides; zero threads_per_threadgroup means
 * "no override" (checked in load_kernel). */
111 struct OccupancyTuningParameters {
112 int threads_per_threadgroup = 0;
113 int num_threads_per_block = 0;
/* cache_mutex guards the pipelines map / request_queue; cond_var wakes the
 * compile threads; the two atomics are the outstanding-request counters that
 * wait_for_all() and the Session poll. */
116 std::mutex cache_mutex;
119 id<MTLDevice> mtlDevice;
122 std::condition_variable cond_var;
123 std::deque<MetalKernelPipeline *> request_queue;
124 std::vector<std::thread> compile_threads;
125 std::atomic_int incomplete_requests = 0;
126 std::atomic_int incomplete_specialization_requests = 0;
/* File-scope state: one ShaderCache per Metal device, kept in a fixed-size
 * array so entries have stable addresses for the lifetime of the process.
 * `running` is a static flag compile loops poll so shutdown can interrupt
 * in-flight waits (see ShaderCache::compile / compile_thread_func below). */
129bool ShaderCache::running =
true;
131const int MAX_POSSIBLE_GPUS_ON_SYSTEM = 8;
132using DeviceShaderCache = std::pair<id<MTLDevice>, unique_ptr<ShaderCache>>;
/* NOTE(review): g_shaderCacheCount / g_shaderCache are read without a lock in
 * several functions below while get_shader_cache() appends under a mutex --
 * verify the intended threading model before touching this. */
133int g_shaderCacheCount = 0;
134DeviceShaderCache g_shaderCache[MAX_POSSIBLE_GPUS_ON_SYSTEM];
/* Monotonic id handed to each MetalKernelPipeline (see load_kernel). */
137static std::atomic_int g_next_pipeline_id = 0;
/* Return the ShaderCache for `mtlDevice`, creating it on first use.
 * NOTE(review): the lookup loop runs without holding g_shaderCacheCountMutex
 * while the append path increments the count under the mutex and only then
 * populates the slot -- two threads missing the same device can create two
 * caches, and a reader can observe the incremented count before the slot is
 * assigned. Confirm callers serialize first-time device registration. */
139ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice)
141 for (
int i = 0; i < g_shaderCacheCount; i++) {
142 if (g_shaderCache[i].first == mtlDevice) {
143 return g_shaderCache[i].second.get();
/* Reserve a slot index under the mutex (mutex declaration elided from this
 * extract, original lines 144-147). */
148 g_shaderCacheCountMutex.lock();
149 int index = g_shaderCacheCount++;
150 g_shaderCacheCountMutex.unlock();
/* assert-only bound check: overrunning MAX_POSSIBLE_GPUS_ON_SYSTEM in a
 * release build would write out of bounds. */
152 assert(index < MAX_POSSIBLE_GPUS_ON_SYSTEM);
153 g_shaderCache[index].first = mtlDevice;
154 g_shaderCache[index].second = make_unique<ShaderCache>(mtlDevice);
155 return g_shaderCache[index].second.get();
/* Shut down the compile worker pool: wake every thread blocked on cond_var,
 * then join them. Presumably `running = false` is set in the elided line
 * just before notify_all (original lines 159-160) -- TODO confirm; the wait
 * predicate in compile_thread_func checks !running. */
158ShaderCache::~ShaderCache()
161 cond_var.notify_all();
163 metal_printf(
"Waiting for ShaderCache threads... (incomplete_requests = %d)\n",
164 int(incomplete_requests));
/* Join each worker (the .join() call falls in the elided lines 166-167). */
165 for (
auto &
thread : compile_threads) {
168 metal_printf(
"ShaderCache shut down.\n");
/* Block until every queued pipeline-compilation request has completed.
 *
 * Polls the atomic request counter (decremented by the compile threads in
 * compile_thread_func) rather than waiting on a condition variable. The
 * 100 ms poll interval is coarse but this is only hit when a caller needs
 * all PSOs ready, never on a per-sample path.
 *
 * (Restores the brace structure elided by the extract's mangling; all
 * tokens come from the visible fragment.) */
void ShaderCache::wait_for_all()
{
  while (incomplete_requests > 0) {
    std::this_thread::sleep_for(std::chrono::milliseconds(100));
  }
}
/* Worker-thread loop: pop one compile request at a time from request_queue,
 * compile it, and insert the finished pipeline into the per-kernel
 * collection, evicting the oldest entries of the same PSO type.
 * (The enclosing while-loop, lock acquisition and several closing braces are
 * elided from this extract -- the cond_var.wait takes a `lock` declared in
 * the elided lines.) */
178void ShaderCache::compile_thread_func()
183 MetalKernelPipeline *pipeline;
/* Sleep until shutdown or work arrives; predicate guards spurious wakeups. */
186 cond_var.wait(
lock, [&] {
return !running || !request_queue.empty(); });
/* Exit path on shutdown (the loop break/return is elided). */
187 if (!running || request_queue.empty()) {
191 pipeline = request_queue.front();
192 request_queue.pop_front();
197 MetalPipelineType pso_type = pipeline->pso_type;
/* Skip compiling for devices that were torn down while the request queued. */
199 if (MetalDevice::is_device_cancelled(pipeline->originating_device_id)) {
201 metal_printf(
"Cancelling compilation of %s (%s)\n",
203 kernel_type_as_string(pso_type));
/* After pipeline->compile() (elided), register the result. `device_kernel`
 * is read from the request in elided lines -- TODO confirm. */
210 auto &collection = pipelines[device_kernel];
/* Keep at most 3 cached pipelines of the same PSO type per kernel; scan
 * newest-to-oldest and purge the oldest once the quota is hit. */
213 int max_entries_of_same_pso_type = 3;
214 for (
int i = (
int)collection.size() - 1; i >= 0; i--) {
215 if (collection[i]->pso_type == pso_type) {
216 max_entries_of_same_pso_type -= 1;
217 if (max_entries_of_same_pso_type == 0) {
218 metal_printf(
"Purging oldest %s:%s kernel from ShaderCache\n",
219 kernel_type_as_string(pso_type),
221 collection.erase(collection.begin() + i);
/* Collection takes ownership of the raw request pointer allocated in
 * load_kernel(). */
226 collection.push_back(unique_ptr<MetalKernelPipeline>(pipeline));
/* Balance the counters incremented in load_kernel(). */
228 incomplete_requests--;
229 if (pso_type != PSO_GENERIC) {
230 incomplete_specialization_requests--;
/* Decide whether (device_kernel, device, pso_type) still needs a compile
 * request. Visible logic: specialized PSO types are filtered so that
 * shade-specialized PSOs only apply to shade kernels (and vice versa,
 * `is_shade_kernel` is computed in elided lines), and a request is redundant
 * if an existing pipeline already matches the device's current kernels_md5.
 * Early-return bodies are elided from this extract. */
235bool ShaderCache::should_load_kernel(
DeviceKernel device_kernel,
236 MetalDevice
const *device,
237 MetalPipelineType pso_type)
262 if (pso_type != PSO_GENERIC) {
272 bool is_shade_pso = (pso_type == PSO_SPECIALIZED_SHADE);
273 if (is_shade_pso != is_shade_kernel) {
/* Already-cached check: compare against the md5 of the current kernel
 * source/specialization for this PSO type. */
281 for (
auto &pipeline : pipelines[device_kernel]) {
282 if (pipeline->kernels_md5 == device->kernels_md5[pso_type]) {
/* Enqueue a compile request for one kernel. Lazily spawns the worker pool on
 * first use, snapshots everything the compile needs (specialization data,
 * md5, library, occupancy tuning) into a heap-allocated MetalKernelPipeline,
 * and wakes one worker. Ownership of the raw pointer passes to
 * compile_thread_func, which wraps it in a unique_ptr. */
291void ShaderCache::load_kernel(
DeviceKernel device_kernel,
293 MetalPipelineType pso_type)
/* First call: size the pool from the OS-reported concurrent compilation
 * task count on macOS >= 13.3, else default to 2. */
298 if (compile_threads.empty()) {
301 int max_mtlcompiler_threads = 2;
303# if defined(MAC_OS_VERSION_13_3)
304 if (@available(macOS 13.3, *)) {
/* Leave one task slot free for the OS/driver. */
306 max_mtlcompiler_threads =
max(2,
307 int([mtlDevice maximumConcurrentCompilationTaskCount]) - 1);
311 metal_printf(
"Spawning %d Cycles kernel compilation threads\n", max_mtlcompiler_threads);
312 for (
int i = 0; i < max_mtlcompiler_threads; i++) {
313 compile_threads.push_back(std::thread([
this] { this->compile_thread_func(); }));
/* Skip redundant requests (already cached / wrong PSO-kernel pairing). */
318 if (!should_load_kernel(device_kernel, device, pso_type)) {
/* Counters consumed by wait_for_all() and the specialization poll. */
322 incomplete_requests++;
323 if (pso_type != PSO_GENERIC) {
324 incomplete_specialization_requests++;
327 MetalKernelPipeline *pipeline =
new MetalKernelPipeline;
/* Snapshot of the device state the async compile will read; kernel_data_ is
 * a raw copy of the launch-params KernelData used for specialization. */
331 pipeline->pipeline_id = g_next_pipeline_id.fetch_add(1);
332 pipeline->originating_device_id = device->device_id;
333 memcpy(&pipeline->kernel_data_, &device->launch_params.data,
sizeof(pipeline->kernel_data_));
334 pipeline->pso_type = pso_type;
335 pipeline->mtlDevice = mtlDevice;
336 pipeline->kernels_md5 = device->kernels_md5[pso_type];
337 pipeline->mtlLibrary = device->mtlLibrary[pso_type];
338 pipeline->device_kernel = device_kernel;
339 pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup;
/* Apply per-kernel occupancy overrides when set in the constructor. */
341 if (occupancy_tuning[device_kernel].threads_per_threadgroup) {
342 pipeline->threads_per_threadgroup = occupancy_tuning[device_kernel].threads_per_threadgroup;
343 pipeline->num_threads_per_block = occupancy_tuning[device_kernel].num_threads_per_block;
347 pipeline->use_metalrt = device->use_metalrt;
348 pipeline->kernel_features = device->kernel_features;
/* NOTE(review): the lock guarding request_queue is presumably taken in the
 * elided lines just above this push -- confirm against the full source. */
352 request_queue.push_back(pipeline);
354 cond_var.notify_one();
/* Return the best loaded pipeline for `kernel`, blocking (100 ms poll) until
 * one exists or the cache is shutting down / the device errored.
 * "Best" = among loaded pipelines whose md5 matches the device's current
 * kernels for their PSO type, the one with the highest PSO_ enum value
 * (specialized beats generic). */
357MetalKernelPipeline *ShaderCache::get_best_pipeline(
DeviceKernel kernel,
const MetalDevice *device)
359 while (running && !device->has_error) {
361 MetalKernelPipeline *best_match =
nullptr;
364 for (
auto &candidate : pipelines[kernel]) {
/* Only consider finished compiles that match the current specialization. */
365 if (candidate->loaded &&
366 candidate->kernels_md5 == device->kernels_md5[candidate->pso_type])
369 if (!best_match || candidate->pso_type > best_match->pso_type) {
370 best_match = candidate.get();
/* First use of a specialized PSO: log the swap-in (return is elided). */
377 if (best_match->usage_count == 0 && best_match->pso_type != PSO_GENERIC) {
378 metal_printf(
"Swapping in %s version of %s\n",
379 kernel_type_as_string(best_match->pso_type),
382 best_match->usage_count += 1;
/* Nothing ready yet -- sleep and retry. */
387 std::this_thread::sleep_for(std::chrono::milliseconds(100));
/* Decide whether this pipeline should be cached to / loaded from an
 * MTLBinaryArchive on disk. Visible logic: the CYCLES_METAL_DISABLE_BINARY_ARCHIVES
 * env var force-disables archives (non-zero value), and PSO_GENERIC has its
 * own branch (body elided). The @available gate and the remaining policy
 * (original lines 398-407, 409+) are elided from this extract. */
392bool MetalKernelPipeline::should_use_binary_archive()
const
395 if (@available(macOS 13.0, *)) {
396 if (
auto str = getenv(
"CYCLES_METAL_DISABLE_BINARY_ARCHIVES")) {
397 if (atoi(
str) != 0) {
408 if (pso_type == PSO_GENERIC) {
/* Build the MTLFunctionConstantValues used to specialize kernel functions.
 * With `data == nullptr` (generic PSO) the X-macro expansion below binds a
 * placeholder instead of the live KernelData member (the nullptr arm of the
 * ternary is in the elided continuation line, original line 447).
 * `zero_data` is declared in elided lines (435-438) -- TODO confirm type. */
428static MTLFunctionConstantValues *GetConstantValues(
KernelData const *data =
nullptr)
430 MTLFunctionConstantValues *constant_values = [MTLFunctionConstantValues
new];
/* Local aliases let the KERNEL_STRUCT_MEMBER macro token-paste
 * MTLDataType_<type> from the member's declared type. */
432 MTLDataType MTLDataType_int = MTLDataTypeInt;
433 MTLDataType MTLDataType_float = MTLDataTypeFloat;
434 MTLDataType MTLDataType_float4 = MTLDataTypeFloat4;
439 [constant_values setConstantValue:&zero_data type:MTLDataType_int atIndex:
Kernel_DummyConstant];
441 bool next_member_is_specialized =
true;
/* X-macro pass over the KernelData member list (the #include that triggers
 * the expansion is elided): one setConstantValue per member, with
 * DONT_SPECIALIZE masking the next member. Do not insert anything between
 * the backslash-continued lines below. */
443# define KERNEL_STRUCT_MEMBER_DONT_SPECIALIZE next_member_is_specialized = false;
445# define KERNEL_STRUCT_MEMBER(parent, _type, name) \
446 [constant_values setConstantValue:next_member_is_specialized ? (void *)&data->parent.name : \
448 type:MTLDataType_##_type \
449 atIndex:KernelData_##parent##_##name]; \
450 next_member_is_specialized = true;
454 return constant_values;
/* Release and clear every MetalRT intersection-function table owned by this
 * dispatch pipeline. Safe to call repeatedly: released slots are nil'd so a
 * second pass is a no-op.
 *
 * This file uses manual retain/release; the release here balances the +1
 * reference returned by newIntersectionFunctionTableWithDescriptor: in
 * update().
 *
 * (Restores the brace structure elided by the extract's mangling; all
 * tokens come from the visible fragment.) */
void MetalDispatchPipeline::free_intersection_function_tables()
{
  for (int table = 0; table < METALRT_TABLE_NUM; table++) {
    if (intersection_func_table[table]) {
      [intersection_func_table[table] release];
      intersection_func_table[table] = nil;
    }
  }
}
/* Destructor: drop the intersection-function tables this object owns.
 * `pipeline` itself is not released here -- update() assigns it from the
 * ShaderCache-owned pipeline without taking a reference, so this class is
 * presumably a non-owning user of it (confirm against the full source).
 *
 * (Restores the brace structure elided by the extract's mangling.) */
MetalDispatchPipeline::~MetalDispatchPipeline()
{
  free_intersection_function_tables();
}
/* Refresh this dispatch pipeline from the best currently-available cached
 * pipeline for `kernel`. No-op (early return, body elided) when the cached
 * pipeline_id already matches. On change: copy the PSO + metadata, then
 * rebuild every MetalRT intersection-function table and populate it with
 * function handles resolved against the new PSO. */
472bool MetalDispatchPipeline::update(MetalDevice *metal_device,
DeviceKernel kernel)
474 const MetalKernelPipeline *best_pipeline = MetalDeviceKernels::get_best_pipeline(metal_device,
476 if (!best_pipeline) {
480 if (pipeline_id == best_pipeline->pipeline_id) {
484 pipeline_id = best_pipeline->pipeline_id;
485 pipeline = best_pipeline->pipeline;
486 pso_type = best_pipeline->pso_type;
487 num_threads_per_block = best_pipeline->num_threads_per_block;
/* Drop the tables built for the previous PSO before re-creating them. */
491 free_intersection_function_tables();
493 for (
int table = 0; table < METALRT_TABLE_NUM; table++) {
495 MTLIntersectionFunctionTableDescriptor *ift_desc =
496 [[MTLIntersectionFunctionTableDescriptor alloc]
init];
497 ift_desc.functionCount = best_pipeline->table_functions[table].count;
/* newIntersectionFunctionTableWithDescriptor: returns +1; released in
 * free_intersection_function_tables(). NOTE(review): the matching
 * [ift_desc release] is not visible in this extract (original lines
 * 500-501) -- confirm it exists. */
498 intersection_func_table[table] = [this->pipeline
499 newIntersectionFunctionTableWithDescriptor:ift_desc];
502 int size =
int([best_pipeline->table_functions[table]
count]);
503 for (
int i = 0; i <
size; i++) {
504 id<MTLFunctionHandle> handle = [pipeline
505 functionHandleWithFunction:best_pipeline->table_functions[table][i]];
506 [intersection_func_table[table] setFunction:handle atIndex:i];
/* Create one named MetalRT intersection function from this pipeline's
 * mtlLibrary, specialized with the same constant values as the main kernel
 * (live KernelData for specialized PSOs, placeholders for PSO_GENERIC).
 * Returns nil on failure after logging the NSError (the `error` variable and
 * the error: argument of newFunctionWithDescriptor are split across elided
 * lines in this extract). */
515id<MTLFunction> MetalKernelPipeline::make_intersection_function(
const char *function_name)
517 MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
518 desc.name = [@(function_name)
copy];
520 if (pso_type != PSO_GENERIC) {
521 desc.constantValues = GetConstantValues(&kernel_data_);
524 desc.constantValues = GetConstantValues();
528 id<MTLFunction> rt_intersection_function = [mtlLibrary newFunctionWithDescriptor:desc
531 if (rt_intersection_function == nil) {
532 NSString *err = [
error localizedDescription];
533 string errors = [err UTF8String];
536 "Error getting intersection function \"%s\": %s", function_name, errors.c_str());
/* Label for GPU-capture/debug tooling. */
539 rt_intersection_function.label = [@(function_name)
copy];
541 return rt_intersection_function;
/* Compile this pipeline's compute PSO. Large multi-stage function; this
 * extract elides many lines (braces, several statements), so the comments
 * below annotate the visible stages only. Stages: (1) fetch + specialize the
 * kernel MTLFunction, (2) build MetalRT intersection functions and link
 * them, (3) configure the compute pipeline descriptor, (4) optionally
 * load/create an on-disk MTLBinaryArchive keyed by an md5 of kernel state,
 * (5) compile (with an archive-miss retry path), (6) serialize the archive
 * and log timing. */
544void MetalKernelPipeline::compile()
546 const std::string function_name = std::string(
"cycles_metal_") +
/* Stage 1: look up the entry point and attach specialization constants. */
551 MTLFunctionDescriptor *func_desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
552 func_desc.name = [@(function_name.c_str())
copy];
554 if (pso_type != PSO_GENERIC) {
555 func_desc.constantValues = GetConstantValues(&kernel_data_);
558 func_desc.constantValues = GetConstantValues();
561 function = [mtlLibrary newFunctionWithDescriptor:func_desc
error:&
error];
563 if (function == nil) {
564 NSString *err = [
error localizedDescription];
565 string errors = [err UTF8String];
566 metal_printf(
"Error getting function \"%s\": %s", function_name.c_str(), errors.c_str());
570 function.label = [@(function_name.c_str())
copy];
/* Stage 2 (MetalRT only -- the use_metalrt guard is elided): build the
 * per-table intersection functions, dedupe them into a set, and sort by
 * label so linkedFunctions ordering is deterministic. */
572 NSArray *linked_functions = nil;
576 NSMutableSet *unique_functions = [[NSMutableSet alloc]
init];
578 auto add_intersection_functions = [&](
int table_index,
580 const char *curve_fn =
nullptr,
581 const char *point_fn =
nullptr) {
582 table_functions[table_index] = [NSArray
583 arrayWithObjects:make_intersection_function(tri_fn),
584 curve_fn ? make_intersection_function(curve_fn) : nil,
585 point_fn ? make_intersection_function(point_fn) : nil,
588 [unique_functions addObjectsFromArray:table_functions[table_index]];
591 add_intersection_functions(METALRT_TABLE_DEFAULT,
592 "__intersection__tri",
593 "__intersection__curve",
594 "__intersection__point");
595 add_intersection_functions(METALRT_TABLE_SHADOW,
596 "__intersection__tri_shadow",
597 "__intersection__curve_shadow",
598 "__intersection__point_shadow");
599 add_intersection_functions(METALRT_TABLE_SHADOW_ALL,
600 "__intersection__tri_shadow_all",
601 "__intersection__curve_shadow_all",
602 "__intersection__point_shadow_all");
603 add_intersection_functions(METALRT_TABLE_VOLUME,
"__intersection__volume_tri");
604 add_intersection_functions(METALRT_TABLE_LOCAL,
"__intersection__local_tri");
605 add_intersection_functions(METALRT_TABLE_LOCAL_MBLUR,
"__intersection__local_tri_mblur");
606 add_intersection_functions(METALRT_TABLE_LOCAL_SINGLE_HIT,
607 "__intersection__local_tri_single_hit");
608 add_intersection_functions(METALRT_TABLE_LOCAL_SINGLE_HIT_MBLUR,
609 "__intersection__local_tri_single_hit_mblur");
611 linked_functions = [[NSArray arrayWithArray:[unique_functions allObjects]]
612 sortedArrayUsingComparator:^NSComparisonResult(id<MTLFunction> f1, id<MTLFunction> f2) {
613 return [f1.label compare:f2.label];
615 unique_functions = nil;
/* Stage 3: descriptor setup. Buffers 0-2 immutable lets the driver
 * optimize; call-stack depth differs between the two elided branches. */
618 MTLComputePipelineDescriptor *computePipelineStateDescriptor =
619 [[MTLComputePipelineDescriptor alloc]
init];
621 computePipelineStateDescriptor.buffers[0].mutability = MTLMutabilityImmutable;
622 computePipelineStateDescriptor.buffers[1].mutability = MTLMutabilityImmutable;
623 computePipelineStateDescriptor.buffers[2].mutability = MTLMutabilityImmutable;
625 computePipelineStateDescriptor.maxTotalThreadsPerThreadgroup = threads_per_threadgroup;
626 computePipelineStateDescriptor.threadGroupSizeIsMultipleOfThreadExecutionWidth =
true;
628 computePipelineStateDescriptor.computeFunction = function;
631 if (linked_functions) {
632 computePipelineStateDescriptor.linkedFunctions = [[MTLLinkedFunctions alloc]
init];
633 computePipelineStateDescriptor.linkedFunctions.functions = linked_functions;
635 computePipelineStateDescriptor.maxCallStackDepth = 1;
637 computePipelineStateDescriptor.maxCallStackDepth = 2;
/* Stage 4: binary-archive bookkeeping. The archive filename mixes the
 * kernels md5, OS version, threadgroup size and sanitized device name so a
 * stale archive is never loaded for different state. (loading_existing_archive
 * is decided in elided lines, presumably via the path cache -- confirm.) */
640 MTLPipelineOption pipelineOptions = MTLPipelineOptionNone;
642 bool use_binary_archive = should_use_binary_archive();
643 bool loading_existing_archive =
false;
644 bool creating_new_archive =
false;
646 id<MTLBinaryArchive> archive = nil;
647 string metalbin_path;
648 string metalbin_name;
649 if (use_binary_archive) {
650 NSProcessInfo *processInfo = [NSProcessInfo processInfo];
651 string osVersion = [[processInfo operatingSystemVersionString] UTF8String];
653 local_md5.
append(kernels_md5);
654 local_md5.
append(osVersion);
656 sizeof(this->threads_per_threadgroup));
/* Strip non-alphanumerics so the device name is filesystem-safe. */
659 string device_name = [mtlDevice.name UTF8String];
660 for (
char &c : device_name) {
661 if ((c <
'0' || c >
'9') && (c <
'a' || c >
'z') && (c <
'A' || c >
'Z')) {
666 metalbin_name = device_name;
668 metalbin_name =
path_join(metalbin_name, kernel_type_as_string(pso_type));
677 creating_new_archive = !loading_existing_archive;
679 MTLBinaryArchiveDescriptor *archiveDesc = [[MTLBinaryArchiveDescriptor alloc]
init];
680 if (loading_existing_archive) {
681 archiveDesc.url = [NSURL fileURLWithPath:@(metalbin_path.c_str())];
683 NSError *
error = nil;
684 archive = [mtlDevice newBinaryArchiveWithDescriptor:archiveDesc
error:&
error];
686 const char *err =
error ? [[
error localizedDescription] UTF8String] :
nullptr;
687 metal_printf(
"newBinaryArchiveWithDescriptor failed: %s\n", err ? err :
"nil");
689 [archiveDesc release];
/* Fail fast on archive miss so the retry path below can recompile without
 * the archive instead of silently doing a slow compile. */
691 if (loading_existing_archive) {
692 pipelineOptions = MTLPipelineOptionFailOnBinaryArchiveMiss;
693 computePipelineStateDescriptor.binaryArchives = [NSArray arrayWithObjects:archive, nil];
697 bool recreate_archive =
false;
/* Stage 5: the actual compile, wrapped in a lambda so the archive-miss path
 * can run it twice. One elided branch compiles synchronously, the other uses
 * the async completionHandler form and polls compilation_finished; both
 * capture results via __block. Polling also checks ShaderCache::running so
 * shutdown is not blocked by a long compile. */
700 auto do_compilation = [&]() {
701 __block
bool compilation_finished =
false;
702 __block
string error_str;
708 NSError *
error = nil;
709 pipeline = [mtlDevice newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
713 const char *err =
error ? [[
error localizedDescription] UTF8String] :
nullptr;
714 error_str = err ? err :
"nil";
720 newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
722 completionHandler:^(id<MTLComputePipelineState> computePipelineState,
723 MTLComputePipelineReflection * ,
725 pipeline = computePipelineState;
732 const char *err =
error ?
733 [[
error localizedDescription] UTF8String] :
735 error_str = err ? err :
"nil";
737 compilation_finished =
true;
741 while (ShaderCache::running && !compilation_finished) {
742 std::this_thread::sleep_for(std::chrono::milliseconds(5));
/* Record the fresh PSO into the new archive (failure is non-fatal). */
746 if (creating_new_archive && pipeline) {
749 if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor
752 NSString *errStr = [
error localizedDescription];
753 metal_printf(
"Failed to add PSO to archive:\n%s\n", errStr ? [errStr UTF8String] :
"nil");
759 "newComputePipelineStateWithDescriptor failed for \"%s\"%s. "
762 (archive && !recreate_archive) ?
" Archive may be incomplete or corrupt - attempting "
/* Archive-miss/corruption fallback: retry once without archive options. */
775 if (pipeline == nil && archive) {
776 recreate_archive =
true;
777 pipelineOptions = MTLPipelineOptionNone;
/* Stage 6: timing + result logging (starttime is set in elided lines). */
783 double duration =
time_dt() - starttime;
785 if (pipeline == nil) {
786 metal_printf(
"%16s | %2d | %-55s | %7.2fs | FAILED!\n",
787 kernel_type_as_string(pso_type),
/* Default block size: largest multiple of the execution width that fits,
 * but never below one execution width. */
794 if (!num_threads_per_block) {
795 num_threads_per_block =
round_down(pipeline.maxTotalThreadsPerThreadgroup,
796 pipeline.threadExecutionWidth);
797 num_threads_per_block = std::max(num_threads_per_block, (
int)pipeline.threadExecutionWidth);
/* Skip archive serialization when shutting down mid-compile. */
800 if (ShaderCache::running) {
801 if (creating_new_archive || recreate_archive) {
802 if (![archive serializeToURL:[NSURL fileURLWithPath:@(metalbin_path.c_str())]
error:&
error])
804 metal_printf(
"Failed to save binary archive to %s, error:\n%s\n",
805 metalbin_path.c_str(),
806 [[
error localizedDescription] UTF8String]);
815 [computePipelineStateDescriptor release];
816 computePipelineStateDescriptor = nil;
818 if (!use_binary_archive) {
819 metal_printf(
"%16s | %2d | %-55s | %7.2fs\n",
820 kernel_type_as_string(pso_type),
826 metal_printf(
"%16s | %2d | %-55s | %7.2fs | %s: %s\n",
827 kernel_type_as_string(pso_type),
831 creating_new_archive ?
" new" :
"load",
832 metalbin_name.c_str());
/* Enqueue compile requests for this device's kernels at the given PSO type.
 * The loop header over kernel index `i` (and the return value, original
 * lines 839-844) are elided from this extract. */
836bool MetalDeviceKernels::load(MetalDevice *device, MetalPipelineType pso_type)
838 auto shader_cache = get_shader_cache(device->mtlDevice);
840 shader_cache->load_kernel((
DeviceKernel)i, device, pso_type);
/* Block until every ShaderCache created so far has drained its compile
 * queue (delegates to ShaderCache::wait_for_all per device).
 *
 * (Restores the brace structure elided by the extract's mangling; all
 * tokens come from the visible fragment.) */
void MetalDeviceKernels::wait_for_all()
{
  for (int i = 0; i < g_shaderCacheCount; i++) {
    g_shaderCache[i].second->wait_for_all();
  }
}
/* Sum the outstanding specialization compile requests across every
 * per-device ShaderCache (used to report specialization progress).
 *
 * (Restores the elided accumulator declaration and return; the `total`
 * identifier is present in the visible fragment.) */
int MetalDeviceKernels::num_incomplete_specialization_requests()
{
  int total = 0;
  for (int i = 0; i < g_shaderCacheCount; i++) {
    total += g_shaderCache[i].second->incomplete_specialization_requests;
  }
  return total;
}
/* Count how many of this device's kernels are already compiled for
 * `pso_type`. Counts kernels for which should_load_kernel() is... the
 * counter increment, loop header and return are elided (original lines
 * 865, 867-868, 870+), so whether "needs load" is counted or inverted here
 * cannot be confirmed from this extract -- check the full source. */
863int MetalDeviceKernels::get_loaded_kernel_count(MetalDevice
const *device,
864 MetalPipelineType pso_type)
866 auto shader_cache = get_shader_cache(device->mtlDevice);
869 if (shader_cache->should_load_kernel((
DeviceKernel)i, device, pso_type)) {
/* Whether any kernel still needs compiling for this device / PSO type.
 * Body fully elided from this extract (original lines 877-880); presumably
 * derived from get_loaded_kernel_count -- confirm against the full source. */
876bool MetalDeviceKernels::should_load_kernels(MetalDevice
const *device, MetalPipelineType pso_type)
/* Thin forwarding wrapper to the device's ShaderCache. The `kernel`
 * parameter declaration is elided from this extract (original lines
 * 882-883) but is referenced in the return statement. */
881const MetalKernelPipeline *MetalDeviceKernels::get_best_pipeline(
const MetalDevice *device,
884 return get_shader_cache(device->mtlDevice)->get_best_pipeline(kernel, device);
/* Detect a benchmark warm-up run by scanning the process arguments for
 * "--warm-up". The return statements (true inside the match, the trailing
 * false) are elided from this extract. Non-ASCII-convertible arguments are
 * skipped via the cStringUsingEncoding nil check. */
887bool MetalDeviceKernels::is_benchmark_warmup()
889 NSArray *args = [[NSProcessInfo processInfo] arguments];
890 for (
int i = 0; i < args.count; i++) {
891 if (
const char *arg = [[args objectAtIndex:i] cStringUsingEncoding:NSASCIIStringEncoding]) {
892 if (!strcmp(arg,
"--warm-up")) {
/* Global teardown: destroy every per-device ShaderCache by assigning a
 * fresh empty pair (unique_ptr reset runs ~ShaderCache, which joins the
 * compile threads). The function tail is elided from this extract
 * (original lines 904+) -- it may also reset g_shaderCacheCount; confirm. */
900void MetalDeviceKernels::static_deinitialize()
902 for (
int i = 0; i < g_shaderCacheCount; i++) {
903 g_shaderCache[i] = DeviceShaderCache();
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
void append(const uint8_t *data, int size)
CCL_NAMESPACE_BEGIN struct Options options
DebugFlags & DebugFlags()
#define CCL_NAMESPACE_END
bool device_kernel_has_intersection(DeviceKernel kernel)
const char * device_kernel_as_string(DeviceKernel kernel)
draw_view push_constant(Type::INT, "radiance_src") .push_constant(Type capture_info_buf storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]") .push_constant(Type draw_view int
#define KERNEL_FEATURE_NODE_RAYTRACE
#define KERNEL_FEATURE_MNEE
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE
@ DEVICE_KERNEL_INTEGRATOR_SORT_WRITE_PASS
@ DEVICE_KERNEL_SHADER_EVAL_DISPLACE
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW
@ DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_STATES
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE
@ DEVICE_KERNEL_INTEGRATOR_SORT_BUCKET_PASS
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE
@ DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL
@ DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA
@ DEVICE_KERNEL_SHADER_EVAL_CURVE_SHADOW_TRANSPARENCY
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST
@ DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND
static void error(const char *str)
static void copy(bNodeTree *dest_ntree, bNode *dest_node, const bNode *src_node)
string path_cache_get(const string &sub)
string path_join(const string &dir, const string &file)
bool path_cache_kernel_exists_and_mark_used(const string &path)
void path_cache_kernel_mark_added_and_clear_old(const string &new_path, const size_t max_old_kernel_of_same_type)
void path_create_directories(const string &filepath)
bool path_remove(const string &path)
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
std::unique_lock< std::mutex > thread_scoped_lock
CCL_NAMESPACE_BEGIN typedef std::mutex thread_mutex
CCL_NAMESPACE_BEGIN double time_dt()
ccl_device_inline size_t round_down(size_t x, size_t multiple)