17CUDADeviceQueue::CUDADeviceQueue(CUDADevice *device)
18 :
DeviceQueue(device), cuda_device_(device), cuda_stream_(nullptr)
20 const CUDAContextScope scope(cuda_device_);
21 cuda_device_assert(cuda_device_, cuStreamCreate(&cuda_stream_, CU_STREAM_NON_BLOCKING));
24CUDADeviceQueue::~CUDADeviceQueue()
26 const CUDAContextScope scope(cuda_device_);
27 cuStreamDestroy(cuda_stream_);
30int CUDADeviceQueue::num_concurrent_states(
const size_t state_size)
const
32 const int max_num_threads = cuda_device_->get_num_multiprocessors() *
33 cuda_device_->get_max_num_threads_per_multiprocessor();
36 const char *factor_str = getenv(
"CYCLES_CONCURRENT_STATES_FACTOR");
38 const float factor = (
float)atof(factor_str);
53int CUDADeviceQueue::num_concurrent_busy_states(
const size_t )
const
55 const int max_num_threads = cuda_device_->get_num_multiprocessors() *
56 cuda_device_->get_max_num_threads_per_multiprocessor();
58 if (max_num_threads == 0) {
62 return 4 * max_num_threads;
65void CUDADeviceQueue::init_execution()
68 CUDAContextScope scope(cuda_device_);
69 cuda_device_->load_texture_info();
70 cuda_device_assert(cuda_device_, cuCtxSynchronize());
72 debug_init_execution();
79 if (cuda_device_->have_error()) {
85 const CUDAContextScope scope(cuda_device_);
86 const CUDADeviceKernel &cuda_kernel = cuda_device_->kernels.get(kernel);
89 const int num_threads_per_block = cuda_kernel.num_threads_per_block;
92 int shared_mem_bytes = 0;
104 shared_mem_bytes = (num_threads_per_block + 1) *
sizeof(
int);
112 assert_success(cuLaunchKernel(cuda_kernel.function,
116 num_threads_per_block,
121 const_cast<void **
>(args.
values),
127 return !(cuda_device_->have_error());
130bool CUDADeviceQueue::synchronize()
132 if (cuda_device_->have_error()) {
136 const CUDAContextScope scope(cuda_device_);
137 assert_success(cuStreamSynchronize(cuda_stream_),
"synchronize");
141 return !(cuda_device_->have_error());
/* NOTE(review): the lines below are surviving fragments of what appear to be
 * several memory-transfer methods of CUDADeviceQueue. The visible pattern is
 * an on-demand device allocation (`mem_alloc`) followed by entering the
 * device's CUDA context (presumably before an async memset/copy on the
 * queue's stream — TODO confirm). Most of the bodies, including the method
 * signatures, were lost in this chunk; do not treat this as complete code. */
154 cuda_device_->mem_alloc(mem);
160 const CUDAContextScope scope(cuda_device_);
/* Second on-demand allocation fragment — likely a different method. */
176 cuda_device_->mem_alloc(mem);
183 const CUDAContextScope scope(cuda_device_);
202 const CUDAContextScope scope(cuda_device_);
209void CUDADeviceQueue::assert_success(CUresult result,
const char *operation)
211 if (result != CUDA_SUCCESS) {
212 const char *name = cuewErrorString(result);
214 "%s in CUDA queue %s (%s)", name, operation, debug_active_kernels().c_str()));
218unique_ptr<DeviceGraphicsInterop> CUDADeviceQueue::graphics_interop_create()
220 return make_unique<CUDADeviceGraphicsInterop>(
this);
device_ptr device_pointer
#define CCL_NAMESPACE_END
draw_view in_light_buf[] float
ccl_gpu_kernel_postfix ccl_global const int ccl_global float const int work_size
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY
#define VLOG_DEVICE_STATS
string string_human_readable_size(size_t size)
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
ccl_device_inline size_t divide_up(size_t x, size_t y)