/* Construct a queue bound to `device`: enters the device's HIP context via
 * HIPContextScope and creates a non-blocking stream (hipStreamNonBlocking)
 * into hip_stream_, with the result checked through hip_device_assert.
 * NOTE(review): this chunk is extraction-garbled — the leading digits
 * ("17", "20", "21", ...) are fused original line numbers, and the member
 * initializer list and braces are missing from view; restore from the
 * full file before editing code. */
17HIPDeviceQueue::HIPDeviceQueue(HIPDevice *device)
20 const HIPContextScope scope(hip_device_);
21 hip_device_assert(hip_device_, hipStreamCreateWithFlags(&hip_stream_, hipStreamNonBlocking));
/* Destructor: enter the device's context and release the HIP stream.
 * Unlike the constructor, hipStreamDestroy's result is not wrapped in
 * hip_device_assert here — presumably best-effort cleanup so the
 * destructor cannot fail; confirm against the full file. */
24HIPDeviceQueue::~HIPDeviceQueue()
26 const HIPContextScope scope(hip_device_);
27 hipStreamDestroy(hip_stream_);
/* Estimate how many integrator states can be kept in flight at once.
 * Visible logic: base count = (multiprocessor count * max threads per
 * multiprocessor) * 16, falling back to 65536 threads when the device
 * reports zero; the CYCLES_CONCURRENT_STATES_FACTOR environment variable
 * is read and parsed with atof, and a factor evaluating to 0 is logged
 * via LOG_TRACE.
 * NOTE(review): interior lines are missing from this chunk (original
 * numbering jumps 34->36, 38->43, 43->53) — how `factor` scales
 * num_states, the null-check on factor_str, and the return statement are
 * not visible; `state_size` usage (if any) is also not visible. */
30int HIPDeviceQueue::num_concurrent_states(
const size_t state_size)
const
32 const int max_num_threads = hip_device_->get_num_multiprocessors() *
33 hip_device_->get_max_num_threads_per_multiprocessor();
34 int num_states = ((max_num_threads == 0) ? 65536 : max_num_threads) * 16;
36 const char *factor_str = getenv(
"CYCLES_CONCURRENT_STATES_FACTOR");
38 const float factor = (
float)atof(factor_str);
43 LOG_TRACE <<
"CYCLES_CONCURRENT_STATES_FACTOR evaluated to 0";
/* Number of in-flight states at which the queue is considered "busy":
 * 4x the device's maximum resident thread count (multiprocessors *
 * threads-per-multiprocessor). The size_t parameter is unnamed/unused in
 * the visible code.
 * NOTE(review): the body of the `max_num_threads == 0` branch is missing
 * from this chunk — presumably a fallback constant; confirm against the
 * full file. */
53int HIPDeviceQueue::num_concurrent_busy_states(
const size_t )
const
55 const int max_num_threads = hip_device_->get_num_multiprocessors() *
56 hip_device_->get_max_num_threads_per_multiprocessor();
58 if (max_num_threads == 0) {
62 return 4 * max_num_threads;
/* One-time setup before kernels are enqueued: inside the device's HIP
 * context, upload texture info to the device and synchronize, then run
 * the debug-side initialization hook. */
65void HIPDeviceQueue::init_execution()
68 HIPContextScope scope(hip_device_);
69 hip_device_->load_texture_info();
70 hip_device_assert(hip_device_, hipDeviceSynchronize());
72 debug_init_execution();
/* Fragment of a kernel-enqueue method (the signature and many interior
 * lines are missing from this chunk; original numbering 77..134).
 * Visible flow: bail out if the device is already in an error state;
 * inside the device context, re-upload texture info when needed and
 * synchronize, re-checking for errors; look up the kernel object and its
 * num_threads_per_block; size dynamic shared memory as
 * (num_threads_per_block + 1) * sizeof(int) for some (not visible which)
 * kernels; launch via hipModuleLaunchKernel with the argument array cast
 * through const_cast<void **>; finally return whether the device is still
 * error-free.
 * NOTE(review): grid-size computation, the kernel-type condition guarding
 * the shared-memory sizing, and most hipModuleLaunchKernel arguments are
 * not visible here — do not modify without the full file. */
79 if (hip_device_->have_error()) {
85 const HIPContextScope scope(hip_device_);
88 if (hip_device_->load_texture_info()) {
89 hip_device_assert(hip_device_, hipDeviceSynchronize());
90 if (hip_device_->have_error()) {
96 const HIPDeviceKernel &hip_kernel = hip_device_->kernels.get(kernel);
97 const int num_threads_per_block = hip_kernel.num_threads_per_block;
100 int shared_mem_bytes = 0;
112 shared_mem_bytes = (num_threads_per_block + 1) *
sizeof(
int);
119 assert_success(hipModuleLaunchKernel(hip_kernel.function,
123 num_threads_per_block,
128 const_cast<void **
>(args.
values),
134 return !(hip_device_->have_error());
/* Block until all work queued on hip_stream_ has completed. Returns
 * early (presumably false — the branch body is not visible) if the
 * device is already in an error state; otherwise waits via
 * hipStreamSynchronize, routing failures through assert_success with
 * "synchronize" as the operation label, and returns whether the device
 * remains error-free. */
137bool HIPDeviceQueue::synchronize()
139 if (hip_device_->have_error()) {
143 const HIPContextScope scope(hip_device_);
144 assert_success(hipStreamSynchronize(hip_stream_),
"synchronize");
147 return !(hip_device_->have_error());
/* Fragments of several memory-management methods (original lines
 * 150-213; the method signatures themselves are not visible in this
 * chunk). Visible pieces: two calls delegating allocation to
 * hip_device_->mem_alloc, and three HIPContextScope guards — presumably
 * the queue's zero/copy-to-device/copy-from-device operations, each
 * entering the device context before touching memory; confirm which
 * method each line belongs to against the full file before editing. */
160 hip_device_->mem_alloc(mem);
166 const HIPContextScope scope(hip_device_);
182 hip_device_->mem_alloc(mem);
189 const HIPContextScope scope(hip_device_);
208 const HIPContextScope scope(hip_device_);
/* Error funnel for HIP API calls made by this queue: when `result` is
 * not hipSuccess, records an error on the device via set_error, building
 * the message with string_printf("%s in HIP queue %s (%s)", ...).
 * Visible arguments are `name`, `operation`, and the active-kernel list
 * from debug_active_kernels().
 * NOTE(review): the format has three "%s" slots and whether a HIP error
 * string for `result` is included appears cut off in this chunk —
 * confirm the argument order against the full file. */
215void HIPDeviceQueue::assert_success(hipError_t
result,
const char *operation)
217 if (
result != hipSuccess) {
219 hip_device_->set_error(
220 string_printf(
"%s in HIP queue %s (%s)",
name, operation, debug_active_kernels().c_str()));
/* Fragment of a factory method (signature not visible in this chunk):
 * returns a freshly constructed HIPDeviceGraphicsInterop wrapping this
 * queue, owned via make_unique. */
226 return make_unique<HIPDeviceGraphicsInterop>(
this);
device_ptr device_pointer
#define CCL_NAMESPACE_END
#define assert(assertion)
ccl_gpu_kernel_postfix const ccl_global int ccl_global float const int work_size
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY
string string_human_readable_size(size_t size)
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
ccl_device_inline size_t divide_up(const size_t x, const size_t y)