15HIPRTDeviceQueue::HIPRTDeviceQueue(HIPRTDevice *device)
16 : HIPDeviceQueue((HIPDevice *)device), hiprt_device_(device)
24 if (hiprt_device_->have_error()) {
29 return HIPDeviceQueue::enqueue(kernel,
work_size, args);
34 const HIPContextScope scope(hiprt_device_);
35 const HIPDeviceKernel &hip_kernel = hiprt_device_->kernels.get(kernel);
37 if (!hiprt_device_->global_stack_buffer.stackData) {
38 uint32_t max_path = num_concurrent_states(0);
39 hiprtGlobalStackBufferInput stack_buffer_input{
42 hiprtError rt_result = hiprtCreateGlobalStackBuffer(hiprt_device_->get_hiprt_context(),
44 hiprt_device_->global_stack_buffer);
46 if (rt_result != hiprtSuccess) {
47 LOG(ERROR) <<
"Failed to create hiprt Global Stack Buffer";
54 (
void *)(&hiprt_device_->global_stack_buffer),
55 sizeof(hiprtGlobalStackBuffer));
60 int shared_mem_bytes = 0;
62 assert_success(hipModuleLaunchKernel(hip_kernel.function,
66 num_threads_per_block,
71 const_cast<void **
>(args_copy.
values),
77 return !(hiprt_device_->have_error());
#define CCL_NAMESPACE_END
#define HIPRT_THREAD_STACK_SIZE
#define HIPRT_THREAD_GROUP_SIZE
bool device_kernel_has_intersection(DeviceKernel kernel)
ccl_gpu_kernel_postfix ccl_global const int ccl_global float const int work_size
void add(const KernelFilmConvert *value)
ccl_device_inline size_t divide_up(size_t x, size_t y)