17HIPRTDeviceQueue::HIPRTDeviceQueue(HIPRTDevice *device)
18 : HIPDeviceQueue((HIPDevice *)device), hiprt_device_(device)
26 if (hiprt_device_->have_error()) {
31 return HIPDeviceQueue::enqueue(kernel,
work_size, args);
36 const HIPContextScope scope(hiprt_device_);
37 const HIPDeviceKernel &hip_kernel = hiprt_device_->kernels.get(kernel);
39 if (!hiprt_device_->global_stack_buffer.stackData) {
40 uint32_t max_path = num_concurrent_states(0);
41 hiprtGlobalStackBufferInput stack_buffer_input{
44 hiprtError rt_result = hiprtCreateGlobalStackBuffer(hiprt_device_->get_hiprt_context(),
46 hiprt_device_->global_stack_buffer);
48 if (rt_result != hiprtSuccess) {
49 LOG_ERROR <<
"Failed to create hiprt Global Stack Buffer";
56 (
void *)(&hiprt_device_->global_stack_buffer),
57 sizeof(hiprtGlobalStackBuffer));
62 int shared_mem_bytes = 0;
64 assert_success(hipModuleLaunchKernel(hip_kernel.function,
68 num_threads_per_block,
73 const_cast<void **
>(args_copy.
values),
79 return !(hiprt_device_->have_error());
#define CCL_NAMESPACE_END
#define HIPRT_THREAD_STACK_SIZE
#define HIPRT_THREAD_GROUP_SIZE
bool device_kernel_has_intersection(DeviceKernel kernel)
ccl_gpu_kernel_postfix const ccl_global int ccl_global float const int work_size
void add(const KernelFilmConvert *value)
ccl_device_inline size_t divide_up(const size_t x, const size_t y)