Blender V5.0
hiprt/queue.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_HIPRT
6
7# include "device/hiprt/queue.h"
8
10# include "device/hip/kernel.h"
12
14
16
/* Construct a queue for the given HIP-RT device.
 *
 * The device pointer is handed to the HIPDeviceQueue base class (cast to HIPDevice *) and is
 * also kept in hiprt_device_ so that enqueue() can reach the HIP-RT specific members. */
17HIPRTDeviceQueue::HIPRTDeviceQueue(HIPRTDevice *device)
18 : HIPDeviceQueue((HIPDevice *)device), hiprt_device_(device)
19{
20}
21
/* Enqueue `kernel` with `work_size` work items and the given arguments.
 *
 * Kernels for which device_kernel_has_intersection() is false are forwarded unchanged to
 * HIPDeviceQueue::enqueue(). All other kernels are launched here, with the device's global
 * stack buffer involved in the argument list.
 *
 * Returns false when the device already reports an error on entry, or when the global stack
 * buffer could not be created. Otherwise returns true only if the device reports no error
 * after the launch has been issued. */
22bool HIPRTDeviceQueue::enqueue(DeviceKernel kernel,
23 const int work_size,
24 const DeviceKernelArguments &args)
25{
/* Do not issue any more work once the device is in an error state. */
26 if (hiprt_device_->have_error()) {
27 return false;
28 }
29
/* Kernels without intersection take the base class path and skip the stack buffer setup. */
30 if (!device_kernel_has_intersection(kernel)) {
31 return HIPDeviceQueue::enqueue(kernel, work_size, args);
32 }
33
34 debug_enqueue_begin(kernel, work_size);
35
/* NOTE(review): presumably makes the device's HIP context current until the end of this
 * function - confirm against HIPContextScope. */
36 const HIPContextScope scope(hiprt_device_);
37 const HIPDeviceKernel &hip_kernel = hiprt_device_->kernels.get(kernel);
38
/* Create the global stack buffer on first use: this only runs while stackData is still unset.
 * The last field of the input struct is taken from num_concurrent_states(0). */
39 if (!hiprt_device_->global_stack_buffer.stackData) {
40 uint32_t max_path = num_concurrent_states(0);
41 hiprtGlobalStackBufferInput stack_buffer_input{
42 hiprtStackTypeGlobal, hiprtStackEntryTypeInteger, HIPRT_THREAD_STACK_SIZE, max_path};
43
44 hiprtError rt_result = hiprtCreateGlobalStackBuffer(hiprt_device_->get_hiprt_context(),
45 stack_buffer_input,
46 hiprt_device_->global_stack_buffer);
47
/* Nothing is launched when the buffer cannot be created. Note that this early return does not
 * go through debug_enqueue_end(). */
48 if (rt_result != hiprtSuccess) {
49 LOG_ERROR << "Failed to create hiprt Global Stack Buffer";
50 return false;
51 }
52 }
53
/* Work on a copy because `args` is a const reference and cannot be modified here. */
54 DeviceKernelArguments args_copy = args;
/* NOTE(review): listing line 55 is missing here, so the start of the call that the next two
 * argument lines belong to is not visible. Presumably it is an args_copy.add(...) call which
 * appends the global stack buffer as an extra kernel argument - confirm against the original
 * file before relying on this. */
56 (void *)(&hiprt_device_->global_stack_buffer),
57 sizeof(hiprtGlobalStackBuffer));
58
59 /* Compute kernel launch parameters. */
60 const int num_threads_per_block = HIPRT_THREAD_GROUP_SIZE;
61 const int num_blocks = divide_up(work_size, num_threads_per_block);
62 int shared_mem_bytes = 0;
63
/* The launch uses num_blocks x 1 x 1 blocks of num_threads_per_block x 1 x 1 threads on
 * hip_stream_, with shared_mem_bytes (zero) passed for shared memory. The kernel parameters
 * come from args_copy.values and the trailing `extra` argument is left null.
 * NOTE(review): assert_success() presumably records a failure on the device, which the final
 * have_error() check would then pick up - confirm. */
64 assert_success(hipModuleLaunchKernel(hip_kernel.function,
65 num_blocks,
66 1,
67 1,
68 num_threads_per_block,
69 1,
70 1,
71 shared_mem_bytes,
72 hip_stream_,
73 const_cast<void **>(args_copy.values),
74 nullptr),
75 "enqueue");
76
77 debug_enqueue_end();
78
/* Success means the device has no error recorded after the launch was issued. */
79 return !(hiprt_device_->have_error());
80}
81
83
84#endif /* WITH_HIPRT */
#define CCL_NAMESPACE_END
#define HIPRT_THREAD_STACK_SIZE
#define HIPRT_THREAD_GROUP_SIZE
bool device_kernel_has_intersection(DeviceKernel kernel)
ccl_gpu_kernel_postfix const ccl_global int ccl_global float const int work_size
DeviceKernel
#define LOG_ERROR
Definition log.h:101
void * values[MAX_ARGS]
void add(const KernelFilmConvert *value)
ccl_device_inline size_t divide_up(const size_t x, const size_t y)
Definition types_base.h:52