Blender V4.3
hiprt/queue.cpp
Go to the documentation of this file.
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
 *
 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_HIPRT
6
7# include "device/hiprt/queue.h"
8
10# include "device/hip/kernel.h"
12
14
15HIPRTDeviceQueue::HIPRTDeviceQueue(HIPRTDevice *device)
16 : HIPDeviceQueue((HIPDevice *)device), hiprt_device_(device)
17{
18}
19
20bool HIPRTDeviceQueue::enqueue(DeviceKernel kernel,
21 const int work_size,
22 DeviceKernelArguments const &args)
23{
24 if (hiprt_device_->have_error()) {
25 return false;
26 }
27
28 if (!device_kernel_has_intersection(kernel)) {
29 return HIPDeviceQueue::enqueue(kernel, work_size, args);
30 }
31
32 debug_enqueue_begin(kernel, work_size);
33
34 const HIPContextScope scope(hiprt_device_);
35 const HIPDeviceKernel &hip_kernel = hiprt_device_->kernels.get(kernel);
36
37 if (!hiprt_device_->global_stack_buffer.stackData) {
38 uint32_t max_path = num_concurrent_states(0);
39 hiprtGlobalStackBufferInput stack_buffer_input{
40 hiprtStackTypeGlobal, hiprtStackEntryTypeInteger, HIPRT_THREAD_STACK_SIZE, max_path};
41
42 hiprtError rt_result = hiprtCreateGlobalStackBuffer(hiprt_device_->get_hiprt_context(),
43 stack_buffer_input,
44 hiprt_device_->global_stack_buffer);
45
46 if (rt_result != hiprtSuccess) {
47 LOG(ERROR) << "Failed to create hiprt Global Stack Buffer";
48 return false;
49 }
50 }
51
52 DeviceKernelArguments args_copy = args;
54 (void *)(&hiprt_device_->global_stack_buffer),
55 sizeof(hiprtGlobalStackBuffer));
56
57 /* Compute kernel launch parameters. */
58 const int num_threads_per_block = HIPRT_THREAD_GROUP_SIZE;
59 const int num_blocks = divide_up(work_size, num_threads_per_block);
60 int shared_mem_bytes = 0;
61
62 assert_success(hipModuleLaunchKernel(hip_kernel.function,
63 num_blocks,
64 1,
65 1,
66 num_threads_per_block,
67 1,
68 1,
69 shared_mem_bytes,
70 hip_stream_,
71 const_cast<void **>(args_copy.values),
72 0),
73 "enqueue");
74
75 debug_enqueue_end();
76
77 return !(hiprt_device_->have_error());
78}
79
81
82#endif /* WITH_HIPRT */
#define CCL_NAMESPACE_END
#define HIPRT_THREAD_STACK_SIZE
#define HIPRT_THREAD_GROUP_SIZE
bool device_kernel_has_intersection(DeviceKernel kernel)
ccl_gpu_kernel_postfix ccl_global const int ccl_global float const int work_size
DeviceKernel
#define LOG(severity)
Definition log.h:33
unsigned int uint32_t
Definition stdint.h:80
void * values[MAX_ARGS]
void add(const KernelFilmConvert *value)
ccl_device_inline size_t divide_up(size_t x, size_t y)
Definition util/types.h:53