Blender V4.3
hip/queue.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#ifdef WITH_HIP
6
7# include "device/hip/queue.h"
8
11# include "device/hip/kernel.h"
12
14
15/* HIPDeviceQueue */
16
17HIPDeviceQueue::HIPDeviceQueue(HIPDevice *device)
18 : DeviceQueue(device), hip_device_(device), hip_stream_(nullptr)
19{
20 const HIPContextScope scope(hip_device_);
21 hip_device_assert(hip_device_, hipStreamCreateWithFlags(&hip_stream_, hipStreamNonBlocking));
22}
23
24HIPDeviceQueue::~HIPDeviceQueue()
25{
26 const HIPContextScope scope(hip_device_);
27 hipStreamDestroy(hip_stream_);
28}
29
30int HIPDeviceQueue::num_concurrent_states(const size_t state_size) const
31{
32 const int max_num_threads = hip_device_->get_num_multiprocessors() *
33 hip_device_->get_max_num_threads_per_multiprocessor();
34 int num_states = ((max_num_threads == 0) ? 65536 : max_num_threads) * 16;
35
36 const char *factor_str = getenv("CYCLES_CONCURRENT_STATES_FACTOR");
37 if (factor_str) {
38 const float factor = (float)atof(factor_str);
39 if (factor != 0.0f) {
40 num_states = max((int)(num_states * factor), 1024);
41 }
42 else {
43 VLOG_DEVICE_STATS << "CYCLES_CONCURRENT_STATES_FACTOR evaluated to 0";
44 }
45 }
46
47 VLOG_DEVICE_STATS << "GPU queue concurrent states: " << num_states << ", using up to "
49
50 return num_states;
51}
52
53int HIPDeviceQueue::num_concurrent_busy_states(const size_t /*state_size*/) const
54{
55 const int max_num_threads = hip_device_->get_num_multiprocessors() *
56 hip_device_->get_max_num_threads_per_multiprocessor();
57
58 if (max_num_threads == 0) {
59 return 65536;
60 }
61
62 return 4 * max_num_threads;
63}
64
65void HIPDeviceQueue::init_execution()
66{
67 /* Synchronize all textures and memory copies before executing task. */
68 HIPContextScope scope(hip_device_);
69 hip_device_->load_texture_info();
70 hip_device_assert(hip_device_, hipDeviceSynchronize());
71
72 debug_init_execution();
73}
74
75bool HIPDeviceQueue::enqueue(DeviceKernel kernel,
76 const int work_size,
77 DeviceKernelArguments const &args)
78{
79 if (hip_device_->have_error()) {
80 return false;
81 }
82
83 debug_enqueue_begin(kernel, work_size);
84
85 const HIPContextScope scope(hip_device_);
86 const HIPDeviceKernel &hip_kernel = hip_device_->kernels.get(kernel);
87
88 /* Compute kernel launch parameters. */
89 const int num_threads_per_block = hip_kernel.num_threads_per_block;
90 const int num_blocks = divide_up(work_size, num_threads_per_block);
91
92 int shared_mem_bytes = 0;
93
94 switch (kernel) {
103 /* See parall_active_index.h for why this amount of shared memory is needed. */
104 shared_mem_bytes = (num_threads_per_block + 1) * sizeof(int);
105 break;
106 default:
107 break;
108 }
109
110 /* Launch kernel. */
111 assert_success(hipModuleLaunchKernel(hip_kernel.function,
112 num_blocks,
113 1,
114 1,
115 num_threads_per_block,
116 1,
117 1,
118 shared_mem_bytes,
119 hip_stream_,
120 const_cast<void **>(args.values),
121 0),
122 "enqueue");
123
124 debug_enqueue_end();
125
126 return !(hip_device_->have_error());
127}
128
129bool HIPDeviceQueue::synchronize()
130{
131 if (hip_device_->have_error()) {
132 return false;
133 }
134
135 const HIPContextScope scope(hip_device_);
136 assert_success(hipStreamSynchronize(hip_stream_), "synchronize");
137 debug_synchronize();
138
139 return !(hip_device_->have_error());
140}
141
142void HIPDeviceQueue::zero_to_device(device_memory &mem)
143{
144 assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
145
146 if (mem.memory_size() == 0) {
147 return;
148 }
149
150 /* Allocate on demand. */
151 if (mem.device_pointer == 0) {
152 hip_device_->mem_alloc(mem);
153 }
154
155 /* Zero memory on device. */
156 assert(mem.device_pointer != 0);
157
158 const HIPContextScope scope(hip_device_);
159 assert_success(
160 hipMemsetD8Async((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size(), hip_stream_),
161 "zero_to_device");
162}
163
164void HIPDeviceQueue::copy_to_device(device_memory &mem)
165{
166 assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
167
168 if (mem.memory_size() == 0) {
169 return;
170 }
171
172 /* Allocate on demand. */
173 if (mem.device_pointer == 0) {
174 hip_device_->mem_alloc(mem);
175 }
176
177 assert(mem.device_pointer != 0);
178 assert(mem.host_pointer != nullptr);
179
180 /* Copy memory to device. */
181 const HIPContextScope scope(hip_device_);
182 assert_success(
183 hipMemcpyHtoDAsync(
184 (hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size(), hip_stream_),
185 "copy_to_device");
186}
187
188void HIPDeviceQueue::copy_from_device(device_memory &mem)
189{
190 assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
191
192 if (mem.memory_size() == 0) {
193 return;
194 }
195
196 assert(mem.device_pointer != 0);
197 assert(mem.host_pointer != nullptr);
198
199 /* Copy memory from device. */
200 const HIPContextScope scope(hip_device_);
201 assert_success(
202 hipMemcpyDtoHAsync(
203 mem.host_pointer, (hipDeviceptr_t)mem.device_pointer, mem.memory_size(), hip_stream_),
204 "copy_from_device");
205}
206
207void HIPDeviceQueue::assert_success(hipError_t result, const char *operation)
208{
209 if (result != hipSuccess) {
210 const char *name = hipewErrorString(result);
211 hip_device_->set_error(
212 string_printf("%s in HIP queue %s (%s)", name, operation, debug_active_kernels().c_str()));
213 }
214}
215
216unique_ptr<DeviceGraphicsInterop> HIPDeviceQueue::graphics_interop_create()
217{
218 return make_unique<HIPDeviceGraphicsInterop>(this);
219}
220
222
223#endif /* WITH_HIP */
@ MEM_TEXTURE
#define CCL_NAMESPACE_END
draw_view in_light_buf[] float
ccl_gpu_kernel_postfix ccl_global const int ccl_global float const int work_size
DeviceKernel
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY
#define VLOG_DEVICE_STATS
Definition log.h:78
string string_human_readable_size(size_t size)
Definition string.cpp:234
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
Definition string.cpp:23
void * values[MAX_ARGS]
float max
ccl_device_inline size_t divide_up(size_t x, size_t y)
Definition util/types.h:53