Blender V5.0
path_trace_work_gpu.h
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
 *
 * SPDX-License-Identifier: Apache-2.0 */

#pragma once

#include "kernel/types.h"

#include "device/graphics_interop.h"
#include "device/memory.h"
#include "device/queue.h"

#include "integrator/path_trace_work.h"
#include "integrator/work_tile_scheduler.h"

#include "util/vector.h"

CCL_NAMESPACE_BEGIN
struct KernelWorkTile;

/* Implementation of PathTraceWork which schedules work to the device in tiles which are sized
 * to match the device queue's number of path states.
 * This implementation best suits devices which have a lot of integrator states, such as GPUs. */
class PathTraceWorkGPU : public PathTraceWork {
 public:
  PathTraceWorkGPU(Device *device,
                   Film *film,
                   DeviceScene *device_scene,
                   const bool *cancel_requested_flag);

  void alloc_work_memory() override;
  void init_execution() override;

  void render_samples(RenderStatistics &statistics,
                      const int start_sample,
                      const int samples_num,
                      const int sample_offset) override;

  void copy_to_display(PathTraceDisplay *display,
                       PassMode pass_mode,
                       const int num_samples) override;
  void destroy_gpu_resources(PathTraceDisplay *display) override;

  bool copy_render_buffers_from_device() override;
  bool copy_render_buffers_to_device() override;
  bool zero_render_buffers() override;

  int adaptive_sampling_converge_filter_count_active(const float threshold, bool reset) override;
  void cryptomatte_postproces() override;
  void denoise_volume_guiding_buffers() override;
 protected:
  void alloc_integrator_soa();
  void alloc_integrator_queue();
  void alloc_integrator_sorting();
  void alloc_integrator_path_split();

  /* Returns DEVICE_KERNEL_NUM if there are no scheduled kernels. */
  DeviceKernel get_most_queued_kernel() const;

  void enqueue_reset();

  bool enqueue_work_tiles(bool &finished);
  void enqueue_work_tiles(DeviceKernel kernel,
                          const KernelWorkTile work_tiles[],
                          const int num_work_tiles,
                          const int num_active_paths,
                          const int num_predicted_splits);

  bool enqueue_path_iteration();
  void enqueue_path_iteration(DeviceKernel kernel, const int num_paths_limit = INT_MAX);

  void compute_queued_paths(DeviceKernel kernel, DeviceKernel queued_kernel);
  void compute_sorted_queued_paths(DeviceKernel queued_kernel, const int num_paths_limit);

  void compact_main_paths(const int num_active_paths);
  void compact_shadow_paths();
  void compact_paths(const int num_active_paths,
                     const int max_active_path_index,
                     DeviceKernel terminated_paths_kernel,
                     DeviceKernel compact_paths_kernel,
                     DeviceKernel compact_kernel);

  int num_active_main_paths_paths();

  /* Check whether graphics interop can be used for the PathTraceDisplay update. */
  bool should_use_graphics_interop(PathTraceDisplay *display);

  /* Naive implementation of `copy_to_display()` which performs film conversion on the
   * device, then copies pixels to the host and pushes them to the `display`. */
  void copy_to_display_naive(PathTraceDisplay *display, PassMode pass_mode, const int num_samples);

  /* Implementation of `copy_to_display()` which uses the driver's OpenGL/GPU interoperability
   * functionality, avoiding a copy of pixels to the host. */
  bool copy_to_display_interop(PathTraceDisplay *display,
                               PassMode pass_mode,
                               const int num_samples);

  /* Synchronously run the film conversion kernel and store the display result in the given
   * destination. */
  void get_render_tile_film_pixels(const PassAccessor::Destination &destination,
                                   PassMode pass_mode,
                                   int num_samples);

  int adaptive_sampling_convergence_check_count_active(const float threshold, bool reset);
  void enqueue_adaptive_sampling_filter_x();
  void enqueue_adaptive_sampling_filter_y();

  bool has_shadow_catcher() const;

  /* Count how many currently scheduled paths can still split. */
  int shadow_catcher_count_possible_splits();
  /* Kernel properties. */
  bool kernel_uses_sorting(DeviceKernel kernel);
  bool kernel_creates_shadow_paths(DeviceKernel kernel);
  bool kernel_is_shadow_path(DeviceKernel kernel);
  bool kernel_creates_ao_paths(DeviceKernel kernel);

  int kernel_max_active_main_path_index(DeviceKernel kernel);

  /* Integrator queue. */
  unique_ptr<DeviceQueue> queue_;

  /* Scheduler which gives work to path tracing threads. */
  WorkTileScheduler work_tile_scheduler_;

  /* Integrator state for paths. */
  IntegratorStateGPU integrator_state_gpu_;
  /* SoA arrays for integrator state. */
  vector<unique_ptr<device_memory>> integrator_state_soa_;
  uint integrator_state_soa_kernel_features_;
  int integrator_state_soa_volume_stack_size_ = 0;
  /* Keep track of number of queued kernels. */
  device_vector<IntegratorQueueCounter> integrator_queue_counter_;
  /* Shader sorting. */
  device_vector<int> integrator_shader_sort_counter_;
  device_vector<int> integrator_shader_raytrace_sort_counter_;
  device_vector<int> integrator_shader_mnee_sort_counter_;
  device_vector<int> integrator_shader_sort_prefix_sum_;
  device_vector<int> integrator_shader_sort_partition_key_offsets_;
  /* Path split. */
  device_vector<int> integrator_next_main_path_index_;
  device_vector<int> integrator_next_shadow_path_index_;

  /* Temporary buffer to get an array of queued paths for a particular kernel. */
  device_vector<int> queued_paths_;
  device_vector<int> num_queued_paths_;

  /* Temporary buffer for passing work tiles to the kernel. */
  device_vector<KernelWorkTile> work_tiles_;

  /* Temporary buffer used by `copy_to_display()` whenever graphics interoperability is not
   * available. Allocated on demand. */
  device_vector<half4> display_rgba_half_;

  unique_ptr<DeviceGraphicsInterop> device_graphics_interop_;

  /* Cached result of device->should_use_graphics_interop(). */
  bool interop_use_checked_ = false;
  bool interop_use_ = false;

  /* Number of partitions to sort state indices into prior to material sort. */
  int num_sort_partitions_;

  /* Maximum number of concurrent integrator states. */
  int max_num_paths_;

  /* Minimum number of paths which keeps the device busy. If the actual number of paths falls
   * below this value, more work will be scheduled. */
  int min_num_active_main_paths_;

  /* Maximum path index; the effective number of paths used may be smaller than the size of
   * the integrator_state_ buffer, so iteration over the full buffer can be avoided. */
  int max_active_main_path_index_;
};

CCL_NAMESPACE_END
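To make the scheduling scheme described in the class comment concrete, below is a minimal, self-contained host-side sketch of a wavefront-style loop: refill camera-ray work while occupancy is low, then repeatedly run the kernel with the most queued paths until everything terminates. All names in it are toy stand-ins (two kernels, plain counters), not the real render_samples() implementation; the corresponding members above are queue_, integrator_queue_counter_, work_tile_scheduler_, max_num_paths_ and min_num_active_main_paths_.

#include <algorithm>
#include <array>
#include <cstdio>

enum Kernel { KERNEL_CAMERA_RAY = 0, KERNEL_SHADE_SURFACE = 1, KERNEL_NUM = 2 };

int main()
{
  std::array<int, KERNEL_NUM> queued = {0, 0}; /* per-kernel queue counters */

  const int max_num_paths = 8;    /* cf. max_num_paths_ */
  const int min_active_paths = 4; /* cf. min_num_active_main_paths_ */
  int remaining_tiles = 3;        /* work tiles left in the scheduler */
  int active_paths = 0;

  while (true) {
    /* Refill: when occupancy drops below the threshold, spawn camera rays
     * from the next work tile (cf. enqueue_work_tiles()). */
    if (active_paths < min_active_paths && remaining_tiles > 0) {
      const int new_paths = std::min(max_num_paths - active_paths, 4);
      queued[KERNEL_CAMERA_RAY] += new_paths;
      active_paths += new_paths;
      remaining_tiles--;
    }

    /* Pick the kernel with the most queued paths (cf. get_most_queued_kernel()). */
    const Kernel kernel = (queued[KERNEL_CAMERA_RAY] >= queued[KERNEL_SHADE_SURFACE]) ?
                              KERNEL_CAMERA_RAY :
                              KERNEL_SHADE_SURFACE;
    if (queued[kernel] == 0) {
      if (remaining_tiles == 0) {
        break; /* nothing queued and no work left: render done */
      }
      continue;
    }

    /* "Execute" the kernel for all of its queued paths (cf. enqueue_path_iteration()). */
    const int num_paths = queued[kernel];
    queued[kernel] = 0;
    if (kernel == KERNEL_CAMERA_RAY) {
      queued[KERNEL_SHADE_SURFACE] += num_paths; /* rays continue on to shading */
      printf("camera_ray:    %d paths\n", num_paths);
    }
    else {
      active_paths -= num_paths; /* in this toy model, shading terminates paths */
      printf("shade_surface: %d paths terminated\n", num_paths);
    }
  }
  return 0;
}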
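The shader-sorting buffers (integrator_shader_sort_counter_, integrator_shader_sort_prefix_sum_, queued_paths_) follow the shape of a counting sort of path indices keyed by shader, so that paths hitting the same shader land adjacent in memory and execute coherently. Here is a minimal host-side sketch of that three-pass pattern with made-up keys; the real sort runs in device kernels and additionally handles sort partitions via integrator_shader_sort_partition_key_offsets_.

#include <cstdio>
#include <vector>

int main()
{
  /* Hypothetical per-path shader keys, standing in for what the device would
   * read from the integrator SoA state. */
  const std::vector<int> shader_key = {2, 0, 1, 2, 0, 2, 1, 0};
  const int num_buckets = 3;

  /* Pass 1: histogram of keys (the role of integrator_shader_sort_counter_). */
  std::vector<int> counter(num_buckets, 0);
  for (const int key : shader_key) {
    counter[key]++;
  }

  /* Pass 2: exclusive prefix sum gives each bucket its output offset
   * (the role of integrator_shader_sort_prefix_sum_). */
  std::vector<int> prefix_sum(num_buckets, 0);
  for (int i = 1; i < num_buckets; i++) {
    prefix_sum[i] = prefix_sum[i - 1] + counter[i - 1];
  }

  /* Pass 3: scatter path indices into their bucket (the role of queued_paths_). */
  std::vector<int> sorted_paths(shader_key.size());
  std::vector<int> offset = prefix_sum;
  for (int path = 0; path < (int)shader_key.size(); path++) {
    sorted_paths[offset[shader_key[path]]++] = path;
  }

  for (const int path : sorted_paths) {
    printf("path %d (shader %d)\n", path, shader_key[path]);
  }
  return 0;
}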
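Finally, copy_to_display() picks between the interop path (copy_to_display_interop(), which returns false on failure) and the host-copy fallback (copy_to_display_naive()), caching the capability check in interop_use_checked_ / interop_use_. A hypothetical sketch of that dispatch pattern follows; Display, update_via_interop() and update_via_host_copy() are stand-ins for illustration, not Cycles API.

#include <cstdio>

struct Display {};

class DisplayUpdater {
 public:
  void copy_to_display(Display *display)
  {
    /* Query the driver capability once and cache the answer, mirroring
     * interop_use_checked_ / interop_use_ in the header above. */
    if (!interop_use_checked_) {
      interop_use_ = device_supports_graphics_interop();
      interop_use_checked_ = true;
    }

    if (interop_use_) {
      /* Fast path: hand the device buffer straight to the display. */
      if (update_via_interop(display)) {
        return;
      }
      interop_use_ = false; /* do not retry a failing interop path */
    }

    /* Slow path: convert on the device, copy pixels to the host, push to display. */
    update_via_host_copy(display);
  }

 private:
  bool device_supports_graphics_interop() { return false; /* stub */ }
  bool update_via_interop(Display * /*display*/) { return false; /* stub */ }
  void update_via_host_copy(Display * /*display*/) { printf("host copy path\n"); }

  bool interop_use_checked_ = false;
  bool interop_use_ = false;
};

int main()
{
  Display display;
  DisplayUpdater updater;
  updater.copy_to_display(&display);
  return 0;
}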