Blender V4.3
path_trace_work_gpu.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#pragma once
6
8
10#include "device/memory.h"
11#include "device/queue.h"
12
15
16#include "util/vector.h"
17
19
20struct KernelWorkTile;
21
22/* Implementation of PathTraceWork which schedules work to the device in tiles which are sized
23 * to match device queue's number of path states.
24 * This implementation best suits devices that have a large number of integrator states, such as GPUs. */
26 public:
28 Film *film,
29 DeviceScene *device_scene,
30 bool *cancel_requested_flag);
31
32 virtual void alloc_work_memory() override;
33 virtual void init_execution() override;
34
35 virtual void render_samples(RenderStatistics &statistics,
36 int start_sample,
37 int samples_num,
38 int sample_offset) override;
39
40 virtual void copy_to_display(PathTraceDisplay *display,
41 PassMode pass_mode,
42 int num_samples) override;
43 virtual void destroy_gpu_resources(PathTraceDisplay *display) override;
44
45 virtual bool copy_render_buffers_from_device() override;
46 virtual bool copy_render_buffers_to_device() override;
47 virtual bool zero_render_buffers() override;
48
49 virtual int adaptive_sampling_converge_filter_count_active(float threshold, bool reset) override;
50 virtual void cryptomatte_postproces() override;
51
52 protected:
57
58 /* Returns DEVICE_KERNEL_NUM if there are no scheduled kernels. */
60
61 void enqueue_reset();
62
63 bool enqueue_work_tiles(bool &finished);
65 const KernelWorkTile work_tiles[],
66 const int num_work_tiles,
67 const int num_active_paths,
68 const int num_predicted_splits);
69
71 void enqueue_path_iteration(DeviceKernel kernel, const int num_paths_limit = INT_MAX);
72
73 void compute_queued_paths(DeviceKernel kernel, DeviceKernel queued_kernel);
74 void compute_sorted_queued_paths(DeviceKernel queued_kernel, const int num_paths_limit);
75
76 void compact_main_paths(const int num_active_paths);
78 void compact_paths(const int num_active_paths,
79 const int max_active_path_index,
80 DeviceKernel terminated_paths_kernel,
81 DeviceKernel compact_paths_kernel,
82 DeviceKernel compact_kernel);
83
85
86 /* Check whether graphics interop can be used for the PathTraceDisplay update. */
88
89 /* Naive implementation of the `copy_to_display()` which performs film conversion on the
90 * device, then copies pixels to the host and pushes them to the `display`. */
91 void copy_to_display_naive(PathTraceDisplay *display, PassMode pass_mode, int num_samples);
92
93 /* Implementation of `copy_to_display()` which uses driver's OpenGL/GPU interoperability
94 * functionality, avoiding copy of pixels to the host. */
95 bool copy_to_display_interop(PathTraceDisplay *display, PassMode pass_mode, int num_samples);
96
97 /* Synchronously run film conversion kernel and store display result in the given destination. */
99 PassMode pass_mode,
100 int num_samples);
101
105
106 bool has_shadow_catcher() const;
107
108 /* Count how many currently scheduled paths can still split. */
110
111 /* Kernel properties. */
117
118 /* Integrator queue. */
119 unique_ptr<DeviceQueue> queue_;
120
121 /* Scheduler which gives work to path tracing threads. */
123
124 /* Integrate state for paths. */
126 /* SoA arrays for integrator state. */
130 /* Keep track of number of queued kernels. */
132 /* Shader sorting. */
138 /* Path split. */
141
142 /* Temporary buffer to get an array of queued path for a particular kernel. */
145
146 /* Temporary buffer for passing work tiles to kernel. */
148
149 /* Temporary buffer used by the copy_to_display() whenever graphics interoperability is not
150 * available. Is allocated on-demand. */
152
153 unique_ptr<DeviceGraphicsInterop> device_graphics_interop_;
154
155 /* Cached result of device->should_use_graphics_interop(). */
157 bool interop_use_ = false;
158
159 /* Number of partitions to sort state indices into prior to material sort. */
161
162 /* Maximum number of concurrent integrator states. */
164
165 /* Minimum number of paths which keeps the device busy. If the actual number of paths falls below
166 * this value, more work will be scheduled. */
168
169 /* Maximum path index, effective number of paths used may be smaller than
170 * the size of the integrator_state_ buffer so can avoid iterating over the
171 * full buffer. */
173};
174
unsigned int uint
void reset()
Clears internal cached data and resets the random seed.
Definition film.h:30
bool kernel_is_shadow_path(DeviceKernel kernel)
virtual bool copy_render_buffers_from_device() override
void compact_paths(const int num_active_paths, const int max_active_path_index, DeviceKernel terminated_paths_kernel, DeviceKernel compact_paths_kernel, DeviceKernel compact_kernel)
IntegratorStateGPU integrator_state_gpu_
device_vector< int > integrator_shader_sort_counter_
PathTraceWorkGPU(Device *device, Film *film, DeviceScene *device_scene, bool *cancel_requested_flag)
device_vector< int > num_queued_paths_
void compute_sorted_queued_paths(DeviceKernel queued_kernel, const int num_paths_limit)
bool copy_to_display_interop(PathTraceDisplay *display, PassMode pass_mode, int num_samples)
virtual void destroy_gpu_resources(PathTraceDisplay *display) override
virtual void alloc_work_memory() override
device_vector< int > integrator_next_main_path_index_
unique_ptr< DeviceQueue > queue_
int adaptive_sampling_convergence_check_count_active(float threshold, bool reset)
virtual bool zero_render_buffers() override
bool kernel_uses_sorting(DeviceKernel kernel)
virtual void init_execution() override
device_vector< int > integrator_shader_sort_prefix_sum_
device_vector< KernelWorkTile > work_tiles_
virtual void cryptomatte_postproces() override
device_vector< IntegratorQueueCounter > integrator_queue_counter_
bool enqueue_work_tiles(bool &finished)
device_vector< int > queued_paths_
void compact_main_paths(const int num_active_paths)
bool kernel_creates_ao_paths(DeviceKernel kernel)
virtual void render_samples(RenderStatistics &statistics, int start_sample, int samples_num, int sample_offset) override
unique_ptr< DeviceGraphicsInterop > device_graphics_interop_
device_vector< int > integrator_next_shadow_path_index_
void compute_queued_paths(DeviceKernel kernel, DeviceKernel queued_kernel)
DeviceKernel get_most_queued_kernel() const
device_vector< int > integrator_shader_raytrace_sort_counter_
bool kernel_creates_shadow_paths(DeviceKernel kernel)
void copy_to_display_naive(PathTraceDisplay *display, PassMode pass_mode, int num_samples)
device_vector< int > integrator_shader_sort_partition_key_offsets_
void get_render_tile_film_pixels(const PassAccessor::Destination &destination, PassMode pass_mode, int num_samples)
vector< unique_ptr< device_memory > > integrator_state_soa_
device_vector< int > integrator_shader_mnee_sort_counter_
WorkTileScheduler work_tile_scheduler_
virtual bool copy_render_buffers_to_device() override
virtual void copy_to_display(PathTraceDisplay *display, PassMode pass_mode, int num_samples) override
virtual int adaptive_sampling_converge_filter_count_active(float threshold, bool reset) override
int kernel_max_active_main_path_index(DeviceKernel kernel)
device_vector< half4 > display_rgba_half_
#define CCL_NAMESPACE_END
DeviceKernel
PassMode
Definition pass.h:20