Blender V4.3
work_tile_scheduler.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
6
7#include "device/queue.h"
8#include "integrator/tile.h"
9#include "session/buffers.h"
10#include "util/atomic.h"
11#include "util/log.h"
12
14
16
18{
19 accelerated_rt_ = accelerated_rt;
20}
21
22void WorkTileScheduler::set_max_num_path_states(int max_num_path_states)
23{
24 max_num_path_states_ = max_num_path_states;
25}
26
27void WorkTileScheduler::reset(const BufferParams &buffer_params,
28 int sample_start,
29 int samples_num,
30 int sample_offset,
31 float scrambling_distance)
32{
33 /* Image buffer parameters. */
34 image_full_offset_px_.x = buffer_params.full_x;
35 image_full_offset_px_.y = buffer_params.full_y;
36
37 image_size_px_ = make_int2(buffer_params.width, buffer_params.height);
38 scrambling_distance_ = scrambling_distance;
39
40 offset_ = buffer_params.offset;
41 stride_ = buffer_params.stride;
42
43 /* Samples parameters. */
44 sample_start_ = sample_start;
45 samples_num_ = samples_num;
46 sample_offset_ = sample_offset;
47
48 /* Initialize new scheduling. */
50}
51
53{
56
57 const int num_path_states_in_tile = tile_size_.width * tile_size_.height *
59
60 if (num_path_states_in_tile == 0) {
61 VLOG_WORK << "Will not schedule any tiles: no work remained for the device";
62 num_tiles_x_ = 0;
63 num_tiles_y_ = 0;
65 }
66 else {
67 const int num_tiles = max_num_path_states_ / num_path_states_in_tile;
68 VLOG_WORK << "Will schedule " << num_tiles << " tiles of " << tile_size_;
69
70 /* The logging is based on multiple tiles scheduled, ignoring overhead of multi-tile
71 * scheduling and purely focusing on the number of used path states. */
72 VLOG_WORK << "Number of unused path states: "
73 << max_num_path_states_ - num_tiles * num_path_states_in_tile;
74
78 }
79
81
84}
85
86bool WorkTileScheduler::get_work(KernelWorkTile *work_tile_, const int max_work_size)
87{
88 /* Note that the `max_work_size` can be higher than the `max_num_path_states_`: this is because
89 * the path trace work can decide to use smaller tile sizes and greedily schedule multiple tiles,
90 * improving overall device occupancy.
91 * So the `max_num_path_states_` is a "scheduling unit", and the `max_work_size` is a "scheduling
92 * limit". */
93
95
96 const int work_index = next_work_index_++;
97 if (work_index >= total_work_size_) {
98 return false;
99 }
100
101 const int sample_range_index = work_index % num_tiles_per_sample_range_;
102 const int start_sample = sample_range_index * tile_size_.num_samples;
103 const int tile_index = work_index / num_tiles_per_sample_range_;
104 const int tile_y = tile_index / num_tiles_x_;
105 const int tile_x = tile_index - tile_y * num_tiles_x_;
106
107 KernelWorkTile work_tile;
108 work_tile.x = tile_x * tile_size_.width;
109 work_tile.y = tile_y * tile_size_.height;
110 work_tile.w = tile_size_.width;
111 work_tile.h = tile_size_.height;
112 work_tile.start_sample = sample_start_ + start_sample;
113 work_tile.num_samples = min(tile_size_.num_samples, samples_num_ - start_sample);
114 work_tile.sample_offset = sample_offset_;
115 work_tile.offset = offset_;
116 work_tile.stride = stride_;
117
118 work_tile.w = min(work_tile.w, image_size_px_.x - work_tile.x);
119 work_tile.h = min(work_tile.h, image_size_px_.y - work_tile.y);
120
121 work_tile.x += image_full_offset_px_.x;
122 work_tile.y += image_full_offset_px_.y;
123
124 const int tile_work_size = work_tile.w * work_tile.h * work_tile.num_samples;
125
126 DCHECK_GT(tile_work_size, 0);
127
128 if (max_work_size && tile_work_size > max_work_size) {
129 /* The work did not fit into the requested limit of the work size. Unschedule the tile,
130 * so it can be picked up again later. */
132 return false;
133 }
134
135 *work_tile_ = work_tile;
136
137 return true;
138}
139
NODE_DECLARE int width
Definition buffers.h:72
bool get_work(KernelWorkTile *work_tile, const int max_work_size=0)
void set_accelerated_rt(bool state)
void set_max_num_path_states(int max_num_path_states)
void reset(const BufferParams &buffer_params, int sample_start, int samples_num, int sample_offset, float scrambling_distance)
#define CCL_NAMESPACE_END
ccl_device_forceinline int2 make_int2(const int x, const int y)
TileSize tile_calculate_best_size(const bool accel_rt, const int2 &image_size, const int num_samples, const int max_num_path_states, const float scrambling_distance)
ccl_gpu_kernel_postfix ccl_global KernelWorkTile const int num_tiles
const int tile_index
#define VLOG_WORK
Definition log.h:75
#define DCHECK_GT(a, b)
Definition log.h:60
#define DCHECK_NE(a, b)
Definition log.h:58
#define min(a, b)
Definition sort.c:32
int x
Definition types_int2.h:15
int y
Definition types_int2.h:15
ccl_device_inline size_t divide_up(size_t x, size_t y)
Definition util/types.h:53