Blender V5.0
work_tile_scheduler.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
6
7#include "device/queue.h"
8#include "integrator/tile.h"
9#include "session/buffers.h"
10#include "util/log.h"
11
13
15
17{
18 accelerated_rt_ = accelerated_rt;
19}
20
21void WorkTileScheduler::set_max_num_path_states(const int max_num_path_states)
22{
23 max_num_path_states_ = max_num_path_states;
24}
25
26void WorkTileScheduler::reset(const BufferParams &buffer_params,
27 const int sample_start,
28 const int samples_num,
29 const int sample_offset,
30 const float scrambling_distance)
31{
32 /* Image buffer parameters. */
33 image_full_offset_px_.x = buffer_params.full_x;
34 image_full_offset_px_.y = buffer_params.full_y;
35
36 image_size_px_ = make_int2(buffer_params.width, buffer_params.height);
37 scrambling_distance_ = scrambling_distance;
38
39 offset_ = buffer_params.offset;
40 stride_ = buffer_params.stride;
41
42 /* Samples parameters. */
43 sample_start_ = sample_start;
44 samples_num_ = samples_num;
45 sample_offset_ = sample_offset;
46
47 /* Initialize new scheduling. */
49}
50
/* NOTE(review): the function signature and several statements of this block appear to be
 * missing from this extract (the original line numbers jump over 51, 53-54, 63, 74-76, 79 and
 * 81-82) -- confirm against the upstream file before editing.
 *
 * The visible part computes the number of path states in a single tile as
 * width * height * num_samples of `tile_size_`. When that is zero it logs that no tiles will be
 * scheduled and zeroes the tile counts; otherwise it logs how many tiles fit into
 * `max_num_path_states_` and how many path states remain unused. */
52{
55

56 const int num_path_states_in_tile = tile_size_.width * tile_size_.height *
57 tile_size_.num_samples;
58
59 if (num_path_states_in_tile == 0) {
60 LOG_DEBUG << "Will not schedule any tiles: no work remained for the device";
61 num_tiles_x_ = 0;
62 num_tiles_y_ = 0;
64 }
65 else {
66 const int num_tiles = max_num_path_states_ / num_path_states_in_tile;
67 LOG_DEBUG << "Will schedule " << num_tiles << " tiles of " << tile_size_;
68
69 /* The logging is based on multiple tiles scheduled, ignoring overhead of multi-tile
70 * scheduling and purely focusing on the number of used path states. */
71 LOG_DEBUG << "Number of unused path states: "
72 << max_num_path_states_ - num_tiles * num_path_states_in_tile;
73
77 }
78
80
83}
84
85bool WorkTileScheduler::get_work(KernelWorkTile *work_tile_, const int max_work_size)
86{
87 /* Note that the `max_work_size` can be higher than the `max_num_path_states_`: this is because
88 * the path trace work can decide to use smaller tile sizes and greedily schedule multiple tiles,
89 * improving overall device occupancy.
90 * So the `max_num_path_states_` is a "scheduling unit", and the `max_work_size` is a "scheduling
91 * limit". */
92
94
95 const int work_index = next_work_index_++;
96 if (work_index >= total_work_size_) {
97 return false;
98 }
99
100 const int sample_range_index = work_index % num_tiles_per_sample_range_;
101 const int start_sample = sample_range_index * tile_size_.num_samples;
102 const int tile_index = work_index / num_tiles_per_sample_range_;
103 const int tile_y = tile_index / num_tiles_x_;
104 const int tile_x = tile_index - tile_y * num_tiles_x_;
105
106 KernelWorkTile work_tile;
107 work_tile.x = tile_x * tile_size_.width;
108 work_tile.y = tile_y * tile_size_.height;
109 work_tile.w = tile_size_.width;
110 work_tile.h = tile_size_.height;
111 work_tile.start_sample = sample_start_ + start_sample;
112 work_tile.num_samples = min(tile_size_.num_samples, samples_num_ - start_sample);
113 work_tile.sample_offset = sample_offset_;
114 work_tile.offset = offset_;
115 work_tile.stride = stride_;
116
117 work_tile.w = min(work_tile.w, image_size_px_.x - work_tile.x);
118 work_tile.h = min(work_tile.h, image_size_px_.y - work_tile.y);
119
120 work_tile.x += image_full_offset_px_.x;
121 work_tile.y += image_full_offset_px_.y;
122
123 const int tile_work_size = work_tile.w * work_tile.h * work_tile.num_samples;
124
125 DCHECK_GT(tile_work_size, 0);
126
127 if (max_work_size && tile_work_size > max_work_size) {
128 /* The work did not fit into the requested limit of the work size. Unschedule the tile,
129 * so it can be picked up again later. */
131 return false;
132 }
133
134 *work_tile_ = work_tile;
135
136 return true;
137}
138
NODE_DECLARE int width
Definition buffers.h:70
void set_accelerated_rt(bool accelerated_rt)
void reset(const BufferParams &buffer_params, const int sample_start, const int samples_num, const int sample_offset, float scrambling_distance)
void set_max_num_path_states(const int max_num_path_states)
bool get_work(KernelWorkTile *work_tile, const int max_work_size=0)
#define CCL_NAMESPACE_END
ccl_device_forceinline int2 make_int2(const int x, const int y)
TileSize tile_calculate_best_size(const bool accel_rt, const int2 &image_size, const int num_samples, const int max_num_path_states, const float scrambling_distance)
ccl_gpu_kernel_postfix ccl_global KernelWorkTile const int num_tiles
const int tile_index
#define LOG_DEBUG
Definition log.h:107
#define DCHECK_GT(a, b)
Definition log.h:145
#define DCHECK_NE(a, b)
Definition log.h:143
#define min(a, b)
Definition sort.cc:36
ccl_device_inline size_t divide_up(const size_t x, const size_t y)
Definition types_base.h:52