Blender V4.3
integrator/tile.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#include "integrator/tile.h"
6
7#include "util/log.h"
8#include "util/math.h"
9
10#include <ostream>
11
13
14std::ostream &operator<<(std::ostream &os, const TileSize &tile_size)
15{
16 os << "size: (" << tile_size.width << ", " << tile_size.height << ")";
17 os << ", num_samples: " << tile_size.num_samples;
18 return os;
19}
20
22{
23 if (is_power_of_two(x)) {
24 return x;
25 }
26
27 return prev_power_of_two(x);
28}
29
31{
32 if (is_power_of_two(x)) {
33 return x;
34 }
35
36 return next_power_of_two(x);
37}
38
40 const int2 &image_size,
41 const int num_samples,
42 const int max_num_path_states,
43 const float scrambling_distance)
44{
45 if (max_num_path_states == 1) {
46 /* Simple case: avoid any calculation, which could cause rounding issues. */
47 return TileSize(1, 1, 1);
48 }
49
50 const int64_t num_pixels = image_size.x * image_size.y;
51 const int64_t num_pixel_samples = num_pixels * num_samples;
52
53 if (max_num_path_states >= num_pixel_samples) {
54 /* Image fully fits into the state (could be border render, for example). */
55 return TileSize(image_size.x, image_size.y, num_samples);
56 }
57
58 /* The idea here is to keep number of samples per tile as much as possible to improve coherency
59 * across threads.
60 *
61 * Some general ideas:
62 * - Prefer smaller tiles with more samples, which improves spatial coherency of paths.
63 * - Keep values a power of two, for more integer fit into the maximum number of paths. */
64
65 TileSize tile_size;
66 const int num_path_states_per_sample = max_num_path_states / num_samples;
67 if (scrambling_distance < 0.9f && accel_rt) {
68 /* Prefer large tiles for scrambling distance, bounded by max num path states. */
69 tile_size.width = min(image_size.x, max_num_path_states);
70 tile_size.height = min(image_size.y, max(max_num_path_states / tile_size.width, 1));
71 }
72 else {
73 /* Calculate tile size as if it is the most possible one to fit an entire range of samples.
74 * The idea here is to keep tiles as small as possible, and keep device occupied by scheduling
75 * multiple tiles with the same coordinates rendering different samples. */
76
77 if (num_path_states_per_sample != 0) {
78 tile_size.width = round_down_to_power_of_two(lround(sqrt(num_path_states_per_sample)));
79 tile_size.height = tile_size.width;
80 }
81 else {
82 tile_size.width = tile_size.height = 1;
83 }
84 }
85
86 if (num_samples == 1) {
87 tile_size.num_samples = 1;
88 }
89 else {
90 /* Heuristic here is to have more uniform division of the sample range: for example prefer
91 * [32 <38 times>, 8] over [1024, 200]. This allows to greedily add more tiles early on. */
92 tile_size.num_samples = min(round_up_to_power_of_two(lround(sqrt(num_samples / 2))),
93 static_cast<uint>(num_samples));
94
95 const int tile_area = tile_size.width * tile_size.height;
96 tile_size.num_samples = min(tile_size.num_samples, max_num_path_states / tile_area);
97 }
98
99 DCHECK_GE(tile_size.width, 1);
100 DCHECK_GE(tile_size.height, 1);
101 DCHECK_GE(tile_size.num_samples, 1);
102 DCHECK_LE(tile_size.width * tile_size.height * tile_size.num_samples, max_num_path_states);
103
104 return tile_size;
105}
106
sqrt(x)+1/max(0
unsigned int uint
#define ccl_device_inline
#define CCL_NAMESPACE_END
ccl_device_inline uint round_down_to_power_of_two(uint x)
CCL_NAMESPACE_BEGIN std::ostream & operator<<(std::ostream &os, const TileSize &tile_size)
TileSize tile_calculate_best_size(const bool accel_rt, const int2 &image_size, const int num_samples, const int max_num_path_states, const float scrambling_distance)
ccl_device_inline uint round_up_to_power_of_two(uint x)
#define DCHECK_GE(a, b)
Definition log.h:57
#define DCHECK_LE(a, b)
Definition log.h:62
#define min(a, b)
Definition sort.c:32
__int64 int64_t
Definition stdint.h:89
int x
Definition types_int2.h:15
int y
Definition types_int2.h:15
ccl_device_inline uint next_power_of_two(uint x)
Definition util/math.h:1000
ccl_device_inline uint prev_power_of_two(uint x)
Definition util/math.h:1006
ccl_device_inline bool is_power_of_two(size_t x)
Definition util/types.h:68