Blender V4.5
scheduler.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
5#include <algorithm>
6
7#include "BLI_map.hh"
8#include "BLI_set.hh"
9#include "BLI_stack.hh"
10#include "BLI_vector.hh"
11
13
15
16#include "COM_context.hh"
17#include "COM_scheduler.hh"
18#include "COM_utilities.hh"
19
20namespace blender::compositor {
21
22using namespace nodes::derived_node_tree_types;
23
24/* Returns true if any of the node group nodes that make up this tree context are muted. */
25static bool is_tree_context_muted(const DTreeContext &tree_context)
26{
27 /* Root contexts are never muted. */
28 if (tree_context.is_root()) {
29 return false;
30 }
31
32 /* The node group that represents this context is muted. */
33 if (tree_context.parent_node()->is_muted()) {
34 return true;
35 }
36
37 /* Recursively check parent contexts up until the root context. */
38 return is_tree_context_muted(*tree_context.parent_context());
39}
40
41/* Add the active viewer node in the given tree context to the given stack. If viewer nodes are
42 * treated as composite outputs, this function will also add either the viewer or the composite
43 * node since composite nodes were skipped in add_output_nodes such that viewer nodes take
44 * precedence. */
45static bool add_viewer_nodes_in_context(const Context &context,
46 const DTreeContext *tree_context,
47 Stack<DNode> &node_stack)
48{
49 /* Do not add viewers that are inside muted contexts. */
50 if (is_tree_context_muted(*tree_context)) {
51 return false;
52 }
53
54 for (const bNode *node : tree_context->btree().nodes_by_type("CompositorNodeViewer")) {
55 if (node->flag & NODE_DO_OUTPUT && !node->is_muted()) {
56 node_stack.push(DNode(tree_context, node));
57 return true;
58 }
59 }
60
61 /* If we are not treating viewers as composite outputs, there is nothing else to do. Otherwise,
62 * the active composite node might be added. */
63 if (!context.treat_viewer_as_composite_output()) {
64 return false;
65 }
66
67 /* No active viewers exist in this tree context, add the active Composite node if one exist. */
68 for (const bNode *node : tree_context->btree().nodes_by_type("CompositorNodeComposite")) {
69 if (node->flag & NODE_DO_OUTPUT && !node->is_muted()) {
70 node_stack.push(DNode(tree_context, node));
71 return true;
72 }
73 }
74
75 return false;
76}
77
78/* Add all File Output nodes inside the given tree_context recursively to the node stack. */
79static void add_file_output_nodes(const DTreeContext &tree_context, Stack<DNode> &node_stack)
80{
81 for (const bNode *node : tree_context.btree().nodes_by_type("CompositorNodeOutputFile")) {
82 if (node->is_muted()) {
83 continue;
84 }
85
86 node_stack.push(DNode(&tree_context, node));
87 }
88
89 for (const bNode *node : tree_context.btree().group_nodes()) {
90 if (node->is_muted()) {
91 continue;
92 }
93
94 const bNodeTree *group_tree = reinterpret_cast<const bNodeTree *>(node->id);
95 if (!group_tree) {
96 continue;
97 }
98
99 add_file_output_nodes(*tree_context.child_context(*node), node_stack);
100 }
101}
102
103/* Add the output nodes whose result should be computed to the given stack. This includes File
104 * Output, Composite, and Viewer nodes. Viewer nodes are a special case, as only the nodes that
105 * satisfies the requirements in the add_viewer_nodes_in_context function are added. First, the
106 * active context is searched for viewer nodes, if non were found, the root context is searched.
107 * For more information on what contexts mean here, see the DerivedNodeTree::active_context()
108 * function. */
109static void add_output_nodes(const Context &context,
110 const DerivedNodeTree &tree,
111 Stack<DNode> &node_stack)
112{
113 const DTreeContext &root_context = tree.root_context();
114
115 /* Only add File Output nodes if the context supports them. */
116 if (bool(context.needed_outputs() & OutputTypes::FileOutput)) {
117 add_file_output_nodes(root_context, node_stack);
118 }
119
120 /* Add the active composite node in the root tree if needed, but only if we are not treating
121 * viewer outputs as composite ones. That's because in cases where viewer nodes will be treated
122 * as composite outputs, viewer nodes will take precedence, so this is handled as a special case
123 * in the add_viewer_nodes_in_context function instead and no need to add it here. */
124 if (bool(context.needed_outputs() & OutputTypes::Composite)) {
125 if (!context.treat_viewer_as_composite_output()) {
126 for (const bNode *node : root_context.btree().nodes_by_type("CompositorNodeComposite")) {
127 if (node->flag & NODE_DO_OUTPUT && !node->is_muted()) {
128 node_stack.push(DNode(&root_context, node));
129 break;
130 }
131 }
132 }
133 }
134
135 if (!bool(context.needed_outputs() & OutputTypes::Viewer)) {
136 return;
137 }
138
139 const DTreeContext &active_context = tree.active_context();
140 const bool viewer_was_added = add_viewer_nodes_in_context(context, &active_context, node_stack);
141
142 /* An active viewer was added, no need to search further. */
143 if (viewer_was_added) {
144 return;
145 }
146
147 /* If the active context is the root one and no viewer nodes were found, we consider this node
148 * tree to have no viewer nodes, even if one of the non-active descendants have viewer nodes. */
149 if (active_context.is_root()) {
150 return;
151 }
152
153 /* The active context doesn't have a viewer node, search in the root context as a fallback. */
154 add_viewer_nodes_in_context(context, &tree.root_context(), node_stack);
155}
156
157/* A type representing a mapping that associates each node with a heuristic estimation of the
158 * number of intermediate buffers needed to compute it and all of its dependencies. See the
159 * compute_number_of_needed_buffers function for more information. */
161
162/* Compute a heuristic estimation of the number of intermediate buffers needed to compute each node
163 * and all of its dependencies for all nodes that the given node depends on. The output is a map
164 * that maps each node with the number of intermediate buffers needed to compute it and all of its
165 * dependencies.
166 *
167 * Consider a node that takes n number of buffers as an input from a number of node dependencies,
168 * which we shall call the input nodes. The node also computes and outputs m number of buffers.
169 * In order for the node to compute its output, a number of intermediate buffers will be needed.
170 * Since the node takes n buffers and outputs m buffers, then the number of buffers directly
171 * needed by the node is (n + m). But each of the input buffers are computed by a node that, in
172 * turn, needs a number of buffers to compute its output. So the total number of buffers needed
173 * to compute the output of the node is max(n + m, d) where d is the number of buffers needed by
174 * the input node that needs the largest number of buffers. We only consider the input node that
175 * needs the largest number of buffers, because those buffers can be reused by any input node
176 * that needs a lesser number of buffers.
177 *
178 * Pixel nodes, however, are a special case because links between two pixel nodes inside the same
179 * pixel operation don't pass a buffer, but a single value in the pixel processor. So for pixel
180 * nodes, only inputs and outputs linked to nodes that are not pixel nodes should be considered.
181 * Note that this might not actually be true, because the compiler may decide to split a pixel
182 * operation into multiples ones that will pass buffers, but this is not something that can be
183 * known at scheduling-time. See the discussion in COM_compile_state.hh, COM_evaluator.hh, and
184 * COM_shader_operation.hh for more information. In the node tree shown below, node 4 will have
185 * exactly the same number of needed buffers by node 3, because its inputs and outputs are all
186 * internally linked in the pixel operation.
187 *
188 * Pixel Operation
189 * +------------------------------------------------------+
190 * .------------. | .------------. .------------. .------------. | .------------.
191 * | Node 1 | | | Node 3 | | Node 4 | | Node 5 | | | Node 6 |
192 * | |----|--| |--| |------| |--|--| |
193 * | | .-|--| | | | .---| | | | |
194 * '------------' | | '------------' '------------' | '------------' | '------------'
195 * | +----------------------------------|-------------------+
196 * .------------. | |
197 * | Node 2 | | |
198 * | |--'------------------------------------'
199 * | |
200 * '------------'
201 *
202 * Note that the computed output is not guaranteed to be accurate, and will not be in most cases.
203 * The computation is merely a heuristic estimation that works well in most cases. This is due to a
204 * number of reasons:
205 * - The node tree is actually a graph that allows output sharing, which is not something that was
206 * taken into consideration in this implementation because it is difficult to correctly consider.
207 * - Each node may allocate any number of internal buffers, which is not taken into account in this
208 * implementation because it rarely affects the output and is done by very few nodes.
209 * - The compiler may decide to compiler the schedule differently depending on runtime information
210 * which we can merely speculate at scheduling-time as described above. */
212{
213 NeededBuffers needed_buffers;
214
215 /* A stack of nodes used to traverse the node tree starting from the output nodes. */
216 Stack<DNode> node_stack = output_nodes;
217
218 /* Traverse the node tree in a post order depth first manner and compute the number of needed
219 * buffers for each node. Post order traversal guarantee that all the node dependencies of each
220 * node are computed before it. This is done by pushing all the uncomputed node dependencies to
221 * the node stack first and only popping and computing the node when all its node dependencies
222 * were computed. */
223 while (!node_stack.is_empty()) {
224 /* Do not pop the node immediately, as it may turn out that we can't compute its number of
225 * needed buffers just yet because its dependencies weren't computed, it will be popped later
226 * when needed. */
227 DNode &node = node_stack.peek();
228
229 /* Go over the node dependencies connected to the inputs of the node and push them to the node
230 * stack if they were not computed already. */
231 Set<DNode> pushed_nodes;
232 for (const bNodeSocket *input : node->input_sockets()) {
233 const DInputSocket dinput{node.context(), input};
234
236 continue;
237 }
238
239 /* Get the output linked to the input. If it is null, that means the input is unlinked and
240 * has no dependency node. */
241 const DOutputSocket doutput = get_output_linked_to_input(dinput);
242 if (!doutput) {
243 continue;
244 }
245
246 /* The node dependency was already computed or pushed before, so skip it. */
247 if (needed_buffers.contains(doutput.node()) || pushed_nodes.contains(doutput.node())) {
248 continue;
249 }
250
251 /* The output node needs to be computed, push the node dependency to the node stack and
252 * indicate that it was pushed. */
253 node_stack.push(doutput.node());
254 pushed_nodes.add_new(doutput.node());
255 }
256
257 /* If any of the node dependencies were pushed, that means that not all of them were computed
258 * and consequently we can't compute the number of needed buffers for this node just yet. */
259 if (!pushed_nodes.is_empty()) {
260 continue;
261 }
262
263 /* We don't need to store the result of the pop because we already peeked at it before. */
264 node_stack.pop();
265
266 /* Compute the number of buffers that the node takes as an input as well as the number of
267 * buffers needed to compute the most demanding of the node dependencies. */
268 int number_of_input_buffers = 0;
269 int buffers_needed_by_dependencies = 0;
270 for (const bNodeSocket *input : node->input_sockets()) {
271 const DInputSocket dinput{node.context(), input};
272
274 continue;
275 }
276
277 /* Get the output linked to the input. If it is null, that means the input is unlinked.
278 * Unlinked inputs do not take a buffer, so skip those inputs. */
279 const DOutputSocket doutput = get_output_linked_to_input(dinput);
280 if (!doutput) {
281 continue;
282 }
283
284 /* Since this input is linked, if the link is not between two pixel nodes, it means that the
285 * node takes a buffer through this input and so we increment the number of input buffers. */
286 if (!is_pixel_node(node) || !is_pixel_node(doutput.node())) {
287 number_of_input_buffers++;
288 }
289
290 /* If the number of buffers needed by the node dependency is more than the total number of
291 * buffers needed by the dependencies, then update the latter to be the former. This is
292 * computing the "d" in the aforementioned equation "max(n + m, d)". */
293 const int buffers_needed_by_dependency = needed_buffers.lookup(doutput.node());
294 buffers_needed_by_dependencies = std::max(buffers_needed_by_dependency,
295 buffers_needed_by_dependencies);
296 }
297
298 /* Compute the number of buffers that will be computed/output by this node. */
299 int number_of_output_buffers = 0;
300 for (const bNodeSocket *output : node->output_sockets()) {
301 const DOutputSocket doutput{node.context(), output};
302
304 continue;
305 }
306
307 /* The output is not linked, it outputs no buffer. */
308 if (!output->is_logically_linked()) {
309 continue;
310 }
311
312 /* If any of the links is not between two pixel nodes, it means that the node outputs
313 * a buffer through this output and so we increment the number of output buffers. */
315 number_of_output_buffers++;
316 }
317 }
318
319 /* Compute the heuristic estimation of the number of needed intermediate buffers to compute
320 * this node and all of its dependencies. This is computing the aforementioned equation
321 * "max(n + m, d)". */
322 const int total_buffers = std::max(number_of_input_buffers + number_of_output_buffers,
323 buffers_needed_by_dependencies);
324 needed_buffers.add(node, total_buffers);
325 }
326
327 return needed_buffers;
328}
329
330/* There are multiple different possible orders of evaluating a node graph, each of which needs
331 * to allocate a number of intermediate buffers to store its intermediate results. It follows
332 * that we need to find the evaluation order which uses the least amount of intermediate buffers.
333 * For instance, consider a node that takes two input buffers A and B. Each of those buffers is
334 * computed through a number of nodes constituting a sub-graph whose root is the node that
335 * outputs that buffer. Suppose the number of intermediate buffers needed to compute A and B are
336 * N(A) and N(B) respectively and N(A) > N(B). Then evaluating the sub-graph computing A would be
337 * a better option than that of B, because had B was computed first, its outputs will need to be
338 * stored in extra buffers in addition to the buffers needed by A. The number of buffers needed by
339 * each node is estimated as described in the compute_number_of_needed_buffers function.
340 *
341 * This is a heuristic generalization of the Sethi-Ullman algorithm, a generalization that
342 * doesn't always guarantee an optimal evaluation order, as the optimal evaluation order is very
343 * difficult to compute, however, this method works well in most cases. Moreover it assumes that
344 * all buffers will have roughly the same size, which may not always be the case. */
346{
347 Schedule schedule;
348
349 /* A stack of nodes used to traverse the node tree starting from the output nodes. */
350 Stack<DNode> node_stack;
351
352 /* Add the output nodes whose result should be computed to the stack. */
353 add_output_nodes(context, tree, node_stack);
354
355 /* No output nodes, the node tree has no effect, return an empty schedule. */
356 if (node_stack.is_empty()) {
357 return schedule;
358 }
359
360 /* Compute the number of buffers needed by each node connected to the outputs. */
361 const NeededBuffers needed_buffers = compute_number_of_needed_buffers(node_stack);
362
363 /* Traverse the node tree in a post order depth first manner, scheduling the nodes in an order
364 * informed by the number of buffers needed by each node. Post order traversal guarantee that all
365 * the node dependencies of each node are scheduled before it. This is done by pushing all the
366 * unscheduled node dependencies to the node stack first and only popping and scheduling the node
367 * when all its node dependencies were scheduled. */
368 while (!node_stack.is_empty()) {
369 /* Do not pop the node immediately, as it may turn out that we can't schedule it just yet
370 * because its dependencies weren't scheduled, it will be popped later when needed. */
371 DNode &node = node_stack.peek();
372
373 /* Compute the nodes directly connected to the node inputs sorted by their needed buffers such
374 * that the node with the lowest number of needed buffers comes first. Note that we actually
375 * want the node with the highest number of needed buffers to be schedule first, but since
376 * those are pushed to the traversal stack, we need to push them in reverse order. */
377 Vector<DNode> sorted_dependency_nodes;
378 for (const bNodeSocket *input : node->input_sockets()) {
379 const DInputSocket dinput{node.context(), input};
380
382 continue;
383 }
384
385 /* Get the output linked to the input. If it is null, that means the input is unlinked and
386 * has no dependency node, so skip it. */
387 const DOutputSocket doutput = get_output_linked_to_input(dinput);
388 if (!doutput) {
389 continue;
390 }
391
392 /* The dependency node was added before, so skip it. The number of dependency nodes is very
393 * small, typically less than 3, so a linear search is okay. */
394 if (sorted_dependency_nodes.contains(doutput.node())) {
395 continue;
396 }
397
398 /* The dependency node was already schedule, so skip it. */
399 if (schedule.contains(doutput.node())) {
400 continue;
401 }
402
403 /* Sort in ascending order on insertion, the number of dependency nodes is very small,
404 * typically less than 3, so insertion sort is okay. */
405 int insertion_position = 0;
406 for (int i = 0; i < sorted_dependency_nodes.size(); i++) {
407 if (needed_buffers.lookup(doutput.node()) >
408 needed_buffers.lookup(sorted_dependency_nodes[i]))
409 {
410 insertion_position++;
411 }
412 else {
413 break;
414 }
415 }
416 sorted_dependency_nodes.insert(insertion_position, doutput.node());
417 }
418
419 /* Push the sorted dependency nodes to the node stack in order. */
420 for (const DNode &dependency_node : sorted_dependency_nodes) {
421 node_stack.push(dependency_node);
422 }
423
424 /* If there are no sorted dependency nodes, that means they were all already scheduled or that
425 * none exists in the first place, so we can pop and schedule the node now. */
426 if (sorted_dependency_nodes.is_empty()) {
427 /* The node might have already been scheduled, so we don't use add_new here and simply don't
428 * add it if it was already scheduled. */
429 schedule.add(node_stack.pop());
430 }
431 }
432
433 return schedule;
434}
435
436} // namespace blender::compositor
@ NODE_DO_OUTPUT
bool add(const Key &key, const Value &value)
Definition BLI_map.hh:295
const Value & lookup(const Key &key) const
Definition BLI_map.hh:545
bool contains(const Key &key) const
Definition BLI_map.hh:353
bool contains(const Key &key) const
Definition BLI_set.hh:310
bool is_empty() const
Definition BLI_set.hh:595
void add_new(const Key &key)
Definition BLI_set.hh:233
bool is_empty() const
Definition BLI_stack.hh:308
void push(const T &value)
Definition BLI_stack.hh:213
bool add(const Key &key)
bool contains(const Key &key) const
int64_t size() const
bool contains(const T &value) const
void insert(const int64_t insert_index, const T &value)
bool is_empty() const
const DTreeContext * child_context(const bNode &node) const
const DTreeContext * parent_context() const
const bNodeTree & btree() const
KDTree_3d * tree
#define input
#define output
static bool add_viewer_nodes_in_context(const Context &context, const DTreeContext *tree_context, Stack< DNode > &node_stack)
Definition scheduler.cc:45
bool is_output_linked_to_node_conditioned(DOutputSocket output, FunctionRef< bool(DNode)> condition)
Definition utilities.cc:88
VectorSet< DNode > Schedule
static void add_output_nodes(const Context &context, const DerivedNodeTree &tree, Stack< DNode > &node_stack)
Definition scheduler.cc:109
static bool is_tree_context_muted(const DTreeContext &tree_context)
Definition scheduler.cc:25
Schedule compute_schedule(const Context &context, const DerivedNodeTree &tree)
Definition scheduler.cc:345
DOutputSocket get_output_linked_to_input(DInputSocket input)
Definition utilities.cc:50
static NeededBuffers compute_number_of_needed_buffers(Stack< DNode > &output_nodes)
Definition scheduler.cc:211
Map< DNode, int > NeededBuffers
Definition scheduler.cc:160
bool is_pixel_node(DNode node)
Definition utilities.cc:118
bool is_socket_available(const bNodeSocket *socket)
Definition utilities.cc:25
static void add_file_output_nodes(const DTreeContext &tree_context, Stack< DNode > &node_stack)
Definition scheduler.cc:79
i
Definition text_draw.cc:230