Blender V4.3
render_scheduler.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
6
7#include "session/session.h"
8#include "session/tile.h"
9#include "util/log.h"
10#include "util/math.h"
11#include "util/time.h"
12
14
15/* --------------------------------------------------------------------
16 * Render scheduler.
17 */
18
20 : headless_(params.headless),
21 background_(params.background),
22 pixel_size_(params.pixel_size),
23 tile_manager_(tile_manager),
24 default_start_resolution_divider_(params.use_resolution_divider ? pixel_size_ * 8 : 0)
25{
27}
28
29void RenderScheduler::set_need_schedule_cryptomatte(bool need_schedule_cryptomatte)
30{
31 need_schedule_cryptomatte_ = need_schedule_cryptomatte;
32}
33
34void RenderScheduler::set_need_schedule_rebalance(bool need_schedule_rebalance)
35{
36 need_schedule_rebalance_works_ = need_schedule_rebalance;
37}
38
40{
41 return background_;
42}
43
48
53
55{
56 limit_samples_per_update_ = limit_samples;
57}
58
60{
61 adaptive_sampling_ = adaptive_sampling;
62}
63
68
70{
71 start_sample_ = start_sample;
72}
73
75{
76 return start_sample_;
77}
78
80{
81 num_samples_ = num_samples;
82}
83
85{
86 return num_samples_;
87}
88
90{
91 sample_offset_ = sample_offset;
92}
93
98
99void RenderScheduler::set_time_limit(double time_limit)
100{
101 time_limit_ = time_limit;
102}
103
105{
106 return time_limit_;
107}
108
115
117{
118 return state_.num_rendered_samples;
119}
120
121void RenderScheduler::reset(const BufferParams &buffer_params, int num_samples, int sample_offset)
122{
123 buffer_params_ = buffer_params;
124
126
127 set_num_samples(num_samples);
128 set_start_sample(sample_offset);
129 set_sample_offset(sample_offset);
130
131 /* In background mode never do lower resolution render preview, as it is not really supported
132 * by the software. */
134 state_.resolution_divider = 1;
135 }
136 else {
137 state_.user_is_navigating = true;
138 state_.resolution_divider = start_resolution_divider_;
139 }
140
141 state_.num_rendered_samples = 0;
142 state_.last_display_update_time = 0.0;
143 state_.last_display_update_sample = -1;
144
145 state_.last_rebalance_time = 0.0;
146 state_.num_rebalance_requested = 0;
147 state_.num_rebalance_changes = 0;
148 state_.last_rebalance_changed = false;
149 state_.need_rebalance_at_next_work = false;
150
151 /* TODO(sergey): Choose better initial value. */
152 /* NOTE: The adaptive sampling settings might not be available here yet. */
153 state_.adaptive_sampling_threshold = 0.4f;
154
155 state_.last_work_tile_was_denoised = false;
156 state_.tile_result_was_written = false;
157 state_.postprocess_work_scheduled = false;
158 state_.full_frame_work_scheduled = false;
159 state_.full_frame_was_written = false;
160
161 state_.path_trace_finished = false;
162
163 state_.start_render_time = 0.0;
164 state_.end_render_time = 0.0;
165 state_.time_limit_reached = false;
166
167 state_.occupancy_num_samples = 0;
168 state_.occupancy = 1.0f;
169
170 first_render_time_.path_trace_per_sample = 0.0;
171 first_render_time_.denoise_time = 0.0;
172 first_render_time_.display_update_time = 0.0;
173
179}
180
185
187{
188 /* Move to the next resolution divider. Assume adaptive filtering is not needed during
189 * navigation. */
190 if (state_.resolution_divider != pixel_size_) {
191 return false;
192 }
193
194 if (render_work_reschedule_on_idle(render_work)) {
195 return true;
196 }
197
198 state_.path_trace_finished = true;
199
200 bool denoiser_delayed, denoiser_ready_to_display;
201 render_work.tile.denoise = work_need_denoise(denoiser_delayed, denoiser_ready_to_display);
202
203 render_work.display.update = work_need_update_display(denoiser_delayed);
204 render_work.display.use_denoised_result = denoiser_ready_to_display;
205
206 return false;
207}
208
210{
212 return false;
213 }
214
215 /* Move to the next resolution divider. Assume adaptive filtering is not needed during
216 * navigation. */
217 if (state_.resolution_divider != pixel_size_) {
218 return false;
219 }
220
222 if (state_.adaptive_sampling_threshold > adaptive_sampling_.threshold) {
223 state_.adaptive_sampling_threshold = max(state_.adaptive_sampling_threshold / 2,
225
226 render_work.adaptive_sampling.threshold = state_.adaptive_sampling_threshold;
227 render_work.adaptive_sampling.reset = true;
228
229 return true;
230 }
231 }
232
233 return false;
234}
235
237{
238 VLOG_WORK << "Schedule work for cancel.";
239
240 /* Un-schedule samples: they will not be rendered and should not be counted. */
241 state_.num_rendered_samples -= render_work.path_trace.num_samples;
242
243 const bool has_rendered_samples = get_num_rendered_samples() != 0;
244
245 /* Reset all fields of the previous work, canceling things like adaptive sampling filtering and
246 * denoising.
247 * However, need to preserve write requests, since those will not be possible to recover and
248 * writes are only to happen once. */
249 const bool tile_write = render_work.tile.write;
250 const bool full_write = render_work.full.write;
251
252 render_work = RenderWork();
253
254 render_work.tile.write = tile_write;
255 render_work.full.write = full_write;
256
257 /* Do not write tile if it has zero samples in it, treat it similarly to all other tiles which
258 * got canceled. */
259 if (!state_.tile_result_was_written && has_rendered_samples) {
260 render_work.tile.write = true;
261 }
262
263 if (!state_.full_frame_was_written) {
264 render_work.full.write = true;
265 }
266
267 /* Update current tile, but only if any sample was rendered.
268 * Allows to have latest state of tile visible while full buffer is being processed.
269 *
270 * Note that if there are no samples in the current tile its render buffer might have pixels
271 * remained from previous state.
272 *
273 * If the full result was written, then there is no way any updates were made to the render
274 * buffers. And the buffers might have been freed from the device, so display update is not
275 * possible. */
276 if (has_rendered_samples && !state_.full_frame_was_written) {
277 render_work.display.update = true;
278 }
279}
280
282{
283 if (state_.resolution_divider != pixel_size_) {
284 return false;
285 }
286
287 if (state_.path_trace_finished || state_.time_limit_reached) {
288 return true;
289 }
290
292}
293
295{
297
298 const double time_now = time_dt();
299
300 if (done()) {
301 RenderWork render_work;
302 render_work.resolution_divider = state_.resolution_divider;
303
304 if (!set_postprocess_render_work(&render_work)) {
305 set_full_frame_render_work(&render_work);
306 }
307
308 if (!render_work) {
309 state_.end_render_time = time_now;
310 }
311
312 update_state_for_render_work(render_work);
313
314 return render_work;
315 }
316
317 RenderWork render_work;
318
319 if (state_.resolution_divider != pixel_size_) {
320 if (state_.user_is_navigating) {
321 /* Don't progress the resolution divider as the user is currently navigating in the scene. */
322 state_.user_is_navigating = false;
323 }
324 else {
325 /* If the resolution divider is greater than or equal to default_start_resolution_divider_,
326 * drop the resolution divider down to 4. This is so users with slow hardware and thus high
327 * resolution dividers (E.G. 16), get an update to let them know something is happening
328 * rather than having to wait for the full 1:1 render to show up. */
330 (4 * pixel_size_) :
331 1;
332 }
333
334 state_.resolution_divider = max(state_.resolution_divider, pixel_size_);
335 state_.num_rendered_samples = 0;
336 state_.last_display_update_sample = -1;
337 }
338
339 render_work.resolution_divider = state_.resolution_divider;
340
344
345 render_work.init_render_buffers = (render_work.path_trace.start_sample == get_start_sample());
346
347 /* NOTE: Rebalance scheduler requires current number of samples to not be advanced forward. */
348 render_work.rebalance = work_need_rebalance();
349
350 /* NOTE: Advance number of samples now, so that filter and denoising check can see that all the
351 * samples are rendered. */
352 state_.num_rendered_samples += render_work.path_trace.num_samples;
353
356 render_work.adaptive_sampling.reset = false;
357
358 bool denoiser_delayed, denoiser_ready_to_display;
359 render_work.tile.denoise = work_need_denoise(denoiser_delayed, denoiser_ready_to_display);
360
361 render_work.tile.write = done();
362
363 render_work.display.update = work_need_update_display(denoiser_delayed);
364 render_work.display.use_denoised_result = denoiser_ready_to_display;
365
366 if (done()) {
367 set_postprocess_render_work(&render_work);
368 }
369
370 update_state_for_render_work(render_work);
371
372 return render_work;
373}
374
376{
377 const double time_now = time_dt();
378
379 if (render_work.rebalance) {
380 state_.last_rebalance_time = time_now;
381 ++state_.num_rebalance_requested;
382 }
383
384 /* A fallback display update time, for the case there is an error of display update, or when
385 * there is no display at all. */
386 if (render_work.display.update) {
387 state_.last_display_update_time = time_now;
388 state_.last_display_update_sample = state_.num_rendered_samples;
389 }
390
391 state_.last_work_tile_was_denoised = render_work.tile.denoise;
392 state_.tile_result_was_written |= render_work.tile.write;
393 state_.full_frame_was_written |= render_work.full.write;
394}
395
397{
398 if (state_.postprocess_work_scheduled) {
399 return false;
400 }
401 state_.postprocess_work_scheduled = true;
402
403 bool any_scheduled = false;
404
406 render_work->cryptomatte.postprocess = true;
407 any_scheduled = true;
408 }
409
410 if (denoiser_params_.use && !state_.last_work_tile_was_denoised) {
412 any_scheduled = true;
413 }
414
415 if (!state_.tile_result_was_written) {
416 render_work->tile.write = true;
417 any_scheduled = true;
418 }
419
420 if (any_scheduled) {
421 render_work->display.update = true;
422 }
423
424 return any_scheduled;
425}
426
428{
429 if (state_.full_frame_work_scheduled) {
430 return;
431 }
432
434 /* There is only single tile, so all work has been performed already. */
435 return;
436 }
437
438 if (!tile_manager_.done()) {
439 /* There are still tiles to be rendered. */
440 return;
441 }
442
443 if (state_.full_frame_was_written) {
444 return;
445 }
446
447 state_.full_frame_work_scheduled = true;
448
449 render_work->full.write = true;
450}
451
452/* Knowing time which it took to complete a task at the current resolution divider approximate how
453 * long it would have taken to complete it at a final resolution. */
454static double approximate_final_time(const RenderWork &render_work, double time)
455{
456 if (render_work.resolution_divider == 1) {
457 return time;
458 }
459
460 const double resolution_divider_sq = render_work.resolution_divider *
461 render_work.resolution_divider;
462 return time * resolution_divider_sq;
463}
464
466{
467 /* Start counting render time when rendering samples at their final resolution.
468 *
469 * NOTE: The work might have the path trace part be all zero: this happens when a post-processing
470 * work is scheduled after the path tracing. Checking for just a start sample doesn't work here
471 * because it might be wrongly 0. Check for whether path tracing is actually happening as it is
472 * expected to happen in the first work. */
473 if (render_work.resolution_divider == pixel_size_ && render_work.path_trace.num_samples != 0 &&
475 {
476 state_.start_render_time = time_dt();
477 }
478}
479
481 double time,
482 bool is_cancelled)
483{
485
486 if (is_cancelled) {
487 return;
488 }
489
490 const double final_time_approx = approximate_final_time(render_work, time);
491
493 first_render_time_.path_trace_per_sample = final_time_approx /
494 render_work.path_trace.num_samples;
495 }
496
497 if (work_report_reset_average(render_work)) {
499 }
500
501 path_trace_time_.add_average(final_time_approx, render_work.path_trace.num_samples);
502
503 VLOG_WORK << "Average path tracing time: " << path_trace_time_.get_average() << " seconds.";
504}
505
506void RenderScheduler::report_path_trace_occupancy(const RenderWork &render_work, float occupancy)
507{
508 state_.occupancy_num_samples = render_work.path_trace.num_samples;
509 state_.occupancy = occupancy;
510 VLOG_WORK << "Measured path tracing occupancy: " << occupancy;
511}
512
514 double time,
515 bool is_cancelled)
516{
518
519 if (is_cancelled) {
520 return;
521 }
522
523 const double final_time_approx = approximate_final_time(render_work, time);
524
525 if (work_report_reset_average(render_work)) {
527 }
528
529 adaptive_filter_time_.add_average(final_time_approx, render_work.path_trace.num_samples);
530
531 VLOG_WORK << "Average adaptive sampling filter time: " << adaptive_filter_time_.get_average()
532 << " seconds.";
533}
534
535void RenderScheduler::report_denoise_time(const RenderWork &render_work, double time)
536{
538
539 const double final_time_approx = approximate_final_time(render_work, time);
540
542 first_render_time_.denoise_time = final_time_approx;
543 }
544
545 if (work_report_reset_average(render_work)) {
547 }
548
549 denoise_time_.add_average(final_time_approx);
550
551 VLOG_WORK << "Average denoising time: " << denoise_time_.get_average() << " seconds.";
552}
553
554void RenderScheduler::report_display_update_time(const RenderWork &render_work, double time)
555{
557
558 const double final_time_approx = approximate_final_time(render_work, time);
559
561 first_render_time_.display_update_time = final_time_approx;
562 }
563
564 if (work_report_reset_average(render_work)) {
566 }
567
568 display_update_time_.add_average(final_time_approx);
569
570 VLOG_WORK << "Average display update time: " << display_update_time_.get_average()
571 << " seconds.";
572
573 /* Move the display update moment further in time, so that logic which checks when last update
574 * did happen have more reliable point in time (without path tracing and denoising parts of the
575 * render work). */
576 state_.last_display_update_time = time_dt();
577}
578
580 double time,
581 bool balance_changed)
582{
584
585 if (work_report_reset_average(render_work)) {
587 }
588
590
591 if (balance_changed) {
592 ++state_.num_rebalance_changes;
593 }
594
595 state_.last_rebalance_changed = balance_changed;
596
597 VLOG_WORK << "Average rebalance time: " << rebalance_time_.get_average() << " seconds.";
598}
599
601{
602 const double render_wall_time = state_.end_render_time - state_.start_render_time;
604
605 string result = "\nRender Scheduler Summary\n\n";
606
607 {
608 string mode;
609 if (headless_) {
610 mode = "Headless";
611 }
612 else if (background_) {
613 mode = "Background";
614 }
615 else {
616 mode = "Interactive";
617 }
618 result += "Mode: " + mode + "\n";
619 }
620
621 result += "Resolution: " + to_string(buffer_params_.width) + "x" +
623
624 result += "\nAdaptive sampling:\n";
625 result += " Use: " + string_from_bool(adaptive_sampling_.use) + "\n";
627 result += " Step: " + to_string(adaptive_sampling_.adaptive_step) + "\n";
628 result += " Min Samples: " + to_string(adaptive_sampling_.min_samples) + "\n";
629 result += " Threshold: " + to_string(adaptive_sampling_.threshold) + "\n";
630 }
631
632 result += "\nDenoiser:\n";
633 result += " Use: " + string_from_bool(denoiser_params_.use) + "\n";
634 if (denoiser_params_.use) {
635 result += " Type: " + string(denoiserTypeToHumanReadable(denoiser_params_.type)) + "\n";
636 result += " Start Sample: " + to_string(denoiser_params_.start_sample) + "\n";
637
638 string passes = "Color";
640 passes += ", Albedo";
641 }
643 passes += ", Normal";
644 }
645
646 result += " Passes: " + passes + "\n";
647 }
648
649 if (state_.num_rebalance_requested) {
650 result += "\nRebalancer:\n";
651 result += " Number of requested rebalances: " + to_string(state_.num_rebalance_requested) +
652 "\n";
653 result += " Number of performed rebalances: " + to_string(state_.num_rebalance_changes) +
654 "\n";
655 }
656
657 result += "\nTime (in seconds):\n";
658 result += string_printf(" %20s %20s %20s\n", "", "Wall", "Average");
659 result += string_printf(" %20s %20f %20f\n",
660 "Path Tracing",
663
665 result += string_printf(" %20s %20f %20f\n",
666 "Adaptive Filter",
669 }
670
671 if (denoiser_params_.use) {
672 result += string_printf(
673 " %20s %20f %20f\n", "Denoiser", denoise_time_.get_wall(), denoise_time_.get_average());
674 }
675
676 result += string_printf(" %20s %20f %20f\n",
677 "Display Update",
680
681 if (state_.num_rebalance_requested) {
682 result += string_printf(" %20s %20f %20f\n",
683 "Rebalance",
686 }
687
690 result += "\n Total: " + to_string(total_time) + "\n";
691
692 result += string_printf(
693 "\nRendered %d samples in %f seconds\n", num_rendered_samples, render_wall_time);
694
695 /* When adaptive sampling is used the average time becomes meaningless, because different samples
696 * will likely render different number of pixels. */
697 if (!adaptive_sampling_.use) {
698 result += string_printf("Average time per sample: %f seconds\n",
699 render_wall_time / num_rendered_samples);
700 }
701
702 return result;
703}
704
709
711 int num_rendered_samples) const
712{
715
716 if (time_limit_ != 0.0 && state_.start_render_time != 0.0) {
717 const double remaining_render_time = max(0.0,
718 time_limit_ - (time_dt() - state_.start_render_time));
719
720 update_interval = min(update_interval, remaining_render_time);
721 }
722
723 return update_interval;
724}
725
726/* TODO(sergey): This is just a quick implementation, exact values might need to be tweaked based
727 * on more careful experiments with viewport rendering. */
729 int num_rendered_samples) const
730{
731 /* TODO(sergey): Need a decision on whether this should be using number of samples rendered
732 * within the current render session, or use absolute number of samples with the start sample
733 * taken into account. It will depend on whether the start sample offset clears the render
734 * buffer. */
735
736 if (state_.need_rebalance_at_next_work) {
737 return 0.1;
738 }
739 if (state_.last_rebalance_changed) {
740 return 0.2;
741 }
742
743 if (headless_) {
744 /* In headless mode do rare updates, so that the device occupancy is high, but there are still
745 * progress messages printed to the logs. */
746 return 30.0;
747 }
748
749 if (background_) {
750 if (num_rendered_samples < 32) {
751 return 1.0;
752 }
753 return 2.0;
754 }
755
756 /* Render time and number of samples rendered are used to figure out the display update interval.
757 * Render time is used to allow for fast display updates in the first few seconds of rendering
758 * on fast devices. Number of samples rendered is used to allow for potentially quicker display
759 * updates on slow devices during the first few samples. */
760 const double render_time = path_trace_time_.get_wall();
761 if (render_time < 1) {
762 return 0.1;
763 }
764 if (render_time < 2) {
765 return 0.25;
766 }
767 if (render_time < 4) {
768 return 0.5;
769 }
770 if (render_time < 8 || num_rendered_samples < 32) {
771 return 1.0;
772 }
773 return 2.0;
774}
775
777{
778 const double time_per_sample_average = path_trace_time_.get_average();
779 /* Fall back to 1 sample if we have not recorded a time yet. */
780 if (time_per_sample_average == 0.0) {
781 return 1;
782 }
783
784 const double num_samples_in_second = pixel_size_ * pixel_size_ / time_per_sample_average;
785
786 const double update_interval_in_seconds = guess_display_update_interval_in_seconds();
787
788 int num_samples_per_update = max(int(num_samples_in_second * update_interval_in_seconds), 1);
789
791 num_samples_per_update = min(limit_samples_per_update_, num_samples_per_update);
792 }
793
794 return num_samples_per_update;
795}
796
798{
799 return start_sample_ + state_.num_rendered_samples;
800}
801
802/* Round number of samples to the closest power of two.
803 * Rounding might happen to higher or lower value depending on which one is closer. Such behavior
804 * allows to have number of samples to be power of two without diverging from the planned number of
805 * samples too much. */
806static inline uint round_num_samples_to_power_of_2(const uint num_samples)
807{
808 if (num_samples == 1) {
809 return 1;
810 }
811
812 if (is_power_of_two(num_samples)) {
813 return num_samples;
814 }
815
816 const uint num_samples_up = next_power_of_two(num_samples);
817 const uint num_samples_down = num_samples_up - (num_samples_up >> 1);
818
819 const uint delta_up = num_samples_up - num_samples;
820 const uint delta_down = num_samples - num_samples_down;
821
822 if (delta_up <= delta_down) {
823 return num_samples_up;
824 }
825
826 return num_samples_down;
827}
828
830{
831 if (state_.resolution_divider != pixel_size_) {
832 return get_num_samples_during_navigation(state_.resolution_divider);
833 }
834
835 /* Always start full resolution render with a single sample. Gives more instant feedback to
836 * artists, and allows to gather information for a subsequent path tracing works. Do it in the
837 * headless mode as well, to give some estimate of how long samples are taking. */
838 if (state_.num_rendered_samples == 0) {
839 return 1;
840 }
841
842 int num_samples_per_update = calculate_num_samples_per_update();
843 const int path_trace_start_sample = get_start_sample_to_path_trace();
844
845 /* Round number of samples to a power of two, so that division of path states into tiles goes in
846 * a more integer manner.
847 * This might make it so updates happens more rarely due to rounding up. In the test scenes this
848 * is not huge deal because it is not seen that more than 8 samples can be rendered between
849 * updates. If that becomes a problem we can add some extra rules like never allow to round up
850 * more than N samples. */
851 const int num_samples_pot = round_num_samples_to_power_of_2(num_samples_per_update);
852
853 const int max_num_samples_to_render = start_sample_ + num_samples_ - path_trace_start_sample;
854
855 int num_samples_to_render = min(num_samples_pot, max_num_samples_to_render);
856
857 /* When enough statistics is available and doing an offline rendering prefer to keep device
858 * occupied. */
859 if (state_.occupancy_num_samples && (background_ || headless_)) {
860 /* Keep occupancy at about 0.5 (this is more of an empirical figure which seems to match scenes
861 * with good performance without forcing occupancy to be higher). */
862 int num_samples_to_occupy = state_.occupancy_num_samples;
863 if (state_.occupancy > 0 && state_.occupancy < 0.5f) {
864 num_samples_to_occupy = lround(state_.occupancy_num_samples * 0.7f / state_.occupancy);
865 }
866
867 /* Time limit for path tracing, which constraints the scheduler from "over-scheduling" work
868 * in scenes which have very long path trace times and low occupancy. This allows faster
869 * feedback of render results, and faster canceling when artists notice something is wrong.
870 *
871 * Additionally, when the time limit is enabled, do not render more samples than it is needed
872 * to reach the time limit. */
873 double path_tracing_time_limit = 0;
874 if (headless_) {
875 /* In the headless (command-line) render "over-scheduling" is not as bad, as it ensures the
876 * best possible render time. */
877 }
878 else if (background_) {
879 /* For the first few seconds prefer quicker updates, giving it a better chance for artists
880 * to cancel render early on when they notice something is wrong. After that increase the
881 * update times a lot, giving the best possible performance on complicated scenes like
882 * the Spring splash screen (where occupancy is just very bad). */
883 if (state_.start_render_time == 0.0 || time_dt() - state_.start_render_time < 10) {
884 path_tracing_time_limit = 2.0;
885 }
886 else {
887 path_tracing_time_limit = 15.0;
888 }
889 }
890 else {
891 /* Viewport render: prefer faster updates over overall render time reduction. */
892 /* TODO: Look into enabling this entire code-path for the viewport as well, allowing
893 * compensation even in viewport (currently parent scope checks for non-viewport render). */
894 path_tracing_time_limit = guess_display_update_interval_in_seconds();
895 }
896 if (time_limit_ != 0.0 && state_.start_render_time != 0.0) {
897 const double remaining_render_time = max(
898 0.0, time_limit_ - (time_dt() - state_.start_render_time));
899 if (path_tracing_time_limit == 0) {
900 path_tracing_time_limit = remaining_render_time;
901 }
902 else {
903 path_tracing_time_limit = min(path_tracing_time_limit, remaining_render_time);
904 }
905 }
906 if (path_tracing_time_limit != 0) {
907 /* Use the per-sample time from the previously rendered batch of samples so that the
908 * correction is applied much quicker. */
909 const double predicted_render_time = num_samples_to_occupy *
911 if (predicted_render_time > path_tracing_time_limit) {
912 num_samples_to_occupy = lround(num_samples_to_occupy *
913 (path_tracing_time_limit / predicted_render_time));
914 }
915 }
916
917 num_samples_to_render = max(num_samples_to_render,
918 min(num_samples_to_occupy, max_num_samples_to_render));
919 }
920
921 /* If adaptive sampling is not used, render as many samples per update as possible, keeping the
922 * device fully occupied, without much overhead of display updates. */
923 if (!adaptive_sampling_.use) {
924 return num_samples_to_render;
925 }
926
927 /* TODO(sergey): Add extra "clamping" here so that none of the filtering points is missing. This
928 * is to ensure that the final render is pixel-matched regardless of how many samples per second
929 * compute device can do. */
930
931 return adaptive_sampling_.align_samples(path_trace_start_sample - sample_offset_,
932 num_samples_to_render);
933}
934
936{
937 /* Special trick for fast navigation: schedule multiple samples during fast navigation
938 * (which will prefer to use lower resolution to keep up with refresh rate). This gives more
939 * usable visual feedback for artists. */
940
942 /* When denoising is used during navigation prefer using a higher resolution with less samples
943 * (scheduling less samples here will make it so the resolution_divider calculation will use a
944 * lower value for the divider). This is because both OpenImageDenoise and OptiX denoiser
945 * give visually better results on a higher resolution image with less samples. */
946 return 1;
947 }
948
949 /* Schedule samples equal to the resolution divider up to a maximum of 4, limited by the maximum
950 * number of samples overall.
951 * The idea is to have enough information on the screen by increasing the sample count as the
952 * resolution is decreased. */
953 const int max_navigation_samples = min(num_samples_, 4);
954 /* NOTE: Changing this formula will change the formula in
955 * `RenderScheduler::calculate_resolution_divider_for_time()`. */
956 return min(max(1, resolution_divider / pixel_size_), max_navigation_samples);
957}
958
963
965{
968 }
969
970 return max(state_.adaptive_sampling_threshold, adaptive_sampling_.threshold);
971}
972
973bool RenderScheduler::work_need_denoise(bool &delayed, bool &ready_to_display)
974{
975 delayed = false;
976 ready_to_display = true;
977
978 if (!denoiser_params_.use) {
979 /* Denoising is disabled, no need to scheduler work for it. */
980 return false;
981 }
982
983 /* When multiple tiles are used the full frame will be denoised.
984 * Avoid per-tile denoising to save up render time. */
986 return false;
987 }
988
989 if (done()) {
990 /* Always denoise at the last sample. */
991 return true;
992 }
993
994 if (background_) {
995 /* Background render, only denoise when rendering the last sample. */
996 /* TODO(sergey): Follow similar logic to viewport, giving an overview of how final denoised
997 * image looks like even for the background rendering. */
998 return false;
999 }
1000
1001 /* Viewport render. */
1002
1003 /* Navigation might render multiple samples at a lower resolution. Those are not to be counted as
1004 * final samples. */
1005 const int num_samples_finished = state_.resolution_divider == pixel_size_ ?
1006 state_.num_rendered_samples :
1007 1;
1008
1009 /* Immediately denoise when we reach the start sample or last sample. */
1010 if (num_samples_finished == denoiser_params_.start_sample ||
1011 num_samples_finished == num_samples_)
1012 {
1013 return true;
1014 }
1015
1016 /* Do not denoise until the sample at which denoising should start is reached. */
1017 if (num_samples_finished < denoiser_params_.start_sample) {
1018 ready_to_display = false;
1019 return false;
1020 }
1021
1022 /* Avoid excessive denoising in viewport after reaching a certain sample count and render time.
1023 */
1024 /* TODO(sergey): Consider making time interval and sample configurable. */
1025 delayed = (path_trace_time_.get_wall() > 4 && num_samples_finished >= 20 &&
1026 (time_dt() - state_.last_display_update_time) < 1.0);
1027
1028 return !delayed;
1029}
1030
1031bool RenderScheduler::work_need_update_display(const bool denoiser_delayed)
1032{
1033 if (headless_) {
1034 /* Force disable display update in headless mode. There will be nothing to display the
1035 * in-progress result. */
1036 return false;
1037 }
1038
1039 if (denoiser_delayed) {
1040 /* If denoiser has been delayed the display can not be updated as it will not contain
1041 * up-to-date state of the render result. */
1042 return false;
1043 }
1044
1045 if (!adaptive_sampling_.use) {
1046 /* When adaptive sampling is not used the work is scheduled in a way that they keep render
1047 * device busy for long enough, so that the display update can happen right after the
1048 * rendering. */
1049 return true;
1050 }
1051
1052 if (done() || state_.last_display_update_sample == -1) {
1053 /* Make sure an initial and final results of adaptive sampling is communicated ot the display.
1054 */
1055 return true;
1056 }
1057
1058 /* For the development purposes of adaptive sampling it might be very useful to see all updates
1059 * of active pixels after convergence check. However, it would cause a slowdown for regular usage
1060 * users. Possibly, make it a debug panel option to allow rapid update to ease development
1061 * without need to re-compiled. */
1062 // if (work_need_adaptive_filter()) {
1063 // return true;
1064 // }
1065
1066 /* When adaptive sampling is used, its possible that only handful of samples of a very simple
1067 * scene will be scheduled to a powerful device (in order to not "miss" any of filtering points).
1068 * We take care of skipping updates here based on when previous display update did happen. */
1069 const double update_interval = guess_display_update_interval_in_seconds_for_num_samples(
1070 state_.last_display_update_sample);
1071 return (time_dt() - state_.last_display_update_time) > update_interval;
1072}
1073
1075{
1076 /* This is the minimum time, as the rebalancing can not happen more often than the path trace
1077 * work. */
1078 static const double kRebalanceIntervalInSeconds = 1;
1079
1081 return false;
1082 }
1083
1084 if (state_.resolution_divider != pixel_size_) {
1085 /* Don't rebalance at a non-final resolution divider. Some reasons for this:
1086 * - It will introduce unnecessary during navigation.
1087 * - Per-render device timing information is not very reliable yet. */
1088 return false;
1089 }
1090
1091 if (state_.num_rendered_samples == 0) {
1092 state_.need_rebalance_at_next_work = true;
1093 return false;
1094 }
1095
1096 if (state_.need_rebalance_at_next_work) {
1097 state_.need_rebalance_at_next_work = false;
1098 return true;
1099 }
1100
1101 if (state_.last_rebalance_changed) {
1102 return true;
1103 }
1104
1105 return (time_dt() - state_.last_rebalance_time) > kRebalanceIntervalInSeconds;
1106}
1107
1109{
1111 return;
1112 }
1113
1114 /* Calculate the maximum resolution divider possible while keeping the long axis of the viewport
1115 * above our preferred minimum axis size (128). */
1116 const int long_viewport_axis = max(buffer_params_.width, buffer_params_.height);
1117 const int max_res_divider_for_desired_size = long_viewport_axis / 128;
1118
1119 if (start_resolution_divider_ == 0) {
1120 /* Resolution divider has never been calculated before: start with a high resolution divider so
1121 * that we have a somewhat good initial behavior, giving a chance to collect real numbers. */
1123 max_res_divider_for_desired_size);
1124 VLOG_WORK << "Initial resolution divider is " << start_resolution_divider_;
1125 return;
1126 }
1127
1128 if (first_render_time_.path_trace_per_sample == 0.0) {
1129 /* Not enough information to calculate better resolution, keep the existing one. */
1130 return;
1131 }
1132
1133 const double desired_update_interval_in_seconds =
1135
1136 const double actual_time_per_update = first_render_time_.path_trace_per_sample +
1137 first_render_time_.denoise_time +
1138 first_render_time_.display_update_time;
1139
1140 /* Allow some percent of tolerance, so that if the render time is close enough to the higher
1141 * resolution we prefer to use it instead of going way lower resolution and time way below the
1142 * desired one. */
1143 const int resolution_divider_for_update = calculate_resolution_divider_for_time(
1144 desired_update_interval_in_seconds * 1.4, actual_time_per_update);
1145
1146 /* TODO(sergey): Need to add hysteresis to avoid resolution divider bouncing around when actual
1147 * render time is somewhere on a boundary between two resolutions. */
1148
1149 /* Don't let resolution drop below the desired one. It's better to be slow than provide an
1150 * unreadable viewport render. */
1151 start_resolution_divider_ = min(resolution_divider_for_update, max_res_divider_for_desired_size);
1152
1153 VLOG_WORK << "Calculated resolution divider is " << start_resolution_divider_;
1154}
1155
1157{
1159 /* Use lower value than the non-denoised case to allow having more pixels to reconstruct the
1160 * image from. With the faster updates and extra compute required the resolution becomes too
1161 * low to give usable feedback. */
1162 /* NOTE: Based on performance of OpenImageDenoise on CPU. For OptiX denoiser or other denoiser
1163 * on GPU the value might need to become lower for faster navigation. */
1164 return 1.0 / 12.0;
1165 }
1166
1167 /* For the best match with the Blender's viewport the refresh rate should be 60fps. This will
1168 * avoid "jelly" effects. However, on non-trivial scenes this can only be achieved with high
1169 * values of the resolution divider which does not give very pleasant updates during navigation.
1170 * Choose less frequent updates to allow more noise-free and higher resolution updates. */
1171
1172 /* TODO(sergey): Can look into heuristic which will allow to have 60fps if the resolution divider
1173 * is not too high. Alternatively, synchronize Blender's overlays updates to Cycles updates. */
1174
1175 return 1.0 / 30.0;
1176}
1177
1179{
1180 if (!denoiser_params_.use) {
1181 return false;
1182 }
1183
1185 return false;
1186 }
1187
1188 return true;
1189}
1190
1192{
1193 return render_work.resolution_divider == pixel_size_ &&
1194 render_work.path_trace.start_sample == start_sample_;
1195}
1196
1198{
1199 /* When rendering at a non-final resolution divider time average is not very useful because it
1200 * will either bias average down (due to lower render times on the smaller images) or will give
1201 * incorrect result when trying to estimate time which would have been spent on the final resolution.
1202 *
1203 * So we only accumulate average for the latest resolution divider which was rendered. */
1204 return render_work.resolution_divider != pixel_size_;
1205}
1206
1208{
1209 if (time_limit_ == 0.0) {
1210 /* No limit is enforced. */
1211 return;
1212 }
1213
1214 if (state_.start_render_time == 0.0) {
1215 /* Rendering did not start yet. */
1216 return;
1217 }
1218
1219 const double current_time = time_dt();
1220
1221 if (current_time - state_.start_render_time < time_limit_) {
1222 /* Time limit is not reached yet. */
1223 return;
1224 }
1225
1226 state_.time_limit_reached = true;
1227 state_.end_render_time = current_time;
1228}
1229
1230/* --------------------------------------------------------------------
1231 * Utility functions.
1232 */
1233
1234int RenderScheduler::calculate_resolution_divider_for_time(double desired_time, double actual_time)
1235{
1236 const double ratio_between_times = actual_time / desired_time;
1237
1238 /* We can pass `ratio_between_times` to `get_num_samples_during_navigation()` to get our
1239 * navigation samples because the equation for calculating the resolution divider is as follows:
1240 * `actual_time / desired_time = sqr(resolution_divider) / sample_count`.
1241 * While `resolution_divider` is less than or equal to 4, `resolution_divider = sample_count`
1242 * (This relationship is determined in `get_num_samples_during_navigation()`). With some
1243 * substitution we end up with `actual_time / desired_time = resolution_divider` while the
1244 * resolution divider is less than or equal to 4. Once the resolution divider increases above 4,
1245 * the relationship of `actual_time / desired_time = resolution_divider` is no longer true,
1246 * however the sample count retrieved from `get_num_samples_during_navigation()` is still
1247 * accurate if we continue using this assumption. It should be noted that the interaction between
1248 * `pixel_size`, sample count, and resolution divider are automatically accounted for and that's
1249 * why `pixel_size` isn't included in any of the equations. */
1250 const int navigation_samples = get_num_samples_during_navigation(
1251 ceil_to_int(ratio_between_times));
1252
1253 return ceil_to_int(sqrt(navigation_samples * ratio_between_times));
1254}
1255
/* Calculate the smallest power-of-two resolution divider for which the (repeatedly halved) buffer
 * of `width` x `height` pixels has no more pixels than a square of `resolution` x `resolution`.
 *
 * A `resolution` of `INT_MAX` means "no limit" and always gives a divider of 1.
 *
 * Pixel counts are compared in 64-bit: both `width * height` and `resolution * resolution` can
 * exceed the `int` range, which is undefined behavior for signed integers. */
int calculate_resolution_divider_for_resolution(int width, int height, int resolution)
{
  if (resolution == INT_MAX) {
    return 1;
  }

  const long long max_num_pixels = static_cast<long long>(resolution) * resolution;

  int resolution_divider = 1;
  while (static_cast<long long>(width) * height > max_num_pixels) {
    if (width == 1 && height == 1) {
      /* The buffer can not become any smaller (halving is clamped to 1 pixel), so the target can
       * never be reached. Only possible for a resolution of 0. Stop instead of looping forever. */
      break;
    }

    width = max(1, width / 2);
    height = max(1, height / 2);

    resolution_divider <<= 1;
  }

  return resolution_divider;
}
1272
/* Calculate the resolution (side of a square with the same pixel area as a `width` x `height`
 * buffer, rounded to the nearest integer) divided by the given resolution divider.
 *
 * NOTE: `resolution_divider` is used as an integer divisor as-is, so it must not be 0. */
int calculate_resolution_for_divider(int width, int height, int resolution_divider)
{
  /* Calculate the area in double precision: the integer product `width * height` overflows the
   * `int` range for large buffers, while the double product is exact for any pair of ints. */
  const double pixel_area = static_cast<double>(width) * static_cast<double>(height);
  const int resolution = lround(sqrt(pixel_area));

  return resolution / resolution_divider;
}
1280
sqrt(x)+1/max(0
unsigned int uint
void reset()
clear internal cached data and reset random seed
bool need_filter(int sample) const
int align_samples(int start_sample, int num_samples) const
NODE_DECLARE int width
Definition buffers.h:72
DenoiserType type
Definition denoise.h:61
bool use_gpu
Definition denoise.h:75
int start_sample
Definition denoise.h:64
NODE_DECLARE bool use
Definition denoise.h:58
bool use_pass_normal
Definition denoise.h:68
bool use_pass_albedo
Definition denoise.h:67
void add_average(double time, int num_measurements=1)
int calculate_resolution_divider_for_time(double desired_time, double actual_time)
AdaptiveSampling adaptive_sampling_
DenoiseParams denoiser_params_
void update_state_for_render_work(const RenderWork &render_work)
void report_display_update_time(const RenderWork &render_work, double time)
float work_adaptive_threshold() const
int calculate_num_samples_per_update() const
int get_rendered_sample() const
void set_time_limit(double time_limit)
bool work_need_update_display(const bool denoiser_delayed)
BufferParams buffer_params_
void set_sample_offset(int sample_offset)
void report_adaptive_filter_time(const RenderWork &render_work, double time, bool is_cancelled)
void set_need_schedule_rebalance(bool need_schedule_rebalance)
double guess_display_update_interval_in_seconds() const
bool work_need_denoise(bool &delayed, bool &ready_to_display)
bool work_need_adaptive_filter() const
int get_num_rendered_samples() const
string full_report() const
bool need_schedule_rebalance_works_
void reset(const BufferParams &buffer_params, int num_samples, int sample_offset)
struct RenderScheduler::@1434 state_
bool is_denoise_active_during_update() const
int get_num_samples_during_navigation(int resolution_divier) const
bool set_postprocess_render_work(RenderWork *render_work)
void report_rebalance_time(const RenderWork &render_work, double time, bool balance_changed)
double get_time_limit() const
double guess_display_update_interval_in_seconds_for_num_samples_no_limit(int num_rendered_samples) const
bool work_is_usable_for_first_render_estimation(const RenderWork &render_work)
void report_denoise_time(const RenderWork &render_work, double time)
bool is_adaptive_sampling_used() const
void set_denoiser_params(const DenoiseParams &params)
double guess_display_update_interval_in_seconds_for_num_samples(int num_rendered_samples) const
bool render_work_reschedule_on_idle(RenderWork &render_work)
int get_num_samples() const
TimeWithAverage adaptive_filter_time_
TimeWithAverage rebalance_time_
int get_sample_offset() const
int default_start_resolution_divider_
void report_path_trace_time(const RenderWork &render_work, double time, bool is_cancelled)
bool is_denoiser_gpu_used() const
TimeWithAverage display_update_time_
void set_full_frame_render_work(RenderWork *render_work)
double guess_viewport_navigation_update_interval_in_seconds() const
void update_start_resolution_divider()
void set_num_samples(int num_samples)
void report_path_trace_occupancy(const RenderWork &render_work, float occupancy)
void set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling)
struct RenderScheduler::@1435 first_render_time_
bool render_work_reschedule_on_converge(RenderWork &render_work)
TileManager & tile_manager_
bool work_report_reset_average(const RenderWork &render_work)
TimeWithAverage path_trace_time_
void set_start_sample(int start_sample)
int get_start_sample_to_path_trace() const
int get_num_samples_to_path_trace() const
void render_work_reschedule_on_cancel(RenderWork &render_work)
void report_work_begin(const RenderWork &render_work)
TimeWithAverage denoise_time_
void set_need_schedule_cryptomatte(bool need_schedule_cryptomatte)
void set_limit_samples_per_update(const int limit_samples)
int get_start_sample() const
RenderWork get_render_work()
RenderScheduler(TileManager &tile_manager, const SessionParams &params)
bool is_background() const
struct RenderWork::@1432 full
bool use_denoised_result
bool init_render_buffers
struct RenderWork::@1430 cryptomatte
struct RenderWork::@1429 adaptive_sampling
struct RenderWork::@1431 tile
struct RenderWork::@1433 display
struct RenderWork::@1428 path_trace
bool has_multiple_tiles() const
double time
CCL_NAMESPACE_BEGIN const char * denoiserTypeToHumanReadable(DenoiserType type)
Definition denoise.cpp:9
#define CCL_NAMESPACE_END
static const char * to_string(const Interpolation &interp)
Definition gl_shader.cc:82
uiWidgetBaseParameters params[MAX_WIDGET_BASE_BATCH]
#define VLOG_WORK
Definition log.h:75
#define DCHECK_GT(a, b)
Definition log.h:60
int calculate_resolution_divider_for_resolution(int width, int height, int resolution)
static double approximate_final_time(const RenderWork &render_work, double time)
static uint round_num_samples_to_power_of_2(const uint num_samples)
int calculate_resolution_for_divider(int width, int height, int resolution_divider)
#define min(a, b)
Definition sort.c:32
string string_from_bool(bool var)
Definition string.cpp:170
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
Definition string.cpp:23
CCL_NAMESPACE_BEGIN double time_dt()
Definition time.cpp:36
float max
ccl_device_inline int ceil_to_int(float f)
Definition util/math.h:441
ccl_device_inline uint next_power_of_two(uint x)
Definition util/math.h:1000
ccl_device_inline bool is_power_of_two(size_t x)
Definition util/types.h:68
double total_time