Blender V5.0
render_scheduler.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
6
7#include "scene/integrator.h"
8
9#include "session/session.h"
10#include "session/tile.h"
11
12#include "util/log.h"
13#include "util/time.h"
14
16
17/* --------------------------------------------------------------------
18 * Render scheduler.
19 */
20
22 : headless_(params.headless),
23 background_(params.background),
24 pixel_size_(params.pixel_size),
25 tile_manager_(tile_manager),
26 default_start_resolution_divider_(params.use_resolution_divider ? pixel_size_ * 8 : 0)
27{
29}
30
31void RenderScheduler::set_need_schedule_cryptomatte(bool need_schedule_cryptomatte)
32{
33 need_schedule_cryptomatte_ = need_schedule_cryptomatte;
34}
35
36void RenderScheduler::set_need_schedule_rebalance(bool need_schedule_rebalance)
37{
38 need_schedule_rebalance_works_ = need_schedule_rebalance;
39}
40
42{
43 return background_;
44}
45
50
52{
53 return denoiser_params_.use_gpu;
54}
55
57{
60 }
61 else {
62 limit_samples_per_update_ = limit_samples;
63 }
64}
65
67{
68 adaptive_sampling_ = adaptive_sampling;
69}
70
75
76void RenderScheduler::set_sample_params(const int num_samples,
77 const bool use_sample_subset,
78 const int sample_subset_offset,
79 const int sample_subset_length)
80{
83
84 if (use_sample_subset) {
85 sample_offset_ = sample_subset_offset;
87 min(sample_subset_offset + sample_subset_length, num_samples_) - sample_subset_offset, 0);
88 }
89}
90
92{
93 return num_samples_;
94}
95
100
101void RenderScheduler::set_time_limit(const double time_limit)
102{
103 time_limit_ = time_limit;
104}
105
107{
108 return time_limit_;
109}
110
117
119{
120 return state_.num_rendered_samples;
121}
122
123void RenderScheduler::reset(const BufferParams &buffer_params)
124{
125 buffer_params_ = buffer_params;
126
128
129 /* In background mode never do lower resolution render preview, as it is not really supported
130 * by the software. */
132 state_.resolution_divider = 1;
133 }
134 else {
135 state_.user_is_navigating = true;
136 state_.resolution_divider = start_resolution_divider_;
137 }
138
139 state_.num_rendered_samples = 0;
140 state_.last_display_update_time = 0.0;
141 state_.last_display_update_sample = -1;
142
143 state_.last_rebalance_time = 0.0;
144 state_.num_rebalance_requested = 0;
145 state_.num_rebalance_changes = 0;
146 state_.last_rebalance_changed = false;
147 state_.need_rebalance_at_next_work = false;
148
149 /* TODO(sergey): Choose better initial value. */
150 /* NOTE: The adaptive sampling settings might not be available here yet. */
151 state_.adaptive_sampling_threshold = 0.4f;
152
153 state_.last_work_tile_was_denoised = false;
154 state_.tile_result_was_written = false;
155 state_.postprocess_work_scheduled = false;
156 state_.full_frame_work_scheduled = false;
157 state_.full_frame_was_written = false;
158
159 state_.path_trace_finished = false;
160
161 state_.start_render_time = 0.0;
162 state_.end_render_time = 0.0;
163 state_.time_limit_reached = false;
164
165 state_.occupancy_num_samples = 0;
166 state_.occupancy = 1.0f;
167
168 first_render_time_.path_trace_per_sample = 0.0;
169 first_render_time_.denoise_time = 0.0;
170 first_render_time_.display_update_time = 0.0;
171
172 path_trace_time_.reset();
173 denoise_time_.reset();
174 adaptive_filter_time_.reset();
175 display_update_time_.reset();
176 rebalance_time_.reset();
178}
179
184
186{
187 /* Move to the next resolution divider. Assume adaptive filtering is not needed during
188 * navigation. */
189 if (state_.resolution_divider != pixel_size_) {
190 return false;
191 }
192
193 if (render_work_reschedule_on_idle(render_work)) {
194 return true;
195 }
196
197 state_.path_trace_finished = true;
198
199 bool denoiser_delayed;
200 bool denoiser_ready_to_display;
201 render_work.tile.denoise = work_need_denoise(denoiser_delayed, denoiser_ready_to_display);
202
203 render_work.display.update = work_need_update_display(denoiser_delayed);
204 render_work.display.use_denoised_result = denoiser_ready_to_display;
205
206 return false;
207}
208
210{
212 return false;
213 }
214
215 /* Move to the next resolution divider. Assume adaptive filtering is not needed during
216 * navigation. */
217 if (state_.resolution_divider != pixel_size_) {
218 return false;
219 }
220
221 if (adaptive_sampling_.use) {
222 if (state_.adaptive_sampling_threshold > adaptive_sampling_.threshold) {
223 state_.adaptive_sampling_threshold = max(state_.adaptive_sampling_threshold / 2,
224 adaptive_sampling_.threshold);
225
226 render_work.adaptive_sampling.threshold = state_.adaptive_sampling_threshold;
227 render_work.adaptive_sampling.reset = true;
228
229 return true;
230 }
231 }
232
233 return false;
234}
235
237{
238 LOG_DEBUG << "Schedule work for cancel.";
239
240 /* Un-schedule samples: they will not be rendered and should not be counted. */
241 state_.num_rendered_samples -= render_work.path_trace.num_samples;
242
243 const bool has_rendered_samples = get_num_rendered_samples() != 0;
244
245 /* Reset all fields of the previous work, canceling things like adaptive sampling filtering and
246 * denoising.
247 * However, need to preserve write requests, since those will not be possible to recover and
248 * writes are only to happen once. */
249 const bool tile_write = render_work.tile.write;
250 const bool full_write = render_work.full.write;
251
252 render_work = RenderWork();
253
254 render_work.tile.write = tile_write;
255 render_work.full.write = full_write;
256
257 /* Do not write tile if it has zero samples in it, treat it similarly to all other tiles which
258 * got canceled. */
259 if (!state_.tile_result_was_written && has_rendered_samples) {
260 render_work.tile.write = true;
261 }
262
263 if (!state_.full_frame_was_written) {
264 render_work.full.write = true;
265 }
266
267 /* Update current tile, but only if any sample was rendered.
268 * Allows to have latest state of tile visible while full buffer is being processed.
269 *
270 * Note that if there are no samples in the current tile its render buffer might have pixels
271 * remained from previous state.
272 *
273 * If the full result was written, then there is no way any updates were made to the render
274 * buffers. And the buffers might have been freed from the device, so display update is not
275 * possible. */
276 if (has_rendered_samples && !state_.full_frame_was_written) {
277 render_work.display.update = true;
278 }
279}
280
282{
283 if (state_.resolution_divider != pixel_size_) {
284 return false;
285 }
286
287 if (state_.path_trace_finished || state_.time_limit_reached) {
288 return true;
289 }
290
292}
293
295{
297
298 const double time_now = time_dt();
299
300 if (done()) {
301 RenderWork render_work;
302 render_work.resolution_divider = state_.resolution_divider;
303
304 if (!set_postprocess_render_work(&render_work)) {
305 set_full_frame_render_work(&render_work);
306 }
307
308 if (!render_work) {
309 state_.end_render_time = time_now;
310 }
311
312 update_state_for_render_work(render_work);
313
314 return render_work;
315 }
316
317 RenderWork render_work;
318
319 if (state_.resolution_divider != pixel_size_) {
320 if (state_.user_is_navigating) {
321 /* Don't progress the resolution divider as the user is currently navigating in the scene. */
322 state_.user_is_navigating = false;
323 }
324 else {
325 /* If the resolution divider is greater than or equal to default_start_resolution_divider_,
326 * drop the resolution divider down to 4. This is so users with slow hardware and thus high
327 * resolution dividers (E.G. 16), get an update to let them know something is happening
328 * rather than having to wait for the full 1:1 render to show up. */
329 state_.resolution_divider = state_.resolution_divider > default_start_resolution_divider_ ?
330 (4 * pixel_size_) :
331 1;
332 }
333
334 state_.resolution_divider = max(state_.resolution_divider, pixel_size_);
335 state_.num_rendered_samples = 0;
336 state_.last_display_update_sample = -1;
337 }
338
339 render_work.resolution_divider = state_.resolution_divider;
340
344
345 render_work.init_render_buffers = (render_work.path_trace.start_sample == get_sample_offset());
346
347 /* NOTE: Rebalance scheduler requires current number of samples to not be advanced forward. */
348 render_work.rebalance = work_need_rebalance();
349
350 /* NOTE: Advance number of samples now, so that filter and denoising check can see that all the
351 * samples are rendered. */
352 state_.num_rendered_samples += render_work.path_trace.num_samples;
353
356 render_work.adaptive_sampling.reset = false;
357
358 bool denoiser_delayed;
359 bool denoiser_ready_to_display;
360 render_work.tile.denoise = work_need_denoise(denoiser_delayed, denoiser_ready_to_display);
361
362 render_work.tile.write = done();
363
364 render_work.display.update = work_need_update_display(denoiser_delayed);
365 render_work.display.use_denoised_result = denoiser_ready_to_display;
366
367 if (done()) {
368 set_postprocess_render_work(&render_work);
369 }
370
371 update_state_for_render_work(render_work);
372
373 return render_work;
374}
375
377{
378 const double time_now = time_dt();
379
380 if (render_work.rebalance) {
381 state_.last_rebalance_time = time_now;
382 ++state_.num_rebalance_requested;
383 }
384
385 /* A fallback display update time, for the case there is an error of display update, or when
386 * there is no display at all. */
387 if (render_work.display.update) {
388 state_.last_display_update_time = time_now;
389 state_.last_display_update_sample = state_.num_rendered_samples;
390 }
391
392 state_.last_work_tile_was_denoised = render_work.tile.denoise;
393 state_.tile_result_was_written |= render_work.tile.write;
394 state_.full_frame_was_written |= render_work.full.write;
395}
396
398{
399 if (state_.postprocess_work_scheduled) {
400 return false;
401 }
402 state_.postprocess_work_scheduled = true;
403
404 bool any_scheduled = false;
405
407 render_work->cryptomatte.postprocess = true;
408 any_scheduled = true;
409 }
410
411 if (denoiser_params_.use && !state_.last_work_tile_was_denoised) {
412 render_work->tile.denoise = !tile_manager_.has_multiple_tiles();
413 any_scheduled = true;
414 }
415
416 if (!state_.tile_result_was_written) {
417 render_work->tile.write = true;
418 any_scheduled = true;
419 }
420
421 if (any_scheduled) {
422 render_work->display.update = true;
423 }
424
425 return any_scheduled;
426}
427
429{
430 if (state_.full_frame_work_scheduled) {
431 return;
432 }
433
434 if (!tile_manager_.has_multiple_tiles()) {
435 /* There is only single tile, so all work has been performed already. */
436 return;
437 }
438
439 if (!tile_manager_.done()) {
440 /* There are still tiles to be rendered. */
441 return;
442 }
443
444 if (state_.full_frame_was_written) {
445 return;
446 }
447
448 state_.full_frame_work_scheduled = true;
449
450 render_work->full.write = true;
451}
452
453/* Knowing time which it took to complete a task at the current resolution divider approximate how
454 * long it would have taken to complete it at a final resolution. */
455static double approximate_final_time(const RenderWork &render_work, const double time)
456{
457 if (render_work.resolution_divider == 1) {
458 return time;
459 }
460
461 const double resolution_divider_sq = render_work.resolution_divider *
462 render_work.resolution_divider;
463 return time * resolution_divider_sq;
464}
465
467{
468 /* Start counting render time when rendering samples at their final resolution.
469 *
470 * NOTE: The work might have the path trace part be all zero: this happens when a post-processing
471 * work is scheduled after the path tracing. Checking for just a start sample doesn't work here
472 * because it might be wrongly 0. Check for whether path tracing is actually happening as it is
473 * expected to happen in the first work. */
474 if (render_work.resolution_divider == pixel_size_ && render_work.path_trace.num_samples != 0 &&
476 {
477 state_.start_render_time = time_dt();
478 }
479}
480
482 const double time,
483 bool is_cancelled)
484{
485 path_trace_time_.add_wall(time);
486
487 if (is_cancelled) {
488 return;
489 }
490
491 const double final_time_approx = approximate_final_time(render_work, time);
492
494 first_render_time_.path_trace_per_sample = final_time_approx /
495 render_work.path_trace.num_samples;
496 }
497
498 if (work_report_reset_average(render_work)) {
499 path_trace_time_.reset_average();
500 }
501
502 path_trace_time_.add_average(final_time_approx, render_work.path_trace.num_samples);
503
504 LOG_DEBUG << "Average path tracing time: " << path_trace_time_.get_average() << " seconds.";
505}
506
508 const float occupancy)
509{
510 state_.occupancy_num_samples = render_work.path_trace.num_samples;
511 state_.occupancy = occupancy;
512 LOG_DEBUG << "Measured path tracing occupancy: " << occupancy;
513}
514
516 const double time,
517 bool is_cancelled)
518{
519 adaptive_filter_time_.add_wall(time);
520
521 if (is_cancelled) {
522 return;
523 }
524
525 const double final_time_approx = approximate_final_time(render_work, time);
526
527 if (work_report_reset_average(render_work)) {
528 adaptive_filter_time_.reset_average();
529 }
530
531 adaptive_filter_time_.add_average(final_time_approx, render_work.path_trace.num_samples);
532
533 LOG_DEBUG << "Average adaptive sampling filter time: " << adaptive_filter_time_.get_average()
534 << " seconds.";
535}
536
537void RenderScheduler::report_denoise_time(const RenderWork &render_work, const double time)
538{
539 denoise_time_.add_wall(time);
540
541 const double final_time_approx = approximate_final_time(render_work, time);
542
544 first_render_time_.denoise_time = final_time_approx;
545 }
546
547 if (work_report_reset_average(render_work)) {
548 denoise_time_.reset_average();
549 }
550
551 denoise_time_.add_average(final_time_approx);
552
553 LOG_DEBUG << "Average denoising time: " << denoise_time_.get_average() << " seconds.";
554}
555
557 const double time)
558{
559 volume_guiding_denoise_time_.add_wall(time);
560
561 const double final_time_approx = approximate_final_time(render_work, time);
562
563 if (work_report_reset_average(render_work)) {
564 volume_guiding_denoise_time_.reset_average();
565 }
566
567 volume_guiding_denoise_time_.add_average(final_time_approx, render_work.path_trace.num_samples);
568
569 LOG_DEBUG << "Average volume guiding denoising time: "
570 << volume_guiding_denoise_time_.get_average() << " seconds.";
571}
572
573void RenderScheduler::report_display_update_time(const RenderWork &render_work, const double time)
574{
575 display_update_time_.add_wall(time);
576
577 const double final_time_approx = approximate_final_time(render_work, time);
578
580 first_render_time_.display_update_time = final_time_approx;
581 }
582
583 if (work_report_reset_average(render_work)) {
584 display_update_time_.reset_average();
585 }
586
587 display_update_time_.add_average(final_time_approx);
588
589 LOG_DEBUG << "Average display update time: " << display_update_time_.get_average()
590 << " seconds.";
591
592 /* Move the display update moment further in time, so that logic which checks when last update
593 * did happen have more reliable point in time (without path tracing and denoising parts of the
594 * render work). */
595 state_.last_display_update_time = time_dt();
596}
597
599 const double time,
600 bool balance_changed)
601{
602 rebalance_time_.add_wall(time);
603
604 if (work_report_reset_average(render_work)) {
605 rebalance_time_.reset_average();
606 }
607
608 rebalance_time_.add_average(time);
609
610 if (balance_changed) {
611 ++state_.num_rebalance_changes;
612 }
613
614 state_.last_rebalance_changed = balance_changed;
615
616 LOG_DEBUG << "Average rebalance time: " << rebalance_time_.get_average() << " seconds.";
617}
618
620{
621 const double render_wall_time = state_.end_render_time - state_.start_render_time;
623
624 string result = "\nRender Scheduler Summary\n\n";
625
626 {
627 string mode;
628 if (headless_) {
629 mode = "Headless";
630 }
631 else if (background_) {
632 mode = "Background";
633 }
634 else {
635 mode = "Interactive";
636 }
637 result += "Mode: " + mode + "\n";
638 }
639
640 result += "Resolution: " + to_string(buffer_params_.width) + "x" +
641 to_string(buffer_params_.height) + "\n";
642
643 result += "\nAdaptive sampling:\n";
644 result += " Use: " + string_from_bool(adaptive_sampling_.use) + "\n";
645 if (adaptive_sampling_.use) {
646 result += " Step: " + to_string(adaptive_sampling_.adaptive_step) + "\n";
647 result += " Min Samples: " + to_string(adaptive_sampling_.min_samples) + "\n";
648 result += " Threshold: " + to_string(adaptive_sampling_.threshold) + "\n";
649 }
650
651 result += "\nDenoiser:\n";
652 result += " Use: " + string_from_bool(denoiser_params_.use) + "\n";
653 if (denoiser_params_.use) {
654 result += " Type: " + string(denoiserTypeToHumanReadable(denoiser_params_.type)) + "\n";
655 result += " Start Sample: " + to_string(denoiser_params_.start_sample) + "\n";
656
657 string passes = "Color";
658 if (denoiser_params_.use_pass_albedo) {
659 passes += ", Albedo";
660 }
661 if (denoiser_params_.use_pass_normal) {
662 passes += ", Normal";
663 }
664
665 result += " Passes: " + passes + "\n";
666 }
667
668 if (state_.num_rebalance_requested) {
669 result += "\nRebalancer:\n";
670 result += " Number of requested rebalances: " + to_string(state_.num_rebalance_requested) +
671 "\n";
672 result += " Number of performed rebalances: " + to_string(state_.num_rebalance_changes) +
673 "\n";
674 }
675
676 result += "\nTime (in seconds):\n";
677 result += string_printf(" %20s %20s %20s\n", "", "Wall", "Average");
678 result += string_printf(" %20s %20f %20f\n",
679 "Path Tracing",
680 path_trace_time_.get_wall(),
681 path_trace_time_.get_average());
682
683 if (adaptive_sampling_.use) {
684 result += string_printf(" %20s %20f %20f\n",
685 "Adaptive Filter",
686 adaptive_filter_time_.get_wall(),
687 adaptive_filter_time_.get_average());
688 }
689
690 if (denoiser_params_.use) {
692 " %20s %20f %20f\n", "Denoiser", denoise_time_.get_wall(), denoise_time_.get_average());
693 }
694
695 result += string_printf(" %20s %20f %20f\n",
696 "Display Update",
697 display_update_time_.get_wall(),
698 display_update_time_.get_average());
699
700 if (state_.num_rebalance_requested) {
701 result += string_printf(" %20s %20f %20f\n",
702 "Rebalance",
703 rebalance_time_.get_wall(),
704 rebalance_time_.get_average());
705 }
706
707 const double total_time = path_trace_time_.get_wall() + adaptive_filter_time_.get_wall() +
708 denoise_time_.get_wall() + display_update_time_.get_wall();
709 result += "\n Total: " + to_string(total_time) + "\n";
710
712 "\nRendered %d samples in %f seconds\n", num_rendered_samples, render_wall_time);
713
714 /* When adaptive sampling is used the average time becomes meaningless, because different samples
715 * will likely render different number of pixels. */
716 if (!adaptive_sampling_.use) {
717 result += string_printf("Average time per sample: %f seconds\n",
718 render_wall_time / num_rendered_samples);
719 }
720
721 return result;
722}
723
728
730 int num_rendered_samples) const
731{
734
735 if (time_limit_ != 0.0 && state_.start_render_time != 0.0) {
736 const double remaining_render_time = max(0.0,
737 time_limit_ - (time_dt() - state_.start_render_time));
738
739 update_interval = min(update_interval, remaining_render_time);
740 }
741
742 return update_interval;
743}
744
745/* TODO(sergey): This is just a quick implementation, exact values might need to be tweaked based
746 * on more careful experiments with viewport rendering. */
748 int num_rendered_samples) const
749{
750 /* TODO(sergey): Need a decision on whether this should be using number of samples rendered
751 * within the current render session, or use absolute number of samples with the start sample
752 * taken into account. It will depend on whether the start sample offset clears the render
753 * buffer. */
754
755 if (state_.need_rebalance_at_next_work) {
756 return 0.1;
757 }
758 if (state_.last_rebalance_changed) {
759 return 0.2;
760 }
761
762 if (headless_) {
763 /* In headless mode do rare updates, so that the device occupancy is high, but there are still
764 * progress messages printed to the logs. */
765 return 30.0;
766 }
767
768 if (background_) {
769 if (num_rendered_samples < 32) {
770 return 1.0;
771 }
772 return 2.0;
773 }
774
775 /* Render time and number of samples rendered are used to figure out the display update interval.
776 * Render time is used to allow for fast display updates in the first few seconds of rendering
777 * on fast devices. Number of samples rendered is used to allow for potentially quicker display
778 * updates on slow devices during the first few samples. */
779 const double render_time = path_trace_time_.get_wall();
780 if (render_time < 1) {
781 return 0.1;
782 }
783 if (render_time < 2) {
784 return 0.25;
785 }
786 if (render_time < 4) {
787 return 0.5;
788 }
789 if (render_time < 8 || num_rendered_samples < 32) {
790 return 1.0;
791 }
792 return 2.0;
793}
794
796{
797 const double time_per_sample_average = path_trace_time_.get_average();
798 /* Fall back to 1 sample if we have not recorded a time yet. */
799 if (time_per_sample_average == 0.0) {
800 return 1;
801 }
802
803 const double num_samples_in_second = pixel_size_ * pixel_size_ / time_per_sample_average;
804
805 const double update_interval_in_seconds = guess_display_update_interval_in_seconds();
806
807 return max(int(num_samples_in_second * update_interval_in_seconds), 1);
808}
809
811{
812 return sample_offset_ + state_.num_rendered_samples;
813}
814
815/* Round number of samples to the closest power of two.
816 * Rounding might happen to higher or lower value depending on which one is closer. Such behavior
817 * allows to have number of samples to be power of two without diverging from the planned number of
818 * samples too much. */
819static inline uint round_num_samples_to_power_of_2(const uint num_samples)
820{
821 if (num_samples == 1) {
822 return 1;
823 }
824
825 if (is_power_of_two(num_samples)) {
826 return num_samples;
827 }
828
829 const uint num_samples_up = next_power_of_two(num_samples);
830 const uint num_samples_down = num_samples_up - (num_samples_up >> 1);
831
832 const uint delta_up = num_samples_up - num_samples;
833 const uint delta_down = num_samples - num_samples_down;
834
835 if (delta_up <= delta_down) {
836 return num_samples_up;
837 }
838
839 return num_samples_down;
840}
841
843{
844 if (state_.resolution_divider != pixel_size_) {
845 return get_num_samples_during_navigation(state_.resolution_divider);
846 }
847
848 /* Always start full resolution render with a single sample. Gives more instant feedback to
849 * artists, and allows to gather information for a subsequent path tracing works. Do it in the
850 * headless mode as well, to give some estimate of how long samples are taking. */
851 if (state_.num_rendered_samples == 0) {
852 return 1;
853 }
854
855 const int num_samples_per_update = calculate_num_samples_per_update();
856 const int path_trace_start_sample = get_start_sample_to_path_trace();
857
858 /* Round number of samples to a power of two, so that division of path states into tiles goes in
859 * a more integer manner.
860 * This might make it so updates happens more rarely due to rounding up. In the test scenes this
861 * is not huge deal because it is not seen that more than 8 samples can be rendered between
862 * updates. If that becomes a problem we can add some extra rules like never allow to round up
863 * more than N samples. */
864 const int num_samples_pot = round_num_samples_to_power_of_2(num_samples_per_update);
865
866 const int max_num_samples_to_render = sample_offset_ + num_samples_ - path_trace_start_sample;
867
868 int num_samples_to_render = min(num_samples_pot, max_num_samples_to_render);
869
870 /* When enough statistics is available and doing an offline rendering prefer to keep device
871 * occupied. */
872 if (state_.occupancy_num_samples && (background_ || headless_)) {
873 /* Keep occupancy at about 0.5 (this is more of an empirical figure which seems to match scenes
874 * with good performance without forcing occupancy to be higher). */
875 int num_samples_to_occupy = state_.occupancy_num_samples;
876 float ratio_to_increase_occupancy = 1.0f;
877 if (state_.occupancy > 0 && state_.occupancy < 0.5f) {
878 ratio_to_increase_occupancy = 0.7f / state_.occupancy;
879 num_samples_to_occupy = lround(state_.occupancy_num_samples * ratio_to_increase_occupancy);
880 }
881
882 /* Time limit for path tracing, which constraints the scheduler from "over-scheduling" work
883 * in scenes which have very long path trace times and low occupancy. This allows faster
884 * feedback of render results, and faster canceling when artists notice something is wrong.
885 *
886 * Additionally, when the time limit is enabled, do not render more samples than it is needed
887 * to reach the time limit. */
888 double path_tracing_time_limit = 0;
889 if (headless_) {
890 /* In the headless (command-line) render "over-scheduling" is not as bad, as it ensures the
891 * best possible render time. */
892 }
893 else if (background_) {
894 /* For the first few seconds prefer quicker updates, giving it a better chance for artists
895 * to cancel render early on when they notice something is wrong. After that increase the
896 * update times a lot, giving the best possible performance on a complicated scenes like
897 * the Spring splash screen (where occupancy is just very bad). */
898 if (state_.start_render_time == 0.0 || time_dt() - state_.start_render_time < 10) {
899 path_tracing_time_limit = 2.0;
900 }
901 else {
902 path_tracing_time_limit = 15.0;
903 }
904 }
905 else {
906 /* Viewport render: prefer faster updates over overall render time reduction. */
907 /* TODO: Look into enabling this entire code-path for the viewport as well, allowing
908 * compensation even in viewport (currently parent scope checks for non-viewport render). */
909 path_tracing_time_limit = guess_display_update_interval_in_seconds();
910 }
911 if (time_limit_ != 0.0 && state_.start_render_time != 0.0) {
912 const double remaining_render_time = max(
913 0.0, time_limit_ - (time_dt() - state_.start_render_time));
914 if (path_tracing_time_limit == 0) {
915 path_tracing_time_limit = remaining_render_time;
916 }
917 else {
918 path_tracing_time_limit = min(path_tracing_time_limit, remaining_render_time);
919 }
920 }
921 if (path_tracing_time_limit != 0) {
922 /* Use the per-sample time from the previously rendered batch of samples, so that the
923 * correction is applied much quicker. Also use the predicted increase in performance from
924 * increased occupancy. */
925 const double predicted_render_time = num_samples_to_occupy *
926 path_trace_time_.get_last_sample_time() /
927 ratio_to_increase_occupancy;
928 if (predicted_render_time > path_tracing_time_limit) {
929 num_samples_to_occupy = lround(num_samples_to_occupy *
930 (path_tracing_time_limit / predicted_render_time));
931 }
932 }
933
934 num_samples_to_render = max(num_samples_to_render,
935 min(num_samples_to_occupy, max_num_samples_to_render));
936 }
937
939 num_samples_to_render = min(limit_samples_per_update_, num_samples_to_render);
940 }
941
942 /* If adaptive sampling is not used, render as many samples per update as possible, keeping
943 * the device fully occupied, without much overhead of display updates. */
944 if (!adaptive_sampling_.use) {
945 return num_samples_to_render;
946 }
947
948 /* TODO(sergey): Add extra "clamping" here so that none of the filtering points is missing. This
949 * is to ensure that the final render is pixel-matched regardless of how many samples per second
950 * compute device can do. */
951
952 return adaptive_sampling_.align_samples(path_trace_start_sample - sample_offset_,
953 num_samples_to_render);
954}
955
957{
958 /* Special trick for fast navigation: schedule multiple samples during fast navigation
959 * (which will prefer to use lower resolution to keep up with refresh rate). This gives more
960 * usable visual feedback for artists. */
961
963 /* When denoising is used during navigation prefer using a higher resolution with less samples
964 * (scheduling less samples here will make it so the resolution_divider calculation will use a
965 * lower value for the divider). This is because both OpenImageDenoise and OptiX denoiser
966 * give visually better results on a higher resolution image with less samples. */
967 return 1;
968 }
969
970 /* Schedule samples equal to the resolution divider up to a maximum of 4, limited by the maximum
971 * number of samples overall.
972 * The idea is to have enough information on the screen by increasing the sample count as the
973 * resolution is decreased. */
974 const int max_navigation_samples = min(num_samples_, 4);
975 /* NOTE: Changing this formula will change the formula in
976 * `RenderScheduler::calculate_resolution_divider_for_time()`. */
977 return min(max(1, resolution_divider / pixel_size_), max_navigation_samples);
978}
979
984
986{
988 return adaptive_sampling_.threshold;
989 }
990
991 return max(state_.adaptive_sampling_threshold, adaptive_sampling_.threshold);
992}
993
995{
997 return false;
998 }
999
1000 if (done()) {
1001 /* No need to denoise after the last sample. */
1002 return false;
1003 }
1004
1005 return true;
1006}
1007
1008bool RenderScheduler::work_need_denoise(bool &delayed, bool &ready_to_display)
1009{
1010 delayed = false;
1011 ready_to_display = true;
1012
1013 if (!denoiser_params_.use) {
1014 /* Denoising is disabled, no need to scheduler work for it. */
1015 return false;
1016 }
1017
1018 /* When multiple tiles are used the full frame will be denoised.
1019 * Avoid per-tile denoising to save up render time. */
1020 if (tile_manager_.has_multiple_tiles()) {
1021 return false;
1022 }
1023
1024 if (done()) {
1025 /* Always denoise at the last sample. */
1026 return true;
1027 }
1028
1029 if (background_) {
1030 /* Background render, only denoise when rendering the last sample. */
1031 /* TODO(sergey): Follow similar logic to viewport, giving an overview of how final denoised
1032 * image looks like even for the background rendering. */
1033 return false;
1034 }
1035
1036 /* Viewport render. */
1037
1038 /* Navigation might render multiple samples at a lower resolution. Those are not to be counted as
1039 * final samples. */
1040 const int num_samples_finished = state_.resolution_divider == pixel_size_ ?
1041 state_.num_rendered_samples :
1042 1;
1043
1044 /* Immediately denoise when we reach the start sample or last sample. */
1045 if (num_samples_finished == denoiser_params_.start_sample ||
1046 num_samples_finished == num_samples_)
1047 {
1048 return true;
1049 }
1050
1051 /* Do not denoise until the sample at which denoising should start is reached. */
1052 if (num_samples_finished < denoiser_params_.start_sample) {
1053 ready_to_display = false;
1054 return false;
1055 }
1056
1057 /* Avoid excessive denoising in viewport after reaching a certain sample count and render time.
1058 */
1059 /* TODO(sergey): Consider making time interval and sample configurable. */
1060 delayed = (path_trace_time_.get_wall() > 4 && num_samples_finished >= 20 &&
1061 (time_dt() - state_.last_display_update_time) < 1.0);
1062
1063 return !delayed;
1064}
1065
1066bool RenderScheduler::work_need_update_display(const bool denoiser_delayed)
1067{
1068 if (headless_) {
1069 /* Force disable display update in headless mode. There will be nothing to display the
1070 * in-progress result. */
1071 return false;
1072 }
1073
1074 if (denoiser_delayed) {
1075 /* If denoiser has been delayed the display can not be updated as it will not contain
1076 * up-to-date state of the render result. */
1077 return false;
1078 }
1079
1080 if (!adaptive_sampling_.use) {
1081 /* When adaptive sampling is not used the work is scheduled in a way that they keep render
1082 * device busy for long enough, so that the display update can happen right after the
1083 * rendering. */
1084 return true;
1085 }
1086
1087 if (done() || state_.last_display_update_sample == -1) {
1088 /* Make sure an initial and final results of adaptive sampling is communicated ot the display.
1089 */
1090 return true;
1091 }
1092
1093 /* For the development purposes of adaptive sampling it might be very useful to see all updates
1094 * of active pixels after convergence check. However, it would cause a slowdown for regular usage
1095 * users. Possibly, make it a debug panel option to allow rapid update to ease development
1096 * without need to re-compiled. */
1097 // if (work_need_adaptive_filter()) {
1098 // return true;
1099 // }
1100
1101 /* When adaptive sampling is used, its possible that only handful of samples of a very simple
1102 * scene will be scheduled to a powerful device (in order to not "miss" any of filtering points).
1103 * We take care of skipping updates here based on when previous display update did happen. */
1104 const double update_interval = guess_display_update_interval_in_seconds_for_num_samples(
1105 state_.last_display_update_sample);
1106 return (time_dt() - state_.last_display_update_time) > update_interval;
1107}
1108
1110{
 /* Decides whether a rebalance should be done now: every `return false` below skips it, every
  * `return true` requests it. */
1111 /* This is the minimum time, as the rebalancing can not happen more often than the path trace
1112 * work. */
1113 static const double kRebalanceIntervalInSeconds = 1;
1114
1116 return false;
1117 }
1118
1119 if (state_.resolution_divider != pixel_size_) {
1120 /* Don't rebalance at a non-final resolution divider. Some reasons for this:
1121 * - It will introduce unnecessary work during navigation (NOTE(review): the noun was missing
 *   from the original comment, confirm the intended wording).
1122 * - Per-render device timing information is not very reliable yet. */
1123 return false;
1124 }
1125
1126 if (state_.num_rendered_samples == 0) {
 /* Nothing has been rendered yet: skip this time, but flag a rebalance for the next call. */
1127 state_.need_rebalance_at_next_work = true;
1128 return false;
1129 }
1130
1131 if (state_.need_rebalance_at_next_work) {
 /* One-shot request set by the branch above: consume the flag and rebalance. */
1132 state_.need_rebalance_at_next_work = false;
1133 return true;
1134 }
1135
1136 if (state_.last_rebalance_changed) {
 /* Presumably the previous rebalance changed the balance, so it may not have settled yet --
  * verify against where `last_rebalance_changed` is written. */
1137 return true;
1138 }
1139
 /* Otherwise rebalance only when more than `kRebalanceIntervalInSeconds` passed since
  * `state_.last_rebalance_time`. */
1140 return (time_dt() - state_.last_rebalance_time) > kRebalanceIntervalInSeconds;
1141}
1142
1144{
1146 return;
1147 }
1148
1149 /* Calculate the maximum resolution divider possible while keeping the long axis of the viewport
1150 * above our preferred minimum axis size (128). */
 /* NOTE(review): this is an integer division, so a buffer whose long axis is shorter than 128
  * pixels gives 0 here, and the `min()` at the end of the function would then store 0 into
  * `start_resolution_divider_` -- confirm whether that case can happen. */
1151 const int long_viewport_axis = max(buffer_params_.width, buffer_params_.height);
1152 const int max_res_divider_for_desired_size = long_viewport_axis / 128;
1153
1154 if (start_resolution_divider_ == 0) {
1155 /* Resolution divider has never been calculated before: start with a high resolution divider so
1156 * that we have a somewhat good initial behavior, giving a chance to collect real numbers. */
1158 max_res_divider_for_desired_size);
1159 LOG_DEBUG << "Initial resolution divider is " << start_resolution_divider_;
1160 return;
1161 }
1162
1163 if (first_render_time_.path_trace_per_sample == 0.0) {
1164 /* Not enough information to calculate better resolution, keep the existing one. */
1165 return;
1166 }
1167
1168 const double desired_update_interval_in_seconds =
1170
 /* Time of one update as recorded in `first_render_time_`: path tracing (per sample), denoising
  * and display update added together. */
1171 const double actual_time_per_update = first_render_time_.path_trace_per_sample +
1172 first_render_time_.denoise_time +
1173 first_render_time_.display_update_time;
1174
1175 /* Allow some percent of tolerance (the 1.4 factor below), so that if the render time is close
 * enough to the higher
1176 * resolution we prefer to use it instead of going way lower resolution and time way below the
1177 * desired one. */
1178 const int resolution_divider_for_update = calculate_resolution_divider_for_time(
1179 desired_update_interval_in_seconds * 1.4, actual_time_per_update);
1180
1181 /* TODO(sergey): Need to add hysteresis to avoid resolution divider bouncing around when actual
1182 * render time is somewhere on a boundary between two resolutions. */
1183
1184 /* Don't let resolution drop below the desired one. It's better to be slow than provide an
1185 * unreadable viewport render. */
1186 start_resolution_divider_ = min(resolution_divider_for_update, max_res_divider_for_desired_size);
1187
1188 LOG_DEBUG << "Calculated resolution divider is " << start_resolution_divider_;
1189}
1190
1192{
 /* Returns the update interval to aim for: 1/12 in the branch below, 1/30 otherwise.
  * Presumably expressed in seconds per update, since the comments below reason in frame rates.
  * NOTE(review): the condition that opens the first branch is not visible in this listing. */
1194 /* Use lower value than the non-denoised case to allow having more pixels to reconstruct the
1195 * image from. With the faster updates and extra compute required the resolution becomes too
1196 * low to give usable feedback. */
1197 /* NOTE: Based on performance of OpenImageDenoise on CPU. For OptiX denoiser or other denoiser
1198 * on GPU the value might need to become lower for faster navigation. */
1199 return 1.0 / 12.0;
1200 }
1201
1202 /* For the best match with the Blender's viewport the refresh ratio should be 60fps. This will
1203 * avoid "jelly" effects. However, on a non-trivial scenes this can only be achieved with high
1204 * values of the resolution divider which does not give very pleasant updates during navigation.
1205 * Choose less frequent updates to allow more noise-free and higher resolution updates. */
1206
1207 /* TODO(sergey): Can look into heuristic which will allow to have 60fps if the resolution divider
1208 * is not too high. Alternatively, synchronize Blender's overlays updates to Cycles updates. */
1209
1210 return 1.0 / 30.0;
1211}
1212
1214{
 /* Returns true only when the denoiser is enabled (`denoiser_params_.use`) and its start sample
  * is not greater than 1. */
1215 if (!denoiser_params_.use) {
1216 return false;
1217 }
1218
 /* A start sample above 1 is reported as "not active". */
1219 if (denoiser_params_.start_sample > 1) {
1220 return false;
1221 }
1222
1223 return true;
1224}
1225
1227{
 /* True when the work renders at a resolution divider equal to `pixel_size_` (which other
  * comments in this file treat as the final resolution) and its path tracing starts at
  * `sample_offset_`. */
1228 return render_work.resolution_divider == pixel_size_ &&
1229 render_work.path_trace.start_sample == sample_offset_;
1230}
1231
1233{
1234 /* When rendering at a non-final resolution divider time average is not very useful because it
1235 * will either bias average down (due to lower render times on the smaller images) or will give
1236 * incorrect result when trying to estimate time which would have spent on the final resolution.
1237 *
1238 * So we only accumulate average for the latest resolution divider which was rendered. */
 /* Returns true whenever the work's resolution divider differs from `pixel_size_`. */
1239 return render_work.resolution_divider != pixel_size_;
1240}
1241
1243{
 /* Marks the time limit as reached in `state_` once the time elapsed since
  * `state_.start_render_time` is no longer below `time_limit_`. Returns nothing; the outcome is
  * only stored in `state_`. */
1244 if (time_limit_ == 0.0) {
1245 /* No limit is enforced. */
1246 return;
1247 }
1248
1249 if (state_.start_render_time == 0.0) {
1250 /* Rendering did not start yet. */
1251 return;
1252 }
1253
1254 const double current_time = time_dt();
1255
1256 if (current_time - state_.start_render_time < time_limit_) {
1257 /* Time limit is not reached yet. */
1258 return;
1259 }
1260
 /* Limit reached: set the flag and use the current time as the end of rendering. */
1261 state_.time_limit_reached = true;
1262 state_.end_render_time = current_time;
1263}
1264
1265/* --------------------------------------------------------------------
1266 * Utility functions.
1267 */
1268
1270 const double actual_time)
1271{
 /* NOTE(review): `desired_time` is used as a divisor; callers presumably always pass a positive
  * value -- confirm. */
1272 const double ratio_between_times = actual_time / desired_time;
1273
1274 /* We can pass `ratio_between_times` to `get_num_samples_during_navigation()` to get our
1275 * navigation samples because the equation for calculating the resolution divider is as follows:
1276 * `actual_time / desired_time = sqr(resolution_divider) / sample_count`.
1277 * While `resolution_divider` is less than or equal to 4, `resolution_divider = sample_count`
1278 * (This relationship is determined in `get_num_samples_during_navigation()`). With some
1279 * substitution we end up with `actual_time / desired_time = resolution_divider` while the
1280 * resolution divider is less than or equal to 4. Once the resolution divider increases above 4,
1281 * the relationship of `actual_time / desired_time = resolution_divider` is no longer true,
1282 * however the sample count retrieved from `get_num_samples_during_navigation()` is still
1283 * accurate if we continue using this assumption. It should be noted that the interaction between
1284 * `pixel_size`, sample count, and resolution divider are automatically accounted for and that's
1285 * why `pixel_size` isn't included in any of the equations. */
1286 const int navigation_samples = get_num_samples_during_navigation(
1287 ceil_to_int(ratio_between_times));
1288
 /* Solve the equation above for the divider: `sqrt(sample_count * ratio)`, rounded up. */
1289 return ceil_to_int(sqrt(navigation_samples * ratio_between_times));
1290}
1291
/* Calculate the power-of-two resolution divider which brings a `width` x `height` buffer down to
 * a pixel area of at most `resolution * resolution`.
 *
 * Both dimensions are halved (never going below 1 pixel) and the divider is doubled until the
 * area fits. A `resolution` of `INT_MAX` means "no limit" and always gives a divider of 1.
 *
 * The areas are compared in 64 bits, because the product of two `int` values overflows for large
 * buffers or large limits. */
int calculate_resolution_divider_for_resolution(int width, int height, const int resolution)
{
  if (resolution == INT_MAX) {
    return 1;
  }

  const long long max_pixel_area = static_cast<long long>(resolution) * resolution;

  int resolution_divider = 1;
  while (static_cast<long long>(width) * height > max_pixel_area) {
    if (width == 1 && height == 1) {
      /* A single pixel is still above the limit, which is only possible when `resolution` is 0.
       * Halving further changes nothing, so stop instead of looping forever. */
      break;
    }

    /* Halve, but keep at least one pixel: same result as `max(1, value / 2)`. */
    width = (width > 1) ? width / 2 : 1;
    height = (height > 1) ? height / 2 : 1;

    resolution_divider <<= 1;
  }

  return resolution_divider;
}
1308
1310 const int height,
1311 const int resolution_divider)
1312{
 /* Takes the square root of the pixel area (rounded to the nearest integer) and divides it by the
  * resolution divider using integer division. */
 /* NOTE(review): `width * height` is evaluated in `int` and can overflow for very large buffers;
  * `resolution_divider` is used as a divisor and is assumed to be non-zero -- confirm both
  * against the callers. */
1313 const int pixel_area = width * height;
1314 const int resolution = lround(sqrt(pixel_area));
1315
1316 return resolution / resolution_divider;
1317}
1318
unsigned int uint
void reset()
clear internal cached data and reset random seed
static const int MAX_SAMPLES
Definition integrator.h:80
AdaptiveSampling adaptive_sampling_
DenoiseParams denoiser_params_
void set_time_limit(const double time_limit)
void update_state_for_render_work(const RenderWork &render_work)
void report_path_trace_occupancy(const RenderWork &render_work, const float occupancy)
double guess_display_update_interval_in_seconds_for_num_samples(const int num_rendered_samples) const
float work_adaptive_threshold() const
int calculate_num_samples_per_update() const
int get_rendered_sample() const
void report_adaptive_filter_time(const RenderWork &render_work, const double time, bool is_cancelled)
struct RenderScheduler::@160041044210107046321320116341171004134167053234 state_
bool work_need_update_display(const bool denoiser_delayed)
BufferParams buffer_params_
void set_need_schedule_rebalance(bool need_schedule_rebalance)
double guess_display_update_interval_in_seconds() const
bool work_need_denoise(bool &delayed, bool &ready_to_display)
bool work_need_adaptive_filter() const
int get_num_rendered_samples() const
string full_report() const
bool need_schedule_rebalance_works_
bool is_denoise_active_during_update() const
void report_denoise_time(const RenderWork &render_work, const double time)
bool set_postprocess_render_work(RenderWork *render_work)
struct RenderScheduler::@363331175056261256267056367351034116315347004137 first_render_time_
double get_time_limit() const
double guess_display_update_interval_in_seconds_for_num_samples_no_limit(int num_rendered_samples) const
bool work_is_usable_for_first_render_estimation(const RenderWork &render_work)
int calculate_resolution_divider_for_time(const double desired_time, const double actual_time)
TimeWithAverage volume_guiding_denoise_time_
bool is_adaptive_sampling_used() const
void set_denoiser_params(const DenoiseParams &params)
bool render_work_reschedule_on_idle(RenderWork &render_work)
int get_num_samples() const
TimeWithAverage adaptive_filter_time_
void report_display_update_time(const RenderWork &render_work, const double time)
TimeWithAverage rebalance_time_
void set_sample_params(const int num_samples, const bool use_sample_subset, const int sample_subset_offset, const int sample_subset_length)
int get_sample_offset() const
int default_start_resolution_divider_
bool is_denoiser_gpu_used() const
TimeWithAverage display_update_time_
void reset(const BufferParams &buffer_params)
void set_full_frame_render_work(RenderWork *render_work)
double guess_viewport_navigation_update_interval_in_seconds() const
void update_start_resolution_divider()
void report_path_trace_time(const RenderWork &render_work, const double time, bool is_cancelled)
void set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling)
int get_num_samples_during_navigation(const int resolution_divider) const
bool render_work_reschedule_on_converge(RenderWork &render_work)
TileManager & tile_manager_
bool work_report_reset_average(const RenderWork &render_work)
bool volume_guiding_need_denoise() const
TimeWithAverage path_trace_time_
int get_start_sample_to_path_trace() const
int get_num_samples_to_path_trace() const
void render_work_reschedule_on_cancel(RenderWork &render_work)
void report_work_begin(const RenderWork &render_work)
TimeWithAverage denoise_time_
void set_need_schedule_cryptomatte(bool need_schedule_cryptomatte)
void report_rebalance_time(const RenderWork &render_work, const double time, bool balance_changed)
void set_limit_samples_per_update(const int limit_samples)
RenderWork get_render_work()
RenderScheduler(TileManager &tile_manager, const SessionParams &params)
bool is_background() const
void report_volume_guiding_denoise_time(const RenderWork &render_work, const double time)
struct RenderWork::@274302037211333357242061375066056076354104241257 cryptomatte
bool use_denoised_result
struct RenderWork::@143272241044374345203044061122063322004171016113 tile
bool init_render_buffers
struct RenderWork::@332011177057136214007215372066145235154020032310 path_trace
struct RenderWork::@165363207370354371245137325023006326210217053004 adaptive_sampling
struct RenderWork::@226364033061301324161311275016362103243040016376 display
struct RenderWork::@200361311027272113377377324353154145102345065344 full
CCL_NAMESPACE_BEGIN const char * denoiserTypeToHumanReadable(DenoiserType type)
Definition denoise.cpp:9
#define CCL_NAMESPACE_END
static const char * to_string(const Interpolation &interp)
Definition gl_shader.cc:103
#define sqrt
uiWidgetBaseParameters params[MAX_WIDGET_BASE_BATCH]
#define LOG_DEBUG
Definition log.h:107
#define DCHECK_GT(a, b)
Definition log.h:145
ccl_device_inline int ceil_to_int(const float f)
Definition math_base.h:424
ccl_device_inline uint next_power_of_two(const uint x)
Definition math_base.h:786
int calculate_resolution_divider_for_resolution(int width, int height, const int resolution)
int calculate_resolution_for_divider(const int width, const int height, const int resolution_divider)
static double approximate_final_time(const RenderWork &render_work, const double time)
static uint round_num_samples_to_power_of_2(const uint num_samples)
#define min(a, b)
Definition sort.cc:36
string string_from_bool(bool var)
Definition string.cpp:183
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
Definition string.cpp:23
max
Definition text_draw.cc:251
CCL_NAMESPACE_BEGIN double time_dt()
Definition time.cpp:47
ccl_device_inline bool is_power_of_two(const size_t x)
Definition types_base.h:67
double total_time