Blender V4.5
render_scheduler.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
6
7#include "scene/integrator.h"
8
9#include "session/session.h"
10#include "session/tile.h"
11
12#include "util/log.h"
13#include "util/time.h"
14
16
17/* --------------------------------------------------------------------
18 * Render scheduler.
19 */
20
22 : headless_(params.headless),
23 background_(params.background),
24 pixel_size_(params.pixel_size),
25 tile_manager_(tile_manager),
26 default_start_resolution_divider_(params.use_resolution_divider ? pixel_size_ * 8 : 0)
27{
29}
30
31void RenderScheduler::set_need_schedule_cryptomatte(bool need_schedule_cryptomatte)
32{
33 need_schedule_cryptomatte_ = need_schedule_cryptomatte;
34}
35
36void RenderScheduler::set_need_schedule_rebalance(bool need_schedule_rebalance)
37{
38 need_schedule_rebalance_works_ = need_schedule_rebalance;
39}
40
42{
43 return background_;
44}
45
50
52{
53 return denoiser_params_.use_gpu;
54}
55
57{
58 limit_samples_per_update_ = limit_samples;
59}
60
62{
63 adaptive_sampling_ = adaptive_sampling;
64}
65
70
71void RenderScheduler::set_sample_params(const int num_samples,
72 const bool use_sample_subset,
73 const int sample_subset_offset,
74 const int sample_subset_length)
75{
78
79 if (use_sample_subset) {
80 sample_offset_ = sample_subset_offset;
82 min(sample_subset_offset + sample_subset_length, num_samples_) - sample_subset_offset, 0);
83 }
84}
85
87{
88 return num_samples_;
89}
90
95
96void RenderScheduler::set_time_limit(const double time_limit)
97{
98 time_limit_ = time_limit;
99}
100
102{
103 return time_limit_;
104}
105
112
114{
115 return state_.num_rendered_samples;
116}
117
118void RenderScheduler::reset(const BufferParams &buffer_params)
119{
120 buffer_params_ = buffer_params;
121
123
124 /* In background mode never do lower resolution render preview, as it is not really supported
125 * by the software. */
127 state_.resolution_divider = 1;
128 }
129 else {
130 state_.user_is_navigating = true;
131 state_.resolution_divider = start_resolution_divider_;
132 }
133
134 state_.num_rendered_samples = 0;
135 state_.last_display_update_time = 0.0;
136 state_.last_display_update_sample = -1;
137
138 state_.last_rebalance_time = 0.0;
139 state_.num_rebalance_requested = 0;
140 state_.num_rebalance_changes = 0;
141 state_.last_rebalance_changed = false;
142 state_.need_rebalance_at_next_work = false;
143
144 /* TODO(sergey): Choose better initial value. */
145 /* NOTE: The adaptive sampling settings might not be available here yet. */
146 state_.adaptive_sampling_threshold = 0.4f;
147
148 state_.last_work_tile_was_denoised = false;
149 state_.tile_result_was_written = false;
150 state_.postprocess_work_scheduled = false;
151 state_.full_frame_work_scheduled = false;
152 state_.full_frame_was_written = false;
153
154 state_.path_trace_finished = false;
155
156 state_.start_render_time = 0.0;
157 state_.end_render_time = 0.0;
158 state_.time_limit_reached = false;
159
160 state_.occupancy_num_samples = 0;
161 state_.occupancy = 1.0f;
162
163 first_render_time_.path_trace_per_sample = 0.0;
164 first_render_time_.denoise_time = 0.0;
165 first_render_time_.display_update_time = 0.0;
166
167 path_trace_time_.reset();
168 denoise_time_.reset();
169 adaptive_filter_time_.reset();
170 display_update_time_.reset();
171 rebalance_time_.reset();
172}
173
178
180{
181 /* Move to the next resolution divider. Assume adaptive filtering is not needed during
182 * navigation. */
183 if (state_.resolution_divider != pixel_size_) {
184 return false;
185 }
186
187 if (render_work_reschedule_on_idle(render_work)) {
188 return true;
189 }
190
191 state_.path_trace_finished = true;
192
193 bool denoiser_delayed;
194 bool denoiser_ready_to_display;
195 render_work.tile.denoise = work_need_denoise(denoiser_delayed, denoiser_ready_to_display);
196
197 render_work.display.update = work_need_update_display(denoiser_delayed);
198 render_work.display.use_denoised_result = denoiser_ready_to_display;
199
200 return false;
201}
202
204{
206 return false;
207 }
208
209 /* Move to the next resolution divider. Assume adaptive filtering is not needed during
210 * navigation. */
211 if (state_.resolution_divider != pixel_size_) {
212 return false;
213 }
214
215 if (adaptive_sampling_.use) {
216 if (state_.adaptive_sampling_threshold > adaptive_sampling_.threshold) {
217 state_.adaptive_sampling_threshold = max(state_.adaptive_sampling_threshold / 2,
218 adaptive_sampling_.threshold);
219
220 render_work.adaptive_sampling.threshold = state_.adaptive_sampling_threshold;
221 render_work.adaptive_sampling.reset = true;
222
223 return true;
224 }
225 }
226
227 return false;
228}
229
231{
232 VLOG_WORK << "Schedule work for cancel.";
233
234 /* Un-schedule samples: they will not be rendered and should not be counted. */
235 state_.num_rendered_samples -= render_work.path_trace.num_samples;
236
237 const bool has_rendered_samples = get_num_rendered_samples() != 0;
238
239 /* Reset all fields of the previous work, canceling things like adaptive sampling filtering and
240 * denoising.
241 * However, need to preserve write requests, since those will not be possible to recover and
242 * writes are only to happen once. */
243 const bool tile_write = render_work.tile.write;
244 const bool full_write = render_work.full.write;
245
246 render_work = RenderWork();
247
248 render_work.tile.write = tile_write;
249 render_work.full.write = full_write;
250
251 /* Do not write tile if it has zero samples in it, treat it similarly to all other tiles which
252 * got canceled. */
253 if (!state_.tile_result_was_written && has_rendered_samples) {
254 render_work.tile.write = true;
255 }
256
257 if (!state_.full_frame_was_written) {
258 render_work.full.write = true;
259 }
260
261 /* Update current tile, but only if any sample was rendered.
262 * Allows to have latest state of tile visible while full buffer is being processed.
263 *
264 * Note that if there are no samples in the current tile its render buffer might have pixels
265 * remained from previous state.
266 *
267 * If the full result was written, then there is no way any updates were made to the render
268 * buffers. And the buffers might have been freed from the device, so display update is not
269 * possible. */
270 if (has_rendered_samples && !state_.full_frame_was_written) {
271 render_work.display.update = true;
272 }
273}
274
276{
277 if (state_.resolution_divider != pixel_size_) {
278 return false;
279 }
280
281 if (state_.path_trace_finished || state_.time_limit_reached) {
282 return true;
283 }
284
286}
287
289{
291
292 const double time_now = time_dt();
293
294 if (done()) {
295 RenderWork render_work;
296 render_work.resolution_divider = state_.resolution_divider;
297
298 if (!set_postprocess_render_work(&render_work)) {
299 set_full_frame_render_work(&render_work);
300 }
301
302 if (!render_work) {
303 state_.end_render_time = time_now;
304 }
305
306 update_state_for_render_work(render_work);
307
308 return render_work;
309 }
310
311 RenderWork render_work;
312
313 if (state_.resolution_divider != pixel_size_) {
314 if (state_.user_is_navigating) {
315 /* Don't progress the resolution divider as the user is currently navigating in the scene. */
316 state_.user_is_navigating = false;
317 }
318 else {
319 /* If the resolution divider is greater than or equal to default_start_resolution_divider_,
320 * drop the resolution divider down to 4. This is so users with slow hardware and thus high
321 * resolution dividers (E.G. 16), get an update to let them know something is happening
322 * rather than having to wait for the full 1:1 render to show up. */
323 state_.resolution_divider = state_.resolution_divider > default_start_resolution_divider_ ?
324 (4 * pixel_size_) :
325 1;
326 }
327
328 state_.resolution_divider = max(state_.resolution_divider, pixel_size_);
329 state_.num_rendered_samples = 0;
330 state_.last_display_update_sample = -1;
331 }
332
333 render_work.resolution_divider = state_.resolution_divider;
334
338
339 render_work.init_render_buffers = (render_work.path_trace.start_sample == get_sample_offset());
340
341 /* NOTE: Rebalance scheduler requires current number of samples to not be advanced forward. */
342 render_work.rebalance = work_need_rebalance();
343
344 /* NOTE: Advance number of samples now, so that filter and denoising check can see that all the
345 * samples are rendered. */
346 state_.num_rendered_samples += render_work.path_trace.num_samples;
347
350 render_work.adaptive_sampling.reset = false;
351
352 bool denoiser_delayed;
353 bool denoiser_ready_to_display;
354 render_work.tile.denoise = work_need_denoise(denoiser_delayed, denoiser_ready_to_display);
355
356 render_work.tile.write = done();
357
358 render_work.display.update = work_need_update_display(denoiser_delayed);
359 render_work.display.use_denoised_result = denoiser_ready_to_display;
360
361 if (done()) {
362 set_postprocess_render_work(&render_work);
363 }
364
365 update_state_for_render_work(render_work);
366
367 return render_work;
368}
369
371{
372 const double time_now = time_dt();
373
374 if (render_work.rebalance) {
375 state_.last_rebalance_time = time_now;
376 ++state_.num_rebalance_requested;
377 }
378
379 /* A fallback display update time, for the case there is an error of display update, or when
380 * there is no display at all. */
381 if (render_work.display.update) {
382 state_.last_display_update_time = time_now;
383 state_.last_display_update_sample = state_.num_rendered_samples;
384 }
385
386 state_.last_work_tile_was_denoised = render_work.tile.denoise;
387 state_.tile_result_was_written |= render_work.tile.write;
388 state_.full_frame_was_written |= render_work.full.write;
389}
390
392{
393 if (state_.postprocess_work_scheduled) {
394 return false;
395 }
396 state_.postprocess_work_scheduled = true;
397
398 bool any_scheduled = false;
399
401 render_work->cryptomatte.postprocess = true;
402 any_scheduled = true;
403 }
404
405 if (denoiser_params_.use && !state_.last_work_tile_was_denoised) {
406 render_work->tile.denoise = !tile_manager_.has_multiple_tiles();
407 any_scheduled = true;
408 }
409
410 if (!state_.tile_result_was_written) {
411 render_work->tile.write = true;
412 any_scheduled = true;
413 }
414
415 if (any_scheduled) {
416 render_work->display.update = true;
417 }
418
419 return any_scheduled;
420}
421
423{
424 if (state_.full_frame_work_scheduled) {
425 return;
426 }
427
428 if (!tile_manager_.has_multiple_tiles()) {
429 /* There is only single tile, so all work has been performed already. */
430 return;
431 }
432
433 if (!tile_manager_.done()) {
434 /* There are still tiles to be rendered. */
435 return;
436 }
437
438 if (state_.full_frame_was_written) {
439 return;
440 }
441
442 state_.full_frame_work_scheduled = true;
443
444 render_work->full.write = true;
445}
446
447/* Knowing time which it took to complete a task at the current resolution divider approximate how
448 * long it would have taken to complete it at a final resolution. */
449static double approximate_final_time(const RenderWork &render_work, const double time)
450{
451 if (render_work.resolution_divider == 1) {
452 return time;
453 }
454
455 const double resolution_divider_sq = render_work.resolution_divider *
456 render_work.resolution_divider;
457 return time * resolution_divider_sq;
458}
459
461{
462 /* Start counting render time when rendering samples at their final resolution.
463 *
464 * NOTE: The work might have the path trace part be all zero: this happens when a post-processing
465 * work is scheduled after the path tracing. Checking for just a start sample doesn't work here
466 * because it might be wrongly 0. Check for whether path tracing is actually happening as it is
467 * expected to happen in the first work. */
468 if (render_work.resolution_divider == pixel_size_ && render_work.path_trace.num_samples != 0 &&
470 {
471 state_.start_render_time = time_dt();
472 }
473}
474
476 const double time,
477 bool is_cancelled)
478{
479 path_trace_time_.add_wall(time);
480
481 if (is_cancelled) {
482 return;
483 }
484
485 const double final_time_approx = approximate_final_time(render_work, time);
486
488 first_render_time_.path_trace_per_sample = final_time_approx /
489 render_work.path_trace.num_samples;
490 }
491
492 if (work_report_reset_average(render_work)) {
493 path_trace_time_.reset_average();
494 }
495
496 path_trace_time_.add_average(final_time_approx, render_work.path_trace.num_samples);
497
498 VLOG_WORK << "Average path tracing time: " << path_trace_time_.get_average() << " seconds.";
499}
500
502 const float occupancy)
503{
504 state_.occupancy_num_samples = render_work.path_trace.num_samples;
505 state_.occupancy = occupancy;
506 VLOG_WORK << "Measured path tracing occupancy: " << occupancy;
507}
508
510 const double time,
511 bool is_cancelled)
512{
513 adaptive_filter_time_.add_wall(time);
514
515 if (is_cancelled) {
516 return;
517 }
518
519 const double final_time_approx = approximate_final_time(render_work, time);
520
521 if (work_report_reset_average(render_work)) {
522 adaptive_filter_time_.reset_average();
523 }
524
525 adaptive_filter_time_.add_average(final_time_approx, render_work.path_trace.num_samples);
526
527 VLOG_WORK << "Average adaptive sampling filter time: " << adaptive_filter_time_.get_average()
528 << " seconds.";
529}
530
531void RenderScheduler::report_denoise_time(const RenderWork &render_work, const double time)
532{
533 denoise_time_.add_wall(time);
534
535 const double final_time_approx = approximate_final_time(render_work, time);
536
538 first_render_time_.denoise_time = final_time_approx;
539 }
540
541 if (work_report_reset_average(render_work)) {
542 denoise_time_.reset_average();
543 }
544
545 denoise_time_.add_average(final_time_approx);
546
547 VLOG_WORK << "Average denoising time: " << denoise_time_.get_average() << " seconds.";
548}
549
550void RenderScheduler::report_display_update_time(const RenderWork &render_work, const double time)
551{
552 display_update_time_.add_wall(time);
553
554 const double final_time_approx = approximate_final_time(render_work, time);
555
557 first_render_time_.display_update_time = final_time_approx;
558 }
559
560 if (work_report_reset_average(render_work)) {
561 display_update_time_.reset_average();
562 }
563
564 display_update_time_.add_average(final_time_approx);
565
566 VLOG_WORK << "Average display update time: " << display_update_time_.get_average()
567 << " seconds.";
568
569 /* Move the display update moment further in time, so that logic which checks when last update
570 * did happen have more reliable point in time (without path tracing and denoising parts of the
571 * render work). */
572 state_.last_display_update_time = time_dt();
573}
574
576 const double time,
577 bool balance_changed)
578{
579 rebalance_time_.add_wall(time);
580
581 if (work_report_reset_average(render_work)) {
582 rebalance_time_.reset_average();
583 }
584
585 rebalance_time_.add_average(time);
586
587 if (balance_changed) {
588 ++state_.num_rebalance_changes;
589 }
590
591 state_.last_rebalance_changed = balance_changed;
592
593 VLOG_WORK << "Average rebalance time: " << rebalance_time_.get_average() << " seconds.";
594}
595
597{
598 const double render_wall_time = state_.end_render_time - state_.start_render_time;
600
601 string result = "\nRender Scheduler Summary\n\n";
602
603 {
604 string mode;
605 if (headless_) {
606 mode = "Headless";
607 }
608 else if (background_) {
609 mode = "Background";
610 }
611 else {
612 mode = "Interactive";
613 }
614 result += "Mode: " + mode + "\n";
615 }
616
617 result += "Resolution: " + to_string(buffer_params_.width) + "x" +
618 to_string(buffer_params_.height) + "\n";
619
620 result += "\nAdaptive sampling:\n";
621 result += " Use: " + string_from_bool(adaptive_sampling_.use) + "\n";
622 if (adaptive_sampling_.use) {
623 result += " Step: " + to_string(adaptive_sampling_.adaptive_step) + "\n";
624 result += " Min Samples: " + to_string(adaptive_sampling_.min_samples) + "\n";
625 result += " Threshold: " + to_string(adaptive_sampling_.threshold) + "\n";
626 }
627
628 result += "\nDenoiser:\n";
629 result += " Use: " + string_from_bool(denoiser_params_.use) + "\n";
630 if (denoiser_params_.use) {
631 result += " Type: " + string(denoiserTypeToHumanReadable(denoiser_params_.type)) + "\n";
632 result += " Start Sample: " + to_string(denoiser_params_.start_sample) + "\n";
633
634 string passes = "Color";
635 if (denoiser_params_.use_pass_albedo) {
636 passes += ", Albedo";
637 }
638 if (denoiser_params_.use_pass_normal) {
639 passes += ", Normal";
640 }
641
642 result += " Passes: " + passes + "\n";
643 }
644
645 if (state_.num_rebalance_requested) {
646 result += "\nRebalancer:\n";
647 result += " Number of requested rebalances: " + to_string(state_.num_rebalance_requested) +
648 "\n";
649 result += " Number of performed rebalances: " + to_string(state_.num_rebalance_changes) +
650 "\n";
651 }
652
653 result += "\nTime (in seconds):\n";
654 result += string_printf(" %20s %20s %20s\n", "", "Wall", "Average");
655 result += string_printf(" %20s %20f %20f\n",
656 "Path Tracing",
657 path_trace_time_.get_wall(),
658 path_trace_time_.get_average());
659
660 if (adaptive_sampling_.use) {
661 result += string_printf(" %20s %20f %20f\n",
662 "Adaptive Filter",
663 adaptive_filter_time_.get_wall(),
664 adaptive_filter_time_.get_average());
665 }
666
667 if (denoiser_params_.use) {
669 " %20s %20f %20f\n", "Denoiser", denoise_time_.get_wall(), denoise_time_.get_average());
670 }
671
672 result += string_printf(" %20s %20f %20f\n",
673 "Display Update",
674 display_update_time_.get_wall(),
675 display_update_time_.get_average());
676
677 if (state_.num_rebalance_requested) {
678 result += string_printf(" %20s %20f %20f\n",
679 "Rebalance",
680 rebalance_time_.get_wall(),
681 rebalance_time_.get_average());
682 }
683
684 const double total_time = path_trace_time_.get_wall() + adaptive_filter_time_.get_wall() +
685 denoise_time_.get_wall() + display_update_time_.get_wall();
686 result += "\n Total: " + to_string(total_time) + "\n";
687
689 "\nRendered %d samples in %f seconds\n", num_rendered_samples, render_wall_time);
690
691 /* When adaptive sampling is used the average time becomes meaningless, because different samples
692 * will likely render different number of pixels. */
693 if (!adaptive_sampling_.use) {
694 result += string_printf("Average time per sample: %f seconds\n",
695 render_wall_time / num_rendered_samples);
696 }
697
698 return result;
699}
700
705
707 int num_rendered_samples) const
708{
711
712 if (time_limit_ != 0.0 && state_.start_render_time != 0.0) {
713 const double remaining_render_time = max(0.0,
714 time_limit_ - (time_dt() - state_.start_render_time));
715
716 update_interval = min(update_interval, remaining_render_time);
717 }
718
719 return update_interval;
720}
721
722/* TODO(sergey): This is just a quick implementation, exact values might need to be tweaked based
723 * on a more careful experiments with viewport rendering. */
725 int num_rendered_samples) const
726{
727 /* TODO(sergey): Need a decision on whether this should be using number of samples rendered
728 * within the current render session, or use absolute number of samples with the start sample
729 * taken into account. It will depend on whether the start sample offset clears the render
730 * buffer. */
731
732 if (state_.need_rebalance_at_next_work) {
733 return 0.1;
734 }
735 if (state_.last_rebalance_changed) {
736 return 0.2;
737 }
738
739 if (headless_) {
740 /* In headless mode do rare updates, so that the device occupancy is high, but there are still
741 * progress messages printed to the logs. */
742 return 30.0;
743 }
744
745 if (background_) {
746 if (num_rendered_samples < 32) {
747 return 1.0;
748 }
749 return 2.0;
750 }
751
752 /* Render time and number of samples rendered are used to figure out the display update interval.
753 * Render time is used to allow for fast display updates in the first few seconds of rendering
754 * on fast devices. Number of samples rendered is used to allow for potentially quicker display
755 * updates on slow devices during the first few samples. */
756 const double render_time = path_trace_time_.get_wall();
757 if (render_time < 1) {
758 return 0.1;
759 }
760 if (render_time < 2) {
761 return 0.25;
762 }
763 if (render_time < 4) {
764 return 0.5;
765 }
766 if (render_time < 8 || num_rendered_samples < 32) {
767 return 1.0;
768 }
769 return 2.0;
770}
771
773{
774 const double time_per_sample_average = path_trace_time_.get_average();
775 /* Fall back to 1 sample if we have not recorded a time yet. */
776 if (time_per_sample_average == 0.0) {
777 return 1;
778 }
779
780 const double num_samples_in_second = pixel_size_ * pixel_size_ / time_per_sample_average;
781
782 const double update_interval_in_seconds = guess_display_update_interval_in_seconds();
783
784 return max(int(num_samples_in_second * update_interval_in_seconds), 1);
785}
786
788{
789 return sample_offset_ + state_.num_rendered_samples;
790}
791
792/* Round number of samples to the closest power of two.
793 * Rounding might happen to higher or lower value depending on which one is closer. Such behavior
794 * allows to have number of samples to be power of two without diverging from the planned number of
795 * samples too much. */
796static inline uint round_num_samples_to_power_of_2(const uint num_samples)
797{
798 if (num_samples == 1) {
799 return 1;
800 }
801
802 if (is_power_of_two(num_samples)) {
803 return num_samples;
804 }
805
806 const uint num_samples_up = next_power_of_two(num_samples);
807 const uint num_samples_down = num_samples_up - (num_samples_up >> 1);
808
809 const uint delta_up = num_samples_up - num_samples;
810 const uint delta_down = num_samples - num_samples_down;
811
812 if (delta_up <= delta_down) {
813 return num_samples_up;
814 }
815
816 return num_samples_down;
817}
818
820{
821 if (state_.resolution_divider != pixel_size_) {
822 return get_num_samples_during_navigation(state_.resolution_divider);
823 }
824
825 /* Always start full resolution render with a single sample. Gives more instant feedback to
826 * artists, and allows to gather information for a subsequent path tracing works. Do it in the
827 * headless mode as well, to give some estimate of how long samples are taking. */
828 if (state_.num_rendered_samples == 0) {
829 return 1;
830 }
831
832 const int num_samples_per_update = calculate_num_samples_per_update();
833 const int path_trace_start_sample = get_start_sample_to_path_trace();
834
835 /* Round number of samples to a power of two, so that division of path states into tiles goes in
836 * a more integer manner.
837 * This might make it so updates happens more rarely due to rounding up. In the test scenes this
838 * is not huge deal because it is not seen that more than 8 samples can be rendered between
839 * updates. If that becomes a problem we can add some extra rules like never allow to round up
840 * more than N samples. */
841 const int num_samples_pot = round_num_samples_to_power_of_2(num_samples_per_update);
842
843 const int max_num_samples_to_render = sample_offset_ + num_samples_ - path_trace_start_sample;
844
845 int num_samples_to_render = min(num_samples_pot, max_num_samples_to_render);
846
847 /* When enough statistics is available and doing an offline rendering prefer to keep device
848 * occupied. */
849 if (state_.occupancy_num_samples && (background_ || headless_)) {
850 /* Keep occupancy at about 0.5 (this is more of an empirical figure which seems to match scenes
851 * with good performance without forcing occupancy to be higher). */
852 int num_samples_to_occupy = state_.occupancy_num_samples;
853 if (state_.occupancy > 0 && state_.occupancy < 0.5f) {
854 num_samples_to_occupy = lround(state_.occupancy_num_samples * 0.7f / state_.occupancy);
855 }
856
857 /* Time limit for path tracing, which constraints the scheduler from "over-scheduling" work
858 * in scenes which have very long path trace times and low occupancy. This allows faster
859 * feedback of render results, and faster canceling when artists notice something is wrong.
860 *
861 * Additionally, when the time limit is enabled, do not render more samples than it is needed
862 * to reach the time limit. */
863 double path_tracing_time_limit = 0;
864 if (headless_) {
865 /* In the headless (command-line) render "over-scheduling" is not as bad, as it ensures the
866 * best possible render time. */
867 }
868 else if (background_) {
869 /* For the first few seconds prefer quicker updates, giving it a better chance for artists
870 * to cancel render early on when they notice something is wrong. After that increase the
871 * update times a lot, giving the best possible performance on a complicated scenes like
872 * the Spring splash screen (where occupancy is just very bad). */
873 if (state_.start_render_time == 0.0 || time_dt() - state_.start_render_time < 10) {
874 path_tracing_time_limit = 2.0;
875 }
876 else {
877 path_tracing_time_limit = 15.0;
878 }
879 }
880 else {
881 /* Viewport render: prefer faster updates over overall render time reduction. */
882 /* TODO: Look into enabling this entire code-path for the viewport as well, allowing
883 * compensation even in viewport (currently parent scope checks for non-viewport render). */
884 path_tracing_time_limit = guess_display_update_interval_in_seconds();
885 }
886 if (time_limit_ != 0.0 && state_.start_render_time != 0.0) {
887 const double remaining_render_time = max(
888 0.0, time_limit_ - (time_dt() - state_.start_render_time));
889 if (path_tracing_time_limit == 0) {
890 path_tracing_time_limit = remaining_render_time;
891 }
892 else {
893 path_tracing_time_limit = min(path_tracing_time_limit, remaining_render_time);
894 }
895 }
896 if (path_tracing_time_limit != 0) {
897 /* Use the per-sample time from the previously rendered batch of samples so that the
898 * correction is applied much quicker. */
899 const double predicted_render_time = num_samples_to_occupy *
900 path_trace_time_.get_last_sample_time();
901 if (predicted_render_time > path_tracing_time_limit) {
902 num_samples_to_occupy = lround(num_samples_to_occupy *
903 (path_tracing_time_limit / predicted_render_time));
904 }
905 }
906
907 num_samples_to_render = max(num_samples_to_render,
908 min(num_samples_to_occupy, max_num_samples_to_render));
909 }
910
912 num_samples_to_render = min(limit_samples_per_update_, num_samples_to_render);
913 }
914
915 /* If adaptive sampling is not used, render as many samples per update as possible, keeping
916 * the device fully occupied, without much overhead of display updates. */
917 if (!adaptive_sampling_.use) {
918 return num_samples_to_render;
919 }
920
921 /* TODO(sergey): Add extra "clamping" here so that none of the filtering points is missing. This
922 * is to ensure that the final render is pixel-matched regardless of how many samples per second
923 * compute device can do. */
924
925 return adaptive_sampling_.align_samples(path_trace_start_sample - sample_offset_,
926 num_samples_to_render);
927}
928
930{
931 /* Special trick for fast navigation: schedule multiple samples during fast navigation
932 * (which will prefer to use lower resolution to keep up with refresh rate). This gives more
933 * usable visual feedback for artists. */
934
936 /* When denoising is used during navigation prefer using a higher resolution with less samples
937 * (scheduling less samples here will make it so the resolution_divider calculation will use a
938 * lower value for the divider). This is because both OpenImageDenoise and OptiX denoiser
939 * give visually better results on a higher resolution image with less samples. */
940 return 1;
941 }
942
943 /* Schedule samples equal to the resolution divider up to a maximum of 4, limited by the maximum
944 * number of samples overall.
945 * The idea is to have enough information on the screen by increasing the sample count as the
946 * resolution is decreased. */
947 const int max_navigation_samples = min(num_samples_, 4);
948 /* NOTE: Changing this formula will change the formula in
949 * `RenderScheduler::calculate_resolution_divider_for_time()`. */
950 return min(max(1, resolution_divider / pixel_size_), max_navigation_samples);
951}
952
957
959{
961 return adaptive_sampling_.threshold;
962 }
963
964 return max(state_.adaptive_sampling_threshold, adaptive_sampling_.threshold);
965}
966
967bool RenderScheduler::work_need_denoise(bool &delayed, bool &ready_to_display)
968{
969 delayed = false;
970 ready_to_display = true;
971
972 if (!denoiser_params_.use) {
973 /* Denoising is disabled, no need to scheduler work for it. */
974 return false;
975 }
976
977 /* When multiple tiles are used the full frame will be denoised.
978 * Avoid per-tile denoising to save up render time. */
979 if (tile_manager_.has_multiple_tiles()) {
980 return false;
981 }
982
983 if (done()) {
984 /* Always denoise at the last sample. */
985 return true;
986 }
987
988 if (background_) {
989 /* Background render, only denoise when rendering the last sample. */
990 /* TODO(sergey): Follow similar logic to viewport, giving an overview of how final denoised
991 * image looks like even for the background rendering. */
992 return false;
993 }
994
995 /* Viewport render. */
996
997 /* Navigation might render multiple samples at a lower resolution. Those are not to be counted as
998 * final samples. */
999 const int num_samples_finished = state_.resolution_divider == pixel_size_ ?
1000 state_.num_rendered_samples :
1001 1;
1002
1003 /* Immediately denoise when we reach the start sample or last sample. */
1004 if (num_samples_finished == denoiser_params_.start_sample ||
1005 num_samples_finished == num_samples_)
1006 {
1007 return true;
1008 }
1009
1010 /* Do not denoise until the sample at which denoising should start is reached. */
1011 if (num_samples_finished < denoiser_params_.start_sample) {
1012 ready_to_display = false;
1013 return false;
1014 }
1015
1016 /* Avoid excessive denoising in viewport after reaching a certain sample count and render time.
1017 */
1018 /* TODO(sergey): Consider making time interval and sample configurable. */
1019 delayed = (path_trace_time_.get_wall() > 4 && num_samples_finished >= 20 &&
1020 (time_dt() - state_.last_display_update_time) < 1.0);
1021
1022 return !delayed;
1023}
1024
1025bool RenderScheduler::work_need_update_display(const bool denoiser_delayed)
1026{
1027 if (headless_) {
1028 /* Force disable display update in headless mode. There will be nothing to display the
1029 * in-progress result. */
1030 return false;
1031 }
1032
1033 if (denoiser_delayed) {
1034 /* If denoiser has been delayed the display can not be updated as it will not contain
1035 * up-to-date state of the render result. */
1036 return false;
1037 }
1038
1039 if (!adaptive_sampling_.use) {
1040 /* When adaptive sampling is not used the work is scheduled in a way that they keep render
1041 * device busy for long enough, so that the display update can happen right after the
1042 * rendering. */
1043 return true;
1044 }
1045
1046 if (done() || state_.last_display_update_sample == -1) {
1047 /* Make sure an initial and final results of adaptive sampling is communicated ot the display.
1048 */
1049 return true;
1050 }
1051
1052 /* For the development purposes of adaptive sampling it might be very useful to see all updates
1053 * of active pixels after convergence check. However, it would cause a slowdown for regular usage
1054 * users. Possibly, make it a debug panel option to allow rapid update to ease development
1055 * without need to re-compiled. */
1056 // if (work_need_adaptive_filter()) {
1057 // return true;
1058 // }
1059
1060 /* When adaptive sampling is used, its possible that only handful of samples of a very simple
1061 * scene will be scheduled to a powerful device (in order to not "miss" any of filtering points).
1062 * We take care of skipping updates here based on when previous display update did happen. */
1063 const double update_interval = guess_display_update_interval_in_seconds_for_num_samples(
1064 state_.last_display_update_sample);
1065 return (time_dt() - state_.last_display_update_time) > update_interval;
1066}
1067
1069{
1070 /* This is the minimum time, as the rebalancing can not happen more often than the path trace
1071 * work. */
1072 static const double kRebalanceIntervalInSeconds = 1;
1073
1075 return false;
1076 }
1077
1078 if (state_.resolution_divider != pixel_size_) {
1079 /* Don't rebalance at a non-final resolution divider. Some reasons for this:
1080 * - It will introduce unnecessary rebalancing overhead during navigation.
1081 * - Per-render device timing information is not very reliable yet. */
1082 return false;
1083 }
1084
1085 if (state_.num_rendered_samples == 0) {
1086 state_.need_rebalance_at_next_work = true;
1087 return false;
1088 }
1089
1090 if (state_.need_rebalance_at_next_work) {
1091 state_.need_rebalance_at_next_work = false;
1092 return true;
1093 }
1094
1095 if (state_.last_rebalance_changed) {
1096 return true;
1097 }
1098
1099 return (time_dt() - state_.last_rebalance_time) > kRebalanceIntervalInSeconds;
1100}
1101
1103{
1105 return;
1106 }
1107
1108 /* Calculate the maximum resolution divider possible while keeping the long axis of the viewport
1109 * above our preferred minimum axis size (128). */
1110 const int long_viewport_axis = max(buffer_params_.width, buffer_params_.height);
1111 const int max_res_divider_for_desired_size = long_viewport_axis / 128;
1112
1113 if (start_resolution_divider_ == 0) {
1114 /* Resolution divider has never been calculated before: start with a high resolution divider so
1115 * that we have a somewhat good initial behavior, giving a chance to collect real numbers. */
1117 max_res_divider_for_desired_size);
1118 VLOG_WORK << "Initial resolution divider is " << start_resolution_divider_;
1119 return;
1120 }
1121
1122 if (first_render_time_.path_trace_per_sample == 0.0) {
1123 /* Not enough information to calculate better resolution, keep the existing one. */
1124 return;
1125 }
1126
1127 const double desired_update_interval_in_seconds =
1129
1130 const double actual_time_per_update = first_render_time_.path_trace_per_sample +
1131 first_render_time_.denoise_time +
1132 first_render_time_.display_update_time;
1133
1134 /* Allow some percent of tolerance, so that if the render time is close enough to the higher
1135 * resolution we prefer to use it instead of going way lower resolution and time way below the
1136 * desired one. */
1137 const int resolution_divider_for_update = calculate_resolution_divider_for_time(
1138 desired_update_interval_in_seconds * 1.4, actual_time_per_update);
1139
1140 /* TODO(sergey): Need to add hysteresis to avoid resolution divider bouncing around when actual
1141 * render time is somewhere on a boundary between two resolutions. */
1142
1143 /* Don't let resolution drop below the desired one. It's better to be slow than provide an
1144 * unreadable viewport render. */
1145 start_resolution_divider_ = min(resolution_divider_for_update, max_res_divider_for_desired_size);
1146
1147 VLOG_WORK << "Calculated resolution divider is " << start_resolution_divider_;
1148}
1149
1151{
1153 /* Use lower value than the non-denoised case to allow having more pixels to reconstruct the
1154 * image from. With the faster updates and extra compute required the resolution becomes too
1155 * low to give usable feedback. */
1156 /* NOTE: Based on performance of OpenImageDenoise on CPU. For OptiX denoiser or other denoiser
1157 * on GPU the value might need to become lower for faster navigation. */
1158 return 1.0 / 12.0;
1159 }
1160
1161 /* For the best match with Blender's viewport the refresh rate should be 60fps. This will
1162 * avoid "jelly" effects. However, on non-trivial scenes this can only be achieved with high
1163 * values of the resolution divider which does not give very pleasant updates during navigation.
1164 * Choose less frequent updates to allow more noise-free and higher resolution updates. */
1165
1166 /* TODO(sergey): Can look into heuristic which will allow to have 60fps if the resolution divider
1167 * is not too high. Alternatively, synchronize Blender's overlays updates to Cycles updates. */
1168
1169 return 1.0 / 30.0;
1170}
1171
1173{
1174 if (!denoiser_params_.use) {
1175 return false;
1176 }
1177
1178 if (denoiser_params_.start_sample > 1) {
1179 return false;
1180 }
1181
1182 return true;
1183}
1184
1186{
1187 return render_work.resolution_divider == pixel_size_ &&
1188 render_work.path_trace.start_sample == sample_offset_;
1189}
1190
1192{
1193 /* When rendering at a non-final resolution divider time average is not very useful because it
1194 * will either bias average down (due to lower render times on the smaller images) or will give
1195 * incorrect result when trying to estimate time which would have spent on the final resolution.
1196 *
1197 * So we only accumulate average for the latest resolution divider which was rendered. */
1198 return render_work.resolution_divider != pixel_size_;
1199}
1200
1202{
1203 if (time_limit_ == 0.0) {
1204 /* No limit is enforced. */
1205 return;
1206 }
1207
1208 if (state_.start_render_time == 0.0) {
1209 /* Rendering did not start yet. */
1210 return;
1211 }
1212
1213 const double current_time = time_dt();
1214
1215 if (current_time - state_.start_render_time < time_limit_) {
1216 /* Time limit is not reached yet. */
1217 return;
1218 }
1219
1220 state_.time_limit_reached = true;
1221 state_.end_render_time = current_time;
1222}
1223
1224/* --------------------------------------------------------------------
1225 * Utility functions.
1226 */
1227
1229 const double actual_time)
1230{
1231 const double ratio_between_times = actual_time / desired_time;
1232
1233 /* We can pass `ratio_between_times` to `get_num_samples_during_navigation()` to get our
1234 * navigation samples because the equation for calculating the resolution divider is as follows:
1235 * `actual_time / desired_time = sqr(resolution_divider) / sample_count`.
1236 * While `resolution_divider` is less than or equal to 4, `resolution_divider = sample_count`
1237 * (This relationship is determined in `get_num_samples_during_navigation()`). With some
1238 * substitution we end up with `actual_time / desired_time = resolution_divider` while the
1239 * resolution divider is less than or equal to 4. Once the resolution divider increases above 4,
1240 * the relationship of `actual_time / desired_time = resolution_divider` is no longer true,
1241 * however the sample count retrieved from `get_num_samples_during_navigation()` is still
1242 * accurate if we continue using this assumption. It should be noted that the interaction between
1243 * `pixel_size`, sample count, and resolution divider are automatically accounted for and that's
1244 * why `pixel_size` isn't included in any of the equations. */
1245 const int navigation_samples = get_num_samples_during_navigation(
1246 ceil_to_int(ratio_between_times));
1247
1248 return ceil_to_int(sqrt(navigation_samples * ratio_between_times));
1249}
1250
/* Calculate the smallest power-of-two resolution divider for which the `width` x `height` buffer,
 * halved once per doubling of the divider, has no more pixels than a square with the side of
 * `resolution`.
 *
 * A `resolution` of `INT_MAX` means no limit, and gives the divider of 1.
 *
 * The pixel counts are compared using 64-bit arithmetic, so that large buffers and large
 * resolutions do not overflow. The halving stops once the buffer is 1x1, so that the function
 * terminates for a `resolution` of 0 as well. */
int calculate_resolution_divider_for_resolution(int width, int height, const int resolution)
{
  if (resolution == INT_MAX) {
    return 1;
  }

  const long long max_num_pixels = static_cast<long long>(resolution) * resolution;

  int resolution_divider = 1;
  while (static_cast<long long>(width) * height > max_num_pixels) {
    if (width == 1 && height == 1) {
      /* The buffer can not become any smaller: further halving would never satisfy the limit. */
      break;
    }

    /* Halve every dimension, keeping it at least 1 pixel. */
    width = (width > 1) ? width / 2 : 1;
    height = (height > 1) ? height / 2 : 1;

    resolution_divider <<= 1;
  }

  return resolution_divider;
}
1267
/* Calculate the resolution which corresponds to the given buffer size and resolution divider.
 *
 * The resolution is the side of a square which has the same area as the `width` x `height`
 * buffer (rounded to the nearest integer), divided by `resolution_divider` using integer
 * division.
 *
 * NOTE(review): `resolution_divider` is assumed to be non-zero; confirm against the callers. */
int calculate_resolution_for_divider(const int width,
                                     const int height,
                                     const int resolution_divider)
{
  /* Calculate the area in floating point: the product of two `int` values overflows for large
   * buffers. */
  const double pixel_area = static_cast<double>(width) * static_cast<double>(height);
  const int resolution = lround(sqrt(pixel_area));

  return resolution / resolution_divider;
}
1277
unsigned int uint
void reset()
clear internal cached data and reset random seed
static const int MAX_SAMPLES
Definition integrator.h:79
AdaptiveSampling adaptive_sampling_
DenoiseParams denoiser_params_
void set_time_limit(const double time_limit)
void update_state_for_render_work(const RenderWork &render_work)
void report_path_trace_occupancy(const RenderWork &render_work, const float occupancy)
double guess_display_update_interval_in_seconds_for_num_samples(const int num_rendered_samples) const
float work_adaptive_threshold() const
int calculate_num_samples_per_update() const
int get_rendered_sample() const
void report_adaptive_filter_time(const RenderWork &render_work, const double time, bool is_cancelled)
bool work_need_update_display(const bool denoiser_delayed)
BufferParams buffer_params_
void set_need_schedule_rebalance(bool need_schedule_rebalance)
double guess_display_update_interval_in_seconds() const
bool work_need_denoise(bool &delayed, bool &ready_to_display)
bool work_need_adaptive_filter() const
int get_num_rendered_samples() const
string full_report() const
bool need_schedule_rebalance_works_
bool is_denoise_active_during_update() const
void report_denoise_time(const RenderWork &render_work, const double time)
bool set_postprocess_render_work(RenderWork *render_work)
struct RenderScheduler::@307162023347152161146266305120135320017210234154 first_render_time_
double get_time_limit() const
double guess_display_update_interval_in_seconds_for_num_samples_no_limit(int num_rendered_samples) const
bool work_is_usable_for_first_render_estimation(const RenderWork &render_work)
int calculate_resolution_divider_for_time(const double desired_time, const double actual_time)
bool is_adaptive_sampling_used() const
void set_denoiser_params(const DenoiseParams &params)
bool render_work_reschedule_on_idle(RenderWork &render_work)
int get_num_samples() const
TimeWithAverage adaptive_filter_time_
void report_display_update_time(const RenderWork &render_work, const double time)
TimeWithAverage rebalance_time_
void set_sample_params(const int num_samples, const bool use_sample_subset, const int sample_subset_offset, const int sample_subset_length)
int get_sample_offset() const
int default_start_resolution_divider_
bool is_denoiser_gpu_used() const
TimeWithAverage display_update_time_
void reset(const BufferParams &buffer_params)
void set_full_frame_render_work(RenderWork *render_work)
double guess_viewport_navigation_update_interval_in_seconds() const
void update_start_resolution_divider()
void report_path_trace_time(const RenderWork &render_work, const double time, bool is_cancelled)
void set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling)
int get_num_samples_during_navigation(const int resolution_divider) const
bool render_work_reschedule_on_converge(RenderWork &render_work)
TileManager & tile_manager_
bool work_report_reset_average(const RenderWork &render_work)
TimeWithAverage path_trace_time_
int get_start_sample_to_path_trace() const
int get_num_samples_to_path_trace() const
void render_work_reschedule_on_cancel(RenderWork &render_work)
void report_work_begin(const RenderWork &render_work)
TimeWithAverage denoise_time_
void set_need_schedule_cryptomatte(bool need_schedule_cryptomatte)
void report_rebalance_time(const RenderWork &render_work, const double time, bool balance_changed)
void set_limit_samples_per_update(const int limit_samples)
struct RenderScheduler::@103162170344336044223356376364272314126075132375 state_
RenderWork get_render_work()
RenderScheduler(TileManager &tile_manager, const SessionParams &params)
bool is_background() const
struct RenderWork::@234243005336240312017300353332335015376006011032 tile
bool use_denoised_result
bool init_render_buffers
struct RenderWork::@321020210137044117222023120321050035111302276245 adaptive_sampling
struct RenderWork::@077136342205336051112102203105130245030065023152 cryptomatte
struct RenderWork::@020211146253350223233167140037223126305301153241 display
struct RenderWork::@070014147123337267036007142117171267326104033173 path_trace
struct RenderWork::@046146066063235207264076124106046251201154246321 full
CCL_NAMESPACE_BEGIN const char * denoiserTypeToHumanReadable(DenoiserType type)
Definition denoise.cpp:9
#define CCL_NAMESPACE_END
static const char * to_string(const Interpolation &interp)
Definition gl_shader.cc:109
#define sqrt
uiWidgetBaseParameters params[MAX_WIDGET_BASE_BATCH]
#define VLOG_WORK
Definition log.h:74
#define DCHECK_GT(a, b)
Definition log.h:59
ccl_device_inline int ceil_to_int(const float f)
Definition math_base.h:424
ccl_device_inline uint next_power_of_two(const uint x)
Definition math_base.h:771
int calculate_resolution_divider_for_resolution(int width, int height, const int resolution)
int calculate_resolution_for_divider(const int width, const int height, const int resolution_divider)
static double approximate_final_time(const RenderWork &render_work, const double time)
static uint round_num_samples_to_power_of_2(const uint num_samples)
#define min(a, b)
Definition sort.cc:36
string string_from_bool(bool var)
Definition string.cpp:183
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
Definition string.cpp:23
max
Definition text_draw.cc:251
CCL_NAMESPACE_BEGIN double time_dt()
Definition time.cpp:38
ccl_device_inline bool is_power_of_two(const size_t x)
Definition types_base.h:67
double total_time