Blender V4.3
binning.cpp
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2009-2011 Intel Corporation
2 * SPDX-FileCopyrightText: 2012-2022 Blender Foundation
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 *
6 * Adapted code from Intel Corporation. */
7
8// #define __KERNEL_SSE__
9
10#include "bvh/binning.h"
11
12#include <stdlib.h>
13
14#include "util/algorithm.h"
15#include "util/boundbox.h"
16#include "util/types.h"
17
19
20/* SSE replacements */
21
22__forceinline void prefetch_L1(const void * /*ptr*/) {}
23__forceinline void prefetch_L2(const void * /*ptr*/) {}
24__forceinline void prefetch_L3(const void * /*ptr*/) {}
25__forceinline void prefetch_NTA(const void * /*ptr*/) {}
26
27template<size_t src> __forceinline float extract(const int4 &b)
28{
29 return b[src];
30}
31template<size_t dst> __forceinline const float4 insert(const float4 &a, const float b)
32{
33 float4 r = a;
34 r[dst] = b;
35 return r;
36}
37
38__forceinline int get_best_dimension(const float4 &bestSAH)
39{
40 // return (int)__bsf(movemask(reduce_min(bestSAH) == bestSAH));
41
42 float minSAH = min(bestSAH.x, min(bestSAH.y, bestSAH.z));
43
44 if (bestSAH.x == minSAH) {
45 return 0;
46 }
47 else if (bestSAH.y == minSAH) {
48 return 1;
49 }
50 else {
51 return 2;
52 }
53}
54
55/* BVH Object Binning */
56
58 BVHReference *prims,
59 const BVHUnaligned *unaligned_heuristic,
60 const Transform *aligned_space)
61 : BVHRange(job),
62 splitSAH(FLT_MAX),
63 dim(0),
64 pos(0),
65 unaligned_heuristic_(unaligned_heuristic),
66 aligned_space_(aligned_space)
67{
68 if (aligned_space_ == NULL) {
69 bounds_ = bounds();
71 }
72 else {
73 /* TODO(sergey): With some additional storage we can avoid
74 * need in re-calculating this.
75 */
76 bounds_ = unaligned_heuristic->compute_aligned_boundbox(
77 *this, prims, *aligned_space, &cent_bounds_);
78 }
79
80 /* compute number of bins to use and precompute scaling factor for binning */
81 num_bins = min(size_t(MAX_BINS), size_t(4.0f + 0.05f * size()));
82 scale = rcp(cent_bounds_.size()) * make_float3((float)num_bins);
83
84 /* initialize binning counter and bounds */
85 BoundBox bin_bounds[MAX_BINS][4]; /* bounds for every bin in every dimension */
86 int4 bin_count[MAX_BINS]; /* number of primitives mapped to bin */
87
88 for (size_t i = 0; i < num_bins; i++) {
89 bin_count[i] = make_int4(0);
90 bin_bounds[i][0] = bin_bounds[i][1] = bin_bounds[i][2] = BoundBox::empty;
91 }
92
93 /* map geometry to bins, unrolled once */
94 {
95 int64_t i;
96
97 for (i = 0; i < int64_t(size()) - 1; i += 2) {
98 prefetch_L2(&prims[start() + i + 8]);
99
100 /* map even and odd primitive to bin */
101 const BVHReference &prim0 = prims[start() + i + 0];
102 const BVHReference &prim1 = prims[start() + i + 1];
103
104 BoundBox bounds0 = get_prim_bounds(prim0);
105 BoundBox bounds1 = get_prim_bounds(prim1);
106
107 int4 bin0 = get_bin(bounds0);
108 int4 bin1 = get_bin(bounds1);
109
110 /* increase bounds for bins for even primitive */
111 int b00 = (int)extract<0>(bin0);
112 bin_count[b00][0]++;
113 bin_bounds[b00][0].grow(bounds0);
114 int b01 = (int)extract<1>(bin0);
115 bin_count[b01][1]++;
116 bin_bounds[b01][1].grow(bounds0);
117 int b02 = (int)extract<2>(bin0);
118 bin_count[b02][2]++;
119 bin_bounds[b02][2].grow(bounds0);
120
121 /* increase bounds of bins for odd primitive */
122 int b10 = (int)extract<0>(bin1);
123 bin_count[b10][0]++;
124 bin_bounds[b10][0].grow(bounds1);
125 int b11 = (int)extract<1>(bin1);
126 bin_count[b11][1]++;
127 bin_bounds[b11][1].grow(bounds1);
128 int b12 = (int)extract<2>(bin1);
129 bin_count[b12][2]++;
130 bin_bounds[b12][2].grow(bounds1);
131 }
132
133 /* for uneven number of primitives */
134 if (i < int64_t(size())) {
135 /* map primitive to bin */
136 const BVHReference &prim0 = prims[start() + i];
137 BoundBox bounds0 = get_prim_bounds(prim0);
138 int4 bin0 = get_bin(bounds0);
139
140 /* increase bounds of bins */
141 int b00 = (int)extract<0>(bin0);
142 bin_count[b00][0]++;
143 bin_bounds[b00][0].grow(bounds0);
144 int b01 = (int)extract<1>(bin0);
145 bin_count[b01][1]++;
146 bin_bounds[b01][1].grow(bounds0);
147 int b02 = (int)extract<2>(bin0);
148 bin_count[b02][2]++;
149 bin_bounds[b02][2].grow(bounds0);
150 }
151 }
152
153 /* sweep from right to left and compute parallel prefix of merged bounds */
154 float4 r_area[MAX_BINS]; /* area of bounds of primitives on the right */
155 float4 r_count[MAX_BINS]; /* number of primitives on the right */
156 int4 count = make_int4(0);
157
161
162 for (size_t i = num_bins - 1; i > 0; i--) {
163 count = count + bin_count[i];
164 r_count[i] = blocks(count);
165
166 bx = merge(bx, bin_bounds[i][0]);
167 r_area[i][0] = bx.half_area();
168 by = merge(by, bin_bounds[i][1]);
169 r_area[i][1] = by.half_area();
170 bz = merge(bz, bin_bounds[i][2]);
171 r_area[i][2] = bz.half_area();
172 r_area[i][3] = r_area[i][2];
173 }
174
175 /* sweep from left to right and compute SAH */
176 int4 ii = make_int4(1);
177 float4 bestSAH = make_float4(FLT_MAX);
178 int4 bestSplit = make_int4(-1);
179
180 count = make_int4(0);
181
182 bx = BoundBox::empty;
183 by = BoundBox::empty;
184 bz = BoundBox::empty;
185
186 for (size_t i = 1; i < num_bins; i++, ii += make_int4(1)) {
187 count = count + bin_count[i - 1];
188
189 bx = merge(bx, bin_bounds[i - 1][0]);
190 float Ax = bx.half_area();
191 by = merge(by, bin_bounds[i - 1][1]);
192 float Ay = by.half_area();
193 bz = merge(bz, bin_bounds[i - 1][2]);
194 float Az = bz.half_area();
195
196 float4 lCount = blocks(count);
197 float4 lArea = make_float4(Ax, Ay, Az, Az);
198 float4 sah = lArea * lCount + r_area[i] * r_count[i];
199
200 bestSplit = select(sah < bestSAH, ii, bestSplit);
201 bestSAH = min(sah, bestSAH);
202 }
203
205 bestSAH = insert<3>(select(mask, make_float4(FLT_MAX), bestSAH), FLT_MAX);
206
207 /* find best dimension */
208 dim = get_best_dimension(bestSAH);
209 splitSAH = bestSAH[dim];
210 pos = bestSplit[dim];
212}
213
215 BVHObjectBinning &left_o,
216 BVHObjectBinning &right_o) const
217{
218 size_t N = size();
219
220 BoundBox lgeom_bounds = BoundBox::empty;
221 BoundBox rgeom_bounds = BoundBox::empty;
222 BoundBox lcent_bounds = BoundBox::empty;
223 BoundBox rcent_bounds = BoundBox::empty;
224
225 int64_t l = 0, r = N - 1;
226
227 while (l <= r) {
228 prefetch_L2(&prims[start() + l + 8]);
229 prefetch_L2(&prims[start() + r - 8]);
230
231 BVHReference prim = prims[start() + l];
233 float3 unaligned_center = unaligned_bounds.center2();
234 float3 center = prim.bounds().center2();
235
236 if (get_bin(unaligned_center)[dim] < pos) {
237 lgeom_bounds.grow(prim.bounds());
238 lcent_bounds.grow(center);
239 l++;
240 }
241 else {
242 rgeom_bounds.grow(prim.bounds());
243 rcent_bounds.grow(center);
244 swap(prims[start() + l], prims[start() + r]);
245 r--;
246 }
247 }
248 /* finish */
249 if (l != 0 && N - 1 - r != 0) {
250 right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + l, N - 1 - r),
251 prims);
252 left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), l), prims);
253 return;
254 }
255
256 /* object medium split if we did not make progress, can happen when all
257 * primitives have same centroid */
258 lgeom_bounds = BoundBox::empty;
259 rgeom_bounds = BoundBox::empty;
260 lcent_bounds = BoundBox::empty;
261 rcent_bounds = BoundBox::empty;
262
263 for (size_t i = 0; i < N / 2; i++) {
264 lgeom_bounds.grow(prims[start() + i].bounds());
265 lcent_bounds.grow(prims[start() + i].bounds().center2());
266 }
267
268 for (size_t i = N / 2; i < N; i++) {
269 rgeom_bounds.grow(prims[start() + i].bounds());
270 rcent_bounds.grow(prims[start() + i].bounds().center2());
271 }
272
273 right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + N / 2, N / 2 + N % 2),
274 prims);
275 left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), N / 2), prims);
276}
277
void BLI_kdtree_nd_ insert(KDTree *tree, int index, const float co[KD_DIMS]) ATTR_NONNULL(1
CCL_NAMESPACE_BEGIN __forceinline void prefetch_L1(const void *)
Definition binning.cpp:22
__forceinline float extract(const int4 &b)
Definition binning.cpp:27
__forceinline void prefetch_L2(const void *)
Definition binning.cpp:23
__forceinline void prefetch_NTA(const void *)
Definition binning.cpp:25
__forceinline void prefetch_L3(const void *)
Definition binning.cpp:24
__forceinline const float4 insert(const float4 &a, const float b)
Definition binning.cpp:31
__forceinline int get_best_dimension(const float4 &bestSAH)
Definition binning.cpp:38
ATTR_WARN_UNUSED_RESULT const BMLoop * l
void split(BVHReference *prims, BVHObjectBinning &left_o, BVHObjectBinning &right_o) const
Definition binning.cpp:214
__forceinline int4 get_bin(const BoundBox &box) const
Definition binning.h:63
__forceinline BVHObjectBinning()
Definition binning.h:29
BoundBox bounds_
Definition binning.h:53
__forceinline BoundBox get_prim_bounds(const BVHReference &prim) const
Definition binning.h:90
__forceinline const BoundBox & unaligned_bounds()
Definition binning.h:38
__forceinline float4 blocks(const int4 &a) const
Definition binning.h:79
const Transform * aligned_space_
Definition binning.h:57
size_t num_bins
Definition binning.h:49
BoundBox cent_bounds_
Definition binning.h:54
__forceinline int size() const
Definition params.h:291
__forceinline int start() const
Definition params.h:287
__forceinline const BoundBox & bounds() const
Definition params.h:279
__forceinline BVHRange()
Definition params.h:255
__forceinline const BoundBox & cent_bounds() const
Definition params.h:283
__forceinline const BoundBox & bounds() const
Definition params.h:205
BoundBox compute_aligned_boundbox(const BVHObjectBinning &range, const BVHReference *references, const Transform &aligned_space, BoundBox *cent_bounds=NULL) const
Definition unaligned.cpp:98
local_group_size(16, 16) .push_constant(Type b
#define CCL_NAMESPACE_END
ccl_device_forceinline float4 make_float4(const float x, const float y, const float z, const float w)
ccl_device_forceinline float3 make_float3(const float x, const float y, const float z)
#define NULL
ccl_device_forceinline int4 make_int4(const int x, const int y, const int z, const int w)
#define __forceinline
draw_view push_constant(Type::INT, "radiance_src") .push_constant(Type capture_info_buf storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]") .push_constant(Type draw_view int
int count
OrientationBounds merge(const OrientationBounds &cone_a, const OrientationBounds &cone_b)
ccl_device_inline float3 rcp(const float3 a)
ccl_device_inline float4 select(const int4 mask, const float4 a, const float4 b)
CCL_NAMESPACE_BEGIN ccl_device_inline float4 zero_float4()
Definition math_float4.h:15
#define N
#define swap(a, b)
Definition sort.c:55
#define min(a, b)
Definition sort.c:32
#define FLT_MAX
Definition stdcycles.h:14
__int64 int64_t
Definition stdint.h:89
__forceinline float half_area() const
Definition boundbox.h:107
__forceinline float3 size() const
Definition boundbox.h:123
__forceinline void grow(const float3 &pt)
Definition boundbox.h:36
__forceinline float3 center2() const
Definition boundbox.h:118
ccl_device_inline float4 float3_to_float4(const float3 a)
Definition util/math.h:540