Blender V4.3
bevel.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#pragma once
6
7#include "kernel/bvh/bvh.h"
10
12
13#ifdef __SHADER_RAYTRACE__
14
15/* Planar Cubic BSSRDF falloff, reused for bevel.
16 *
17 * This is basically (Rm - x)^3, with some factors to normalize it. For sampling
18 * we integrate 2*pi*x * (Rm - x)^3, which gives us a quintic equation that as
19 * far as I can tell has no closed form solution. So we get an iterative solution
20 * instead with newton-raphson. */
21
22ccl_device float svm_bevel_cubic_eval(const float radius, float r)
23{
24 const float Rm = radius;
25
26 if (r >= Rm) {
27 return 0.0f;
28 }
29
30 /* integrate (2*pi*r * 10*(R - r)^3)/(pi * R^5) from 0 to R = 1 */
31 const float Rm5 = (Rm * Rm) * (Rm * Rm) * Rm;
32 const float f = Rm - r;
33 const float num = f * f * f;
34
35 return (10.0f * num) / (Rm5 * M_PI_F);
36}
37
38ccl_device float svm_bevel_cubic_pdf(const float radius, float r)
39{
40 return svm_bevel_cubic_eval(radius, r);
41}
42
43/* solve 10x^2 - 20x^3 + 15x^4 - 4x^5 - xi == 0 */
44ccl_device_forceinline float svm_bevel_cubic_quintic_root_find(float xi)
45{
46 /* newton-raphson iteration, usually succeeds in 2-4 iterations, except
47 * outside 0.02 ... 0.98 where it can go up to 10, so overall performance
48 * should not be too bad */
49 const float tolerance = 1e-6f;
50 const int max_iteration_count = 10;
51 float x = 0.25f;
52 int i;
53
54 for (i = 0; i < max_iteration_count; i++) {
55 float x2 = x * x;
56 float x3 = x2 * x;
57 float nx = (1.0f - x);
58
59 float f = 10.0f * x2 - 20.0f * x3 + 15.0f * x2 * x2 - 4.0f * x2 * x3 - xi;
60 float f_ = 20.0f * (x * nx) * (nx * nx);
61
62 if (fabsf(f) < tolerance || f_ == 0.0f) {
63 break;
64 }
65
66 x = saturatef(x - f / f_);
67 }
68
69 return x;
70}
71
72ccl_device void svm_bevel_cubic_sample(const float radius,
73 float xi,
74 ccl_private float *r,
75 ccl_private float *h)
76{
77 float Rm = radius;
78 float r_ = svm_bevel_cubic_quintic_root_find(xi);
79
80 r_ *= Rm;
81 *r = r_;
82
83 /* h^2 + r^2 = Rm^2 */
84 *h = safe_sqrtf(Rm * Rm - r_ * r_);
85}
86
87/* Bevel shader averaging normals from nearby surfaces.
88 *
89 * Sampling strategy from: BSSRDF Importance Sampling, SIGGRAPH 2013
90 * http://library.imageworks.com/pdfs/imageworks-library-BSSRDF-sampling.pdf
91 */
92
93# ifdef __KERNEL_OPTIX__
94extern "C" __device__ float3 __direct_callable__svm_node_bevel(
95# else
96ccl_device float3 svm_bevel(
97# endif
101 float radius,
102 int num_samples)
103{
104 /* Early out if no sampling needed. */
105 if (radius <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) {
106 return sd->N;
107 }
108
109 /* Can't ray-trace from shaders like displacement, before BVH exists. */
110 if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) {
111 return sd->N;
112 }
113
114 /* Don't bevel for blurry indirect rays. */
115 if (INTEGRATOR_STATE(state, path, min_ray_pdf) < 8.0f) {
116 return sd->N;
117 }
118
119 /* Setup for multi intersection. */
120 LocalIntersection isect;
121 uint lcg_state = lcg_state_init(INTEGRATOR_STATE(state, path, rng_pixel),
122 INTEGRATOR_STATE(state, path, rng_offset),
124 0x64c6a40e);
125
126 /* Sample normals from surrounding points on surface. */
127 float3 sum_N = make_float3(0.0f, 0.0f, 0.0f);
128
129 /* TODO: support ray-tracing in shadow shader evaluation? */
130 RNGState rng_state;
131 path_state_rng_load(state, &rng_state);
132
133 for (int sample = 0; sample < num_samples; sample++) {
134 float2 rand_disk = path_branched_rng_2D(
135 kg, &rng_state, sample, num_samples, PRNG_SURFACE_BEVEL);
136
137 /* Pick random axis in local frame and point on disk. */
138 float3 disk_N, disk_T, disk_B;
139 float pick_pdf_N, pick_pdf_T, pick_pdf_B;
140
141 disk_N = sd->Ng;
142 make_orthonormals(disk_N, &disk_T, &disk_B);
143
144 float axisu = rand_disk.x;
145
146 if (axisu < 0.5f) {
147 pick_pdf_N = 0.5f;
148 pick_pdf_T = 0.25f;
149 pick_pdf_B = 0.25f;
150 rand_disk.x *= 2.0f;
151 }
152 else if (axisu < 0.75f) {
153 float3 tmp = disk_N;
154 disk_N = disk_T;
155 disk_T = tmp;
156 pick_pdf_N = 0.25f;
157 pick_pdf_T = 0.5f;
158 pick_pdf_B = 0.25f;
159 rand_disk.x = (rand_disk.x - 0.5f) * 4.0f;
160 }
161 else {
162 float3 tmp = disk_N;
163 disk_N = disk_B;
164 disk_B = tmp;
165 pick_pdf_N = 0.25f;
166 pick_pdf_T = 0.25f;
167 pick_pdf_B = 0.5f;
168 rand_disk.x = (rand_disk.x - 0.75f) * 4.0f;
169 }
170
171 /* Sample point on disk. */
172 float phi = M_2PI_F * rand_disk.x;
173 float disk_r = rand_disk.y;
174 float disk_height;
175
176 /* Perhaps find something better than Cubic BSSRDF, but happens to work well. */
177 svm_bevel_cubic_sample(radius, disk_r, &disk_r, &disk_height);
178
179 float3 disk_P = (disk_r * cosf(phi)) * disk_T + (disk_r * sinf(phi)) * disk_B;
180
181 /* Create ray. */
183 ray.P = sd->P + disk_N * disk_height + disk_P;
184 ray.D = -disk_N;
185 ray.tmin = 0.0f;
186 ray.tmax = 2.0f * disk_height;
187 ray.dP = differential_zero_compact();
188 ray.dD = differential_zero_compact();
189 ray.time = sd->time;
190 ray.self.object = OBJECT_NONE;
191 ray.self.prim = PRIM_NONE;
192 ray.self.light_object = OBJECT_NONE;
193 ray.self.light_prim = PRIM_NONE;
194 ray.self.light = LAMP_NONE;
195
196 /* Intersect with the same object. if multiple intersections are found it
197 * will use at most LOCAL_MAX_HITS hits, a random subset of all hits. */
198 scene_intersect_local(kg, &ray, &isect, sd->object, &lcg_state, LOCAL_MAX_HITS);
199
200 int num_eval_hits = min(isect.num_hits, LOCAL_MAX_HITS);
201
202 for (int hit = 0; hit < num_eval_hits; hit++) {
203 /* Quickly retrieve P and Ng without setting up ShaderData. */
204 float3 hit_P;
205 if (sd->type == PRIMITIVE_TRIANGLE) {
207 kg, sd, isect.hits[hit].prim, isect.hits[hit].u, isect.hits[hit].v);
208 }
209# ifdef __OBJECT_MOTION__
210 else if (sd->type == PRIMITIVE_MOTION_TRIANGLE) {
211 float3 verts[3];
212 motion_triangle_vertices(kg, sd->object, isect.hits[hit].prim, sd->time, verts);
213 hit_P = motion_triangle_point_from_uv(kg, sd, isect.hits[hit].u, isect.hits[hit].v, verts);
214 }
215# endif /* __OBJECT_MOTION__ */
216
217 /* Get geometric normal. */
218 float3 hit_Ng = isect.Ng[hit];
219 int object = isect.hits[hit].object;
220 int object_flag = kernel_data_fetch(object_flag, object);
221 if (object_negative_scale_applied(object_flag)) {
222 hit_Ng = -hit_Ng;
223 }
224
225 /* Compute smooth normal. */
226 float3 N = hit_Ng;
227 int prim = isect.hits[hit].prim;
228 int shader = kernel_data_fetch(tri_shader, prim);
229
230 if (shader & SHADER_SMOOTH_NORMAL) {
231 float u = isect.hits[hit].u;
232 float v = isect.hits[hit].v;
233
234 if (sd->type == PRIMITIVE_TRIANGLE) {
235 N = triangle_smooth_normal(kg, N, prim, u, v);
236 }
237# ifdef __OBJECT_MOTION__
238 else if (sd->type == PRIMITIVE_MOTION_TRIANGLE) {
239 N = motion_triangle_smooth_normal(kg, N, sd->object, prim, u, v, sd->time);
240 }
241# endif /* __OBJECT_MOTION__ */
242 }
243
244 /* Transform normals to world space. */
245 if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
246 object_normal_transform(kg, sd, &N);
247 object_normal_transform(kg, sd, &hit_Ng);
248 }
249
250 /* Probability densities for local frame axes. */
251 float pdf_N = pick_pdf_N * fabsf(dot(disk_N, hit_Ng));
252 float pdf_T = pick_pdf_T * fabsf(dot(disk_T, hit_Ng));
253 float pdf_B = pick_pdf_B * fabsf(dot(disk_B, hit_Ng));
254
255 /* Multiple importance sample between 3 axes, power heuristic
256 * found to be slightly better than balance heuristic. pdf_N
257 * in the MIS weight and denominator canceled out. */
258 float w = pdf_N / (sqr(pdf_N) + sqr(pdf_T) + sqr(pdf_B));
259 if (isect.num_hits > LOCAL_MAX_HITS) {
260 w *= isect.num_hits / (float)LOCAL_MAX_HITS;
261 }
262
263 /* Real distance to sampled point. */
264 float r = len(hit_P - sd->P);
265
266 /* Compute weight. */
267 float pdf = svm_bevel_cubic_pdf(radius, r);
268 float disk_pdf = svm_bevel_cubic_pdf(radius, disk_r);
269
270 w *= pdf / disk_pdf;
271
272 /* Sum normal and weight. */
273 sum_N += w * N;
274 }
275 }
276
277 /* Normalize. */
278 float3 N = safe_normalize(sum_N);
279 return is_zero(N) ? sd->N : (sd->flag & SD_BACKFACING) ? -N : N;
280}
281
282template<uint node_feature_mask, typename ConstIntegratorGenericState>
283# if defined(__KERNEL_OPTIX__)
285# else
287# endif
288 void
289 svm_node_bevel(KernelGlobals kg,
290 ConstIntegratorGenericState state,
292 ccl_private float *stack,
293 uint4 node)
294{
295 uint num_samples, radius_offset, normal_offset, out_offset;
296 svm_unpack_node_uchar4(node.y, &num_samples, &radius_offset, &normal_offset, &out_offset);
297
298 float3 bevel_N = sd->N;
299
301 {
302 float radius = stack_load_float(stack, radius_offset);
303
304# ifdef __KERNEL_OPTIX__
305 bevel_N = optixDirectCall<float3>(1, kg, state, sd, radius, num_samples);
306# else
307 bevel_N = svm_bevel(kg, state, sd, radius, num_samples);
308# endif
309
310 if (stack_valid(normal_offset)) {
311 /* Preserve input normal. */
312 float3 ref_N = stack_load_float3(stack, normal_offset);
313 bevel_N = normalize(ref_N + (bevel_N - sd->N));
314 }
315 }
316
317 stack_store_float3(stack, out_offset, bevel_N);
318}
319
320#endif /* __SHADER_RAYTRACE__ */
321
MINLINE float safe_sqrtf(float a)
unsigned int uint
ATTR_WARN_UNUSED_RESULT const BMVert * v
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition btQuadWord.h:119
SIMD_FORCE_INLINE btVector3 & normalize()
Normalize this vector x^2 + y^2 + z^2 = 1.
Definition btVector3.h:303
additional_info("compositor_sum_squared_difference_float_shared") .push_constant(Type output_img float dot(value.rgb, luminance_coefficients)") .define("LOAD(value)"
#define kernel_data
const KernelGlobalsCPU *ccl_restrict KernelGlobals
#define kernel_data_fetch(name, index)
#define ccl_device_forceinline
#define ccl_optional_struct_init
#define sinf(x)
#define cosf(x)
#define ccl_device
#define ccl_private
#define ccl_device_inline
#define ccl_device_noinline
#define CCL_NAMESPACE_END
#define saturatef(x)
ccl_device_forceinline float3 make_float3(const float x, const float y, const float z)
#define fabsf(x)
#define __device__
ccl_device_forceinline float differential_zero_compact()
int len
draw_view in_light_buf[] float
ccl_device_inline float3 triangle_smooth_normal(KernelGlobals kg, float3 Ng, int prim, float u, float v)
static float verts[][3]
ccl_device_inline bool object_negative_scale_applied(const int object_flag)
ccl_device_inline void object_normal_transform(KernelGlobals kg, ccl_private const ShaderData *sd, ccl_private float3 *N)
ccl_device_inline void stack_store_float3(ccl_private float *stack, uint a, float3 f)
CCL_NAMESPACE_BEGIN ccl_device_inline float3 stack_load_float3(ccl_private float *stack, uint a)
ccl_device_inline float stack_load_float(ccl_private float *stack, uint a)
ccl_device_forceinline void svm_unpack_node_uchar4(uint i, ccl_private uint *x, ccl_private uint *y, ccl_private uint *z, ccl_private uint *w)
ccl_device_inline bool stack_valid(uint a)
@ SD_BACKFACING
@ PRIMITIVE_MOTION_TRIANGLE
@ PRIMITIVE_TRIANGLE
#define IF_KERNEL_NODES_FEATURE(feature)
@ PRNG_SURFACE_BEVEL
#define PRIM_NONE
#define OBJECT_NONE
#define LOCAL_MAX_HITS
ShaderData
@ SHADER_SMOOTH_NORMAL
@ SD_OBJECT_TRANSFORM_APPLIED
@ BVH_LAYOUT_NONE
#define LAMP_NONE
ccl_device_inline uint lcg_state_init(const uint rng_hash, const uint rng_offset, const uint sample, const uint scramble)
Definition lcg.h:36
ccl_device_inline bool is_zero(const float2 a)
ccl_device_inline float2 safe_normalize(const float2 a)
static ulong state[N]
#define N
#define M_PI_F
Definition mikk_util.hh:15
ccl_device_inline float3 motion_triangle_smooth_normal(KernelGlobals kg, float3 Ng, int object, uint3 tri_vindex, int numsteps, int step, float t, float u, float v)
ccl_device_inline void motion_triangle_vertices(KernelGlobals kg, int object, uint3 tri_vindex, int numsteps, int numverts, int step, float t, float3 verts[3])
CCL_NAMESPACE_BEGIN ccl_device_inline float3 motion_triangle_point_from_uv(KernelGlobals kg, ccl_private ShaderData *sd, const float u, const float v, float3 verts[3])
ccl_device_inline void path_state_rng_load(ConstIntegratorState state, ccl_private RNGState *rng_state)
Definition path_state.h:315
ccl_device_inline float2 path_branched_rng_2D(KernelGlobals kg, ccl_private const RNGState *rng_state, const int branch, const int num_branches, const int dimension)
Definition path_state.h:375
#define M_2PI_F
Definition sky_float3.h:23
#define min(a, b)
Definition sort.c:32
const IntegratorStateCPU *ccl_restrict ConstIntegratorState
Definition state.h:229
#define INTEGRATOR_STATE(state, nested_struct, member)
Definition state.h:235
struct Intersection hits[LOCAL_MAX_HITS]
float3 Ng[LOCAL_MAX_HITS]
float3 P
float x
float y
ccl_device_inline float3 triangle_point_from_uv(KernelGlobals kg, ccl_private ShaderData *sd, const int isect_prim, const float u, const float v)
ccl_device_inline float sqr(float a)
Definition util/math.h:782
ccl_device_inline void make_orthonormals(const float3 N, ccl_private float3 *a, ccl_private float3 *b)
Definition util/math.h:593