Blender V5.0
noise.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2009-2010 Sony Pictures Imageworks Inc., et al. All Rights Reserved.
2 * SPDX-FileCopyrightText: 2011-2022 Blender Foundation
3 *
4 * SPDX-License-Identifier: BSD-3-Clause
5 *
6 * Adapted code from Open Shading Language. */
7
8#pragma once
9
10#include "util/hash.h"
11#include "util/math.h"
12#include "util/types.h"
13
15
16/* **** Perlin Noise **** */
17
18ccl_device float fade(const float t)
19{
20 return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);
21}
22
23ccl_device_inline float negate_if(const float val, const int condition)
24{
25 return (condition) ? -val : val;
26}
27
28ccl_device float grad1(const int hash, const float x)
29{
30 const int h = hash & 15;
31 const float g = 1 + (h & 7);
32 return negate_if(g, h & 8) * x;
33}
34
36{
 /* 1D Perlin noise: blend the gradient contributions of the two lattice points
  * X and X + 1, using the faded fractional offset as the blend weight.
  * NOTE(review): the signature line is not visible here; `x` is the input coordinate. */
37 int X;
 /* floorfrac() is defined elsewhere; presumably it stores floor(x) in X and returns
  * the fractional remainder -- confirm against its definition. */
38 const float fx = floorfrac(x, &X);
39 const float u = fade(fx);
40
 /* The offset to the second lattice point is fx - 1. */
41 return mix(grad1(hash_uint(X), fx), grad1(hash_uint(X + 1), fx - 1.0f), u);
42}
43
44/* 2D, 3D, and 4D noise can be accelerated using SSE, so we first check if
45 * SSE is supported, that is, if __KERNEL_SSE__ is defined. If it is not
46 * supported, we do a standard implementation, but if it is supported, we
47 * do an implementation using SSE intrinsics.
48 */
49#if !defined(__KERNEL_SSE__)
50
51/* ** Standard Implementation ** */
52
53/* Bilinear Interpolation:
54 *
55 * v2 v3
56 * @ + + + + @ y
57 * + + ^
58 * + + |
59 * + + |
60 * @ + + + + @ @------> x
61 * v0 v1
62 *
63 */
65 const float v0, const float v1, const float v2, const float v3, const float x, float y)
66{
 /* Bilinear blend of the four corner values: v0/v1 and v2/v3 are each blended
  * with weight x, then the two results are blended with weight y. */
67 float x1 = 1.0f - x;
68 return (1.0f - y) * (v0 * x1 + v1 * x) + y * (v2 * x1 + v3 * x);
69}
70
71/* Trilinear Interpolation:
72 *
73 * v6 v7
74 * @ + + + + + + @
75 * +\ +\
76 * + \ + \
77 * + \ + \
78 * + \ v4 + \ v5
79 * + @ + + + +++ + @ z
80 * + + + + y ^
81 * v2 @ + +++ + + + @ v3 + \ |
82 * \ + \ + \ |
83 * \ + \ + \|
84 * \ + \ + +---------> x
85 * \+ \+
86 * @ + + + + + + @
87 * v0 v1
88 */
89ccl_device float tri_mix(const float v0,
90 float v1,
91 float v2,
92 float v3,
93 float v4,
94 float v5,
95 float v6,
96 float v7,
97 const float x,
98 const float y,
99 const float z)
100{
101 float x1 = 1.0f - x;
102 float y1 = 1.0f - y;
103 float z1 = 1.0f - z;
104 return z1 * (y1 * (v0 * x1 + v1 * x) + y * (v2 * x1 + v3 * x)) +
105 z * (y1 * (v4 * x1 + v5 * x) + y * (v6 * x1 + v7 * x));
106}
107
108ccl_device float quad_mix(const float v0,
109 float v1,
110 float v2,
111 float v3,
112 float v4,
113 float v5,
114 float v6,
115 float v7,
116 float v8,
117 float v9,
118 float v10,
119 float v11,
120 float v12,
121 float v13,
122 float v14,
123 float v15,
124 const float x,
125 const float y,
126 const float z,
127 const float w)
128{
129 return mix(tri_mix(v0, v1, v2, v3, v4, v5, v6, v7, x, y, z),
130 tri_mix(v8, v9, v10, v11, v12, v13, v14, v15, x, y, z),
131 w);
132}
133
134ccl_device float grad2(const int hash, const float x, float y)
135{
136 int h = hash & 7;
137 float u = h < 4 ? x : y;
138 float v = 2.0f * (h < 4 ? y : x);
139 return negate_if(u, h & 1) + negate_if(v, h & 2);
140}
141
142ccl_device float grad3(const int hash, const float x, float y, const float z)
143{
144 int h = hash & 15;
145 float u = h < 8 ? x : y;
146 float vt = ((h == 12) || (h == 14)) ? x : z;
147 float v = h < 4 ? y : vt;
148 return negate_if(u, h & 1) + negate_if(v, h & 2);
149}
150
151ccl_device float grad4(const int hash, const float x, float y, const float z, float w)
152{
153 int h = hash & 31;
154 float u = h < 24 ? x : y;
155 float v = h < 16 ? y : z;
156 float s = h < 8 ? z : w;
157 return negate_if(u, h & 1) + negate_if(v, h & 2) + negate_if(s, h & 4);
158}
159
160ccl_device_noinline_cpu float perlin_2d(const float x, const float y)
161{
162 int X;
163 int Y;
164
165 float fx = floorfrac(x, &X);
166 float fy = floorfrac(y, &Y);
167
168 float u = fade(fx);
169 float v = fade(fy);
170
171 float r = bi_mix(grad2(hash_uint2(X, Y), fx, fy),
172 grad2(hash_uint2(X + 1, Y), fx - 1.0f, fy),
173 grad2(hash_uint2(X, Y + 1), fx, fy - 1.0f),
174 grad2(hash_uint2(X + 1, Y + 1), fx - 1.0f, fy - 1.0f),
175 u,
176 v);
177
178 return r;
179}
180
181ccl_device_noinline_cpu float perlin_3d(const float x, const float y, float z)
182{
183 int X;
184 int Y;
185 int Z;
186
187 float fx = floorfrac(x, &X);
188 float fy = floorfrac(y, &Y);
189 float fz = floorfrac(z, &Z);
190
191 float u = fade(fx);
192 float v = fade(fy);
193 float w = fade(fz);
194
195 float r = tri_mix(grad3(hash_uint3(X, Y, Z), fx, fy, fz),
196 grad3(hash_uint3(X + 1, Y, Z), fx - 1.0f, fy, fz),
197 grad3(hash_uint3(X, Y + 1, Z), fx, fy - 1.0f, fz),
198 grad3(hash_uint3(X + 1, Y + 1, Z), fx - 1.0f, fy - 1.0f, fz),
199 grad3(hash_uint3(X, Y, Z + 1), fx, fy, fz - 1.0f),
200 grad3(hash_uint3(X + 1, Y, Z + 1), fx - 1.0f, fy, fz - 1.0f),
201 grad3(hash_uint3(X, Y + 1, Z + 1), fx, fy - 1.0f, fz - 1.0f),
202 grad3(hash_uint3(X + 1, Y + 1, Z + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f),
203 u,
204 v,
205 w);
206 return r;
207}
208
209ccl_device_noinline_cpu float perlin_4d(const float x, const float y, float z, const float w)
210{
211 int X;
212 int Y;
213 int Z;
214 int W;
215
216 float fx = floorfrac(x, &X);
217 float fy = floorfrac(y, &Y);
218 float fz = floorfrac(z, &Z);
219 float fw = floorfrac(w, &W);
220
221 float u = fade(fx);
222 float v = fade(fy);
223 float t = fade(fz);
224 float s = fade(fw);
225
226 float r = quad_mix(
227 grad4(hash_uint4(X, Y, Z, W), fx, fy, fz, fw),
228 grad4(hash_uint4(X + 1, Y, Z, W), fx - 1.0f, fy, fz, fw),
229 grad4(hash_uint4(X, Y + 1, Z, W), fx, fy - 1.0f, fz, fw),
230 grad4(hash_uint4(X + 1, Y + 1, Z, W), fx - 1.0f, fy - 1.0f, fz, fw),
231 grad4(hash_uint4(X, Y, Z + 1, W), fx, fy, fz - 1.0f, fw),
232 grad4(hash_uint4(X + 1, Y, Z + 1, W), fx - 1.0f, fy, fz - 1.0f, fw),
233 grad4(hash_uint4(X, Y + 1, Z + 1, W), fx, fy - 1.0f, fz - 1.0f, fw),
234 grad4(hash_uint4(X + 1, Y + 1, Z + 1, W), fx - 1.0f, fy - 1.0f, fz - 1.0f, fw),
235 grad4(hash_uint4(X, Y, Z, W + 1), fx, fy, fz, fw - 1.0f),
236 grad4(hash_uint4(X + 1, Y, Z, W + 1), fx - 1.0f, fy, fz, fw - 1.0f),
237 grad4(hash_uint4(X, Y + 1, Z, W + 1), fx, fy - 1.0f, fz, fw - 1.0f),
238 grad4(hash_uint4(X + 1, Y + 1, Z, W + 1), fx - 1.0f, fy - 1.0f, fz, fw - 1.0f),
239 grad4(hash_uint4(X, Y, Z + 1, W + 1), fx, fy, fz - 1.0f, fw - 1.0f),
240 grad4(hash_uint4(X + 1, Y, Z + 1, W + 1), fx - 1.0f, fy, fz - 1.0f, fw - 1.0f),
241 grad4(hash_uint4(X, Y + 1, Z + 1, W + 1), fx, fy - 1.0f, fz - 1.0f, fw - 1.0f),
242 grad4(hash_uint4(X + 1, Y + 1, Z + 1, W + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f, fw - 1.0f),
243 u,
244 v,
245 t,
246 s);
247
248 return r;
249}
250
251#else /* SSE is supported. */
252
253/* ** SSE Implementation ** */
254
255/* SSE Bilinear Interpolation:
256 *
257 * The function takes two float4 inputs:
258 * - p : Contains the values at the points (v0, v1, v2, v3).
259 * - f : Contains the values (x, y, _, _). The third and fourth values are unused.
260 *
261 * The interpolation is done in two steps:
262 * 1. Interpolate (v0, v1) and (v2, v3) along the x axis to get g (g0, g1).
263 * (v2, v3) is generated by moving v2 and v3 to the first and second
264 * places of the float4 using the shuffle mask <2, 3, 2, 3>. The third and
265 * fourth values are unused.
266 * 2. Interpolate g0 and g1 along the y axis to get the final value.
267 * g1 is generated by populating a float4 with the second value of g.
268 * Only the first value is important in the final float4.
269 *
270 * v1 v3 g1
271 * @ + + + + @ @ y
272 * + + (1) + (2) ^
273 * + + ---> + ---> final |
274 * + + + |
275 * @ + + + + @ @ @------> x
276 * v0 v2 g0
277 *
278 */
280{
 /* NOTE(review): the signature line is not visible here; per the comment above,
  * p holds (v0, v1, v2, v3) and f holds (x, y, _, _). */
 /* Step 1: blend p against its own upper half with the x weight broadcast to
  * every lane; only the first two lanes of g are used afterwards. */
281 const float4 g = mix(p, shuffle<2, 3, 2, 3>(p), shuffle<0>(f));
 /* Step 2: blend g0 with g1 using the y weight; the result is in the first lane. */
282 return mix(g, shuffle<1>(g), shuffle<1>(f));
283}
284
286{
 /* Lane-wise version of the scalar quintic fade curve, t^3 * (t * (6t - 15) + 10).
  * NOTE(review): presumes madd(a, b, c) computes a * b + c -- confirm against its
  * definition. */
287 const float4 a = madd(t, make_float4(6.0f), make_float4(-15.0f));
288 const float4 b = madd(t, a, make_float4(10.0f));
289 return (t * t) * (t * b);
290}
291
292/* Negate val if the nth bit of h is 1.
 *
 * Bit n of h is isolated and shifted up to bit 31, then XOR-ed into val.
 * NOTE(review): `cast` is defined elsewhere; presumably it reinterprets the integer
 * bits as a float vector so the XOR flips the sign bit of each lane -- confirm. */
293# define negate_if_nth_bit(val, h, n) ((val) ^ cast(((h) & (1 << (n))) << (31 - (n))))
294
295ccl_device_inline float4 grad(const int4 hash, const float4 x, const float4 y)
296{
297 const int4 h = hash & 7;
298 const float4 u = select(h < 4, x, y);
299 const float4 v = 2.0f * select(h < 4, y, x);
300 return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
301}
302
303/* We use SSE to compute and interpolate 4 gradients at once:
304 *
305 * Point Offset from v0
306 * v0 (0, 0)
307 * v1 (0, 1)
308 * v2 (1, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(V, V + 1))
309 * v3 (1, 1) ^
310 * | |__________| (0, 0, 1, 1) = shuffle<0, 0, 0, 0>(V, V + 1)
311 * | ^
312 * |__________________________|
313 *
314 */
315ccl_device_noinline_cpu float perlin_2d(const float x, const float y)
316{
317 int4 XY;
318 const float4 fxy = floorfrac(make_float4(x, y, 0.0f, 0.0f), &XY);
319 const float4 uv = fade(fxy);
320
321 const int4 XY1 = XY + make_int4(1);
322 const int4 X = shuffle<0, 0, 0, 0>(XY, XY1);
323 const int4 Y = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(XY, XY1));
324
325 const int4 h = hash_int4_2(X, Y);
326
327 const float4 fxy1 = fxy - make_float4(1.0f);
328 const float4 fx = shuffle<0, 0, 0, 0>(fxy, fxy1);
329 const float4 fy = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(fxy, fxy1));
330
331 const float4 g = grad(h, fx, fy);
332
333 return extract<0>(bi_mix(g, uv));
334}
335
336/* SSE Trilinear Interpolation:
337 *
338 * The function takes three float4 inputs:
339 * - p : Contains the values at the points (v0, v1, v2, v3).
340 * - q : Contains the values at the points (v4, v5, v6, v7).
341 * - f : Contains the values (x, y, z, _). The fourth value is unused.
342 *
343 * The interpolation is done in three steps:
344 * 1. Interpolate p and q along the x axis to get s (s0, s1, s2, s3).
345 * 2. Interpolate (s0, s1) and (s2, s3) along the y axis to get g (g0, g1).
346 * (s2, s3) is generated by moving s2 and s3 to the first and second
347 * places of the float4 using the shuffle mask <2, 3, 2, 3>. The third and
348 * fourth values are unused.
349 * 3. Interpolate g0 and g1 along the z axis to get the final value.
350 * g1 is generated by populating a float4 with the second value of g.
351 * Only the first value is important in the final float4.
352 *
353 * v3 v7
354 * @ + + + + + + @ s3 @
355 * +\ +\ +\
356 * + \ + \ + \
357 * + \ + \ + \ g1
358 * + \ v1 + \ v5 + \ s1 @
359 * + @ + + + +++ + @ + @ + z
360 * + + + + (1) + + (2) + (3) y ^
361 * v2 @ + +++ + + + @ v6 + ---> s2 @ + ---> + ---> final \ |
362 * \ + \ + \ + + \ |
363 * \ + \ + \ + + \|
364 * \ + \ + \ + @ +---------> x
365 * \+ \+ \+ g0
366 * @ + + + + + + @ @
367 * v0 v4 s0
368 */
370{
 /* NOTE(review): the signature line is not visible here; per the comment above,
  * p holds (v0..v3), q holds (v4..v7) and f holds (x, y, z, _). */
 /* Step 1: blend p and q lane-wise with the x weight broadcast to every lane. */
371 const float4 s = mix(p, q, shuffle<0>(f));
 /* Step 2: blend (s0, s1) against (s2, s3) with the y weight. */
372 const float4 g = mix(s, shuffle<2, 3, 2, 3>(s), shuffle<1>(f));
 /* Step 3: blend g0 with g1 using the z weight; the result is in the first lane. */
373 return mix(g, shuffle<1>(g), shuffle<2>(f));
374}
375
376/* 3D and 4D noise can be accelerated using AVX2, so we first check if AVX2
377 * is supported, that is, if __KERNEL_AVX2__ is defined. If it is not
378 * supported, we do an SSE implementation, but if it is supported,
379 * we do an implementation using AVX2 intrinsics.
380 */
381# if !defined(__KERNEL_AVX2__)
382
383ccl_device_inline float4 grad(const int4 hash, const float4 x, const float4 y, const float4 z)
384{
385 const int4 h = hash & 15;
386 const float4 u = select(h < 8, x, y);
387 const float4 vt = select((h == 12) | (h == 14), x, z);
388 const float4 v = select(h < 4, y, vt);
389 return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
390}
391
393grad(const int4 hash, const float4 x, const float4 y, const float4 z, const float4 w)
394{
 /* Four 4D gradient contributions at once, one per lane. Only the low 5 bits of
  * each hash are used: the comparisons pick three of the four offsets, and bits
  * 0, 1 and 2 decide the sign of each picked term. */
395 const int4 h = hash & 31;
396 const float4 u = select(h < 24, x, y);
397 const float4 v = select(h < 16, y, z);
398 const float4 s = select(h < 8, z, w);
399 return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1) + negate_if_nth_bit(s, h, 2);
400}
401
402/* SSE Quadrilinear Interpolation:
403 *
404 * Quadrilinear interpolation is as simple as a linear interpolation
405 * between two trilinear interpolations.
406 *
407 */
409quad_mix(const float4 p, const float4 q, float4 r, const float4 s, float4 f)
410{
 /* Trilinear blend of (p, q) and of (r, s) with the first three weights of f,
  * then a linear blend of the two results with the fourth weight of f. */
411 return mix(tri_mix(p, q, f), tri_mix(r, s, f), shuffle<3>(f));
412}
413
414/* We use SSE to compute and interpolate 4 gradients at once. Since we have 8
415 * gradients in 3D, we need to compute two sets of gradients at the points:
416 *
417 * Point Offset from v0
418 * v0 (0, 0, 0)
419 * v1 (0, 0, 1)
420 * v2 (0, 1, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1))
421 * v3 (0, 1, 1) ^
422 * | |__________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1)
423 * | ^
424 * |__________________________|
425 *
426 * Point Offset from v0
427 * v4 (1, 0, 0)
428 * v5 (1, 0, 1)
429 * v6 (1, 1, 0)
430 * v7 (1, 1, 1)
431 *
432 */
433ccl_device_noinline_cpu float perlin_3d(const float x, const float y, float z)
434{
435 int4 XYZ;
436 const float4 fxyz = floorfrac(make_float4(x, y, z, 0.0f), &XYZ);
437 const float4 uvw = fade(fxyz);
438
439 const int4 XYZ1 = XYZ + make_int4(1);
440 const int4 Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1);
441 const int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1));
442
443 const int4 h1 = hash_int4_3(shuffle<0>(XYZ), Y, Z);
444 const int4 h2 = hash_int4_3(shuffle<0>(XYZ1), Y, Z);
445
446 const float4 fxyz1 = fxyz - make_float4(1.0f);
447 const float4 fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1);
448 const float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1));
449
450 const float4 g1 = grad(h1, shuffle<0>(fxyz), fy, fz);
451 const float4 g2 = grad(h2, shuffle<0>(fxyz1), fy, fz);
452
453 return extract<0>(tri_mix(g1, g2, uvw));
454}
455
456/* We use SSE to compute and interpolate 4 gradients at once. Since we have 16
457 * gradients in 4D, we need to compute four sets of gradients at the points:
458 *
459 * Point Offset from v0
460 * v0 (0, 0, 0, 0)
461 * v1 (0, 0, 1, 0)
462 * v2 (0, 1, 0, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1))
463 * v3 (0, 1, 1, 0) ^
464 * | |________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1)
465 * | ^
466 * |_______________________|
467 *
468 * Point Offset from v0
469 * v4 (1, 0, 0, 0)
470 * v5 (1, 0, 1, 0)
471 * v6 (1, 1, 0, 0)
472 * v7 (1, 1, 1, 0)
473 *
474 * Point Offset from v0
475 * v8 (0, 0, 0, 1)
476 * v9 (0, 0, 1, 1)
477 * v10 (0, 1, 0, 1)
478 * v11 (0, 1, 1, 1)
479 *
480 * Point Offset from v0
481 * v12 (1, 0, 0, 1)
482 * v13 (1, 0, 1, 1)
483 * v14 (1, 1, 0, 1)
484 * v15 (1, 1, 1, 1)
485 *
486 */
487ccl_device_noinline_cpu float perlin_4d(const float x, const float y, float z, const float w)
488{
489 int4 XYZW;
490 const float4 fxyzw = floorfrac(make_float4(x, y, z, w), &XYZW);
491 const float4 uvws = fade(fxyzw);
492
493 const int4 XYZW1 = XYZW + make_int4(1);
494 const int4 Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1);
495 const int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1));
496
497 const int4 h1 = hash_int4_4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW));
498 const int4 h2 = hash_int4_4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW));
499
500 const int4 h3 = hash_int4_4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW1));
501 const int4 h4 = hash_int4_4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW1));
502
503 const float4 fxyzw1 = fxyzw - make_float4(1.0f);
504 const float4 fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1);
505 const float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1));
506
507 const float4 g1 = grad(h1, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw));
508 const float4 g2 = grad(h2, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw));
509
510 const float4 g3 = grad(h3, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw1));
511 const float4 g4 = grad(h4, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw1));
512
513 return extract<0>(quad_mix(g1, g2, g3, g4, uvws));
514}
515
516# else /* AVX is supported. */
517
518/* AVX Implementation */
519
520ccl_device_inline vfloat8 grad(const vint8 hash, const vfloat8 x, const vfloat8 y, const vfloat8 z)
521{
522 vint8 h = hash & 15;
523 vfloat8 u = select(h < 8, x, y);
524 vfloat8 vt = select((h == 12) | (h == 14), x, z);
525 vfloat8 v = select(h < 4, y, vt);
526 return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
527}
528
529ccl_device_inline vfloat8
530grad(const vint8 hash, const vfloat8 x, const vfloat8 y, const vfloat8 z, const vfloat8 w)
531{
532 vint8 h = hash & 31;
533 vfloat8 u = select(h < 24, x, y);
534 vfloat8 v = select(h < 16, y, z);
535 vfloat8 s = select(h < 8, z, w);
536 return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1) + negate_if_nth_bit(s, h, 2);
537}
538
539/* AVX Quadrilinear Interpolation:
540 *
541 * The interpolation is done in two steps:
542 * 1. Interpolate p and q along the w axis to get s.
543 * 2. Trilinearly interpolate (s0, s1, s2, s3) and (s4, s5, s6, s7) to get the final
544 * value. (s0, s1, s2, s3) and (s4, s5, s6, s7) are generated by extracting the
545 * low and high float4 from s.
546 *
547 */
548ccl_device_inline float4 quad_mix(vfloat8 p, vfloat8 q, const float4 f)
549{
550 float4 fv = shuffle<3>(f);
551 vfloat8 s = mix(p, q, make_vfloat8(fv, fv));
552 return tri_mix(low(s), high(s), f);
553}
554
555/* We use AVX to compute and interpolate 8 gradients at once.
556 *
557 * Point Offset from v0
558 * v0 (0, 0, 0)
559 * v1 (0, 0, 1) The full AVX type is computed by inserting the following
560 * v2 (0, 1, 0) SSE types into both the low and high parts of the AVX.
561 * v3 (0, 1, 1)
562 * v4 (1, 0, 0)
563 * v5 (1, 0, 1) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1))
564 * v6 (1, 1, 0) ^
565 * v7 (1, 1, 1) |
566 * | |__________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1)
567 * | ^
568 * |__________________________|
569 *
570 */
571ccl_device_noinline_cpu float perlin_3d(const float x, const float y, float z)
572{
573 int4 XYZ;
574 float4 fxyz = floorfrac(make_float4(x, y, z, 0.0f), &XYZ);
575 float4 uvw = fade(fxyz);
576
577 int4 XYZ1 = XYZ + make_int4(1);
578 int4 X = shuffle<0>(XYZ);
579 int4 X1 = shuffle<0>(XYZ1);
580 int4 Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1);
581 int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1));
582
583 vint8 h = hash_int8_3(make_vint8(X, X1), make_vint8(Y, Y), make_vint8(Z, Z));
584
585 float4 fxyz1 = fxyz - make_float4(1.0f);
586 float4 fx = shuffle<0>(fxyz);
587 float4 fx1 = shuffle<0>(fxyz1);
588 float4 fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1);
589 float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1));
590
591 vfloat8 g = grad(h, make_vfloat8(fx, fx1), make_vfloat8(fy, fy), make_vfloat8(fz, fz));
592
593 return extract<0>(tri_mix(low(g), high(g), uvw));
594}
595
596/* We use AVX to compute and interpolate 8 gradients at once. Since we have 16
597 * gradients in 4D, we need to compute two sets of gradients at the points:
598 *
599 * Point Offset from v0
600 * v0 (0, 0, 0, 0)
601 * v1 (0, 0, 1, 0) The full AVX type is computed by inserting the following
602 * v2 (0, 1, 0, 0) SSE types into both the low and high parts of the AVX.
603 * v3 (0, 1, 1, 0)
604 * v4 (1, 0, 0, 0)
605 * v5 (1, 0, 1, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1))
606 * v6 (1, 1, 0, 0) ^
607 * v7 (1, 1, 1, 0) |
608 * | |________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1)
609 * | ^
610 * |_______________________|
611 *
612 * Point Offset from v0
613 * v8 (0, 0, 0, 1)
614 * v9 (0, 0, 1, 1)
615 * v10 (0, 1, 0, 1)
616 * v11 (0, 1, 1, 1)
617 * v12 (1, 0, 0, 1)
618 * v13 (1, 0, 1, 1)
619 * v14 (1, 1, 0, 1)
620 * v15 (1, 1, 1, 1)
621 *
622 */
623ccl_device_noinline_cpu float perlin_4d(const float x, const float y, float z, const float w)
624{
625 int4 XYZW;
626 float4 fxyzw = floorfrac(make_float4(x, y, z, w), &XYZW);
627 float4 uvws = fade(fxyzw);
628
629 int4 XYZW1 = XYZW + make_int4(1);
630 int4 X = shuffle<0>(XYZW);
631 int4 X1 = shuffle<0>(XYZW1);
632 int4 Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1);
633 int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1));
634 int4 W = shuffle<3>(XYZW);
635 int4 W1 = shuffle<3>(XYZW1);
636
637 vint8 h1 = hash_int8_4(make_vint8(X, X1), make_vint8(Y, Y), make_vint8(Z, Z), make_vint8(W, W));
638 vint8 h2 = hash_int8_4(
639 make_vint8(X, X1), make_vint8(Y, Y), make_vint8(Z, Z), make_vint8(W1, W1));
640
641 float4 fxyzw1 = fxyzw - make_float4(1.0f);
642 float4 fx = shuffle<0>(fxyzw);
643 float4 fx1 = shuffle<0>(fxyzw1);
644 float4 fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1);
645 float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1));
646 float4 fw = shuffle<3>(fxyzw);
647 float4 fw1 = shuffle<3>(fxyzw1);
648
649 vfloat8 g1 = grad(
650 h1, make_vfloat8(fx, fx1), make_vfloat8(fy, fy), make_vfloat8(fz, fz), make_vfloat8(fw, fw));
651 vfloat8 g2 = grad(h2,
652 make_vfloat8(fx, fx1),
653 make_vfloat8(fy, fy),
654 make_vfloat8(fz, fz),
655 make_vfloat8(fw1, fw1));
656
657 return extract<0>(quad_mix(g1, g2, uvws));
658}
659# endif
660
661# undef negate_if_nth_bit
662
663#endif
664
665/* Remap the output of noise to a predictable range [-1, 1].
666 * The scale values were computed experimentally by the OSL developers.
667 */
668
670{
 /* Scale factor applied to the 1D noise result (see the remap comment above). */
671 return 0.2500f * result;
672}
673
675{
 /* Scale factor applied to the 2D noise result (see the remap comment above). */
676 return 0.6616f * result;
677}
678
680{
 /* Scale factor applied to the 3D noise result (see the remap comment above). */
681 return 0.9820f * result;
682}
683
685{
 /* Scale factor applied to the 4D noise result (see the remap comment above). */
686 return 0.8344f * result;
687}
688
689/* Safe Signed And Unsigned Noise */
690
692{
 /* Signed 1D noise: wrap the coordinate, evaluate Perlin noise, rescale.
  * precision_correction is 0.5 when |p| >= 1000000 and 0 otherwise.
  * NOTE(review): presumably the half-unit shift keeps very large coordinates from
  * landing exactly on lattice points -- confirm the intent. */
693 const float precision_correction = 0.5f * float(fabsf(p) >= 1000000.0f);
694 /* Repeat Perlin noise texture every 100000.0 on each axis to prevent floating point
695 * representation issues. */
696 /* The 1D variant of fmod is called fmodf. */
697 p = fmodf(p, 100000.0f) + precision_correction;
698
699 return noise_scale1(perlin_1d(p));
700}
701
702ccl_device_inline float noise_1d(const float p)
703{
704 return 0.5f * snoise_1d(p) + 0.5f;
705}
706
708{
 /* Signed 2D noise: wrap the coordinate per axis, evaluate Perlin noise, rescale.
  * NOTE(review): mask() is defined elsewhere; presumably it keeps 1.0 in the
  * components where |p| >= 1000000 and zero elsewhere, so the correction is 0.5
  * only on those axes -- confirm. */
709 const float2 precision_correction = 0.5f *
710 mask(fabs(p) >= make_float2(1000000.0f), one_float2());
711
712 /* Repeat Perlin noise texture every 100000.0f on each axis to prevent floating point
713 * representation issues. This causes discontinuities every 100000.0f, however at such scales
714 * this usually shouldn't be noticeable. */
715 p = fmod(p, 100000.0f) + precision_correction;
716
717 return noise_scale2(perlin_2d(p.x, p.y));
718}
719
721{
 /* Unsigned 2D noise: the signed noise value remapped by 0.5 * n + 0.5. */
722 return 0.5f * snoise_2d(p) + 0.5f;
723}
724
726{
 /* Signed 3D noise: wrap the coordinate per axis, evaluate Perlin noise, rescale.
  * NOTE(review): mask() is defined elsewhere; presumably it keeps 1.0 in the
  * components where |p| >= 1000000 and zero elsewhere, so the correction is 0.5
  * only on those axes -- confirm. */
727 const float3 precision_correction = 0.5f *
728 mask(fabs(p) >= make_float3(1000000.0f), one_float3());
729
730 /* Repeat Perlin noise texture every 100000.0f on each axis to prevent floating point
731 * representation issues. This causes discontinuities every 100000.0f, however at such scales
732 * this usually shouldn't be noticeable. */
733 p = fmod(p, 100000.0f) + precision_correction;
734
735 return noise_scale3(perlin_3d(p.x, p.y, p.z));
736}
737
739{
 /* Unsigned 3D noise: the signed noise value remapped by 0.5 * n + 0.5. */
740 return 0.5f * snoise_3d(p) + 0.5f;
741}
742
744{
 /* Signed 4D noise: wrap the coordinate per axis, evaluate Perlin noise, rescale.
  * NOTE(review): mask() is defined elsewhere; presumably it keeps 1.0 in the
  * components where |p| >= 1000000 and zero elsewhere, so the correction is 0.5
  * only on those axes -- confirm. */
745 const float4 precision_correction = 0.5f *
746 mask(fabs(p) >= make_float4(1000000.0f), one_float4());
747
748 /* Repeat Perlin noise texture every 100000.0f on each axis to prevent floating point
749 * representation issues. This causes discontinuities every 100000.0f, however at such scales
750 * this usually shouldn't be noticeable. */
751 p = fmod(p, 100000.0f) + precision_correction;
752
753 return noise_scale4(perlin_4d(p.x, p.y, p.z, p.w));
754}
755
757{
 /* Unsigned 4D noise: the signed noise value remapped by 0.5 * n + 0.5. */
758 return 0.5f * snoise_4d(p) + 0.5f;
759}
760
#define X
#define Z
#define Y
#define X1
Definition RandGen.cpp:24
__forceinline float extract(const int4 &b)
Definition binning.cpp:27
ATTR_WARN_UNUSED_RESULT const BMVert * v2
ATTR_WARN_UNUSED_RESULT const BMVert * v
#define XY(_x, _y)
SIMD_FORCE_INLINE const btScalar & z() const
Return the z value.
Definition btQuadWord.h:117
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition btQuadWord.h:119
nullptr float
#define ccl_device_noinline_cpu
#define ccl_device_inline
#define CCL_NAMESPACE_END
ccl_device_forceinline float3 make_float3(const float x, const float y, const float z)
#define fmodf(x, y)
ccl_device_forceinline int4 make_int4(const int x, const int y, const int z, const int w)
#define select(A, B, C)
ccl_device_inline uint hash_uint4(const uint kx, const uint ky, const uint kz, const uint kw)
Definition hash.h:168
ccl_device_inline uint hash_uint2(const uint kx, const uint ky)
Definition hash.h:139
ccl_device_inline uint hash_uint(const uint kx)
Definition hash.h:126
ccl_device_inline uint hash_uint3(const uint kx, const uint ky, const uint kz)
Definition hash.h:153
ccl_device_inline float floorfrac(const float x, ccl_private int *i)
Definition math_base.h:417
CCL_NAMESPACE_BEGIN ccl_device_inline float madd(const float a, const float b, const float c)
Definition math_fast.h:35
ccl_device_inline float2 fmod(const float2 a, const float b)
ccl_device_inline float2 one_float2()
Definition math_float2.h:18
ccl_device_inline float2 fabs(const float2 a)
ccl_device_inline float2 mask(const MaskType mask, const float2 a)
ccl_device_inline float3 one_float3()
Definition math_float3.h:26
ccl_device_inline float4 one_float4()
Definition math_float4.h:22
ccl_device_inline float snoise_1d(float p)
Definition noise.h:691
ccl_device float bi_mix(const float v0, const float v1, const float v2, const float v3, const float x, float y)
Definition noise.h:64
ccl_device float grad3(const int hash, const float x, float y, const float z)
Definition noise.h:142
ccl_device_noinline_cpu float perlin_2d(const float x, const float y)
Definition noise.h:160
ccl_device_noinline_cpu float perlin_3d(const float x, const float y, float z)
Definition noise.h:181
ccl_device_inline float noise_3d(const float3 p)
Definition noise.h:738
ccl_device_inline float snoise_2d(float2 p)
Definition noise.h:707
ccl_device float quad_mix(const float v0, float v1, float v2, float v3, float v4, float v5, float v6, float v7, float v8, float v9, float v10, float v11, float v12, float v13, float v14, float v15, const float x, const float y, const float z, const float w)
Definition noise.h:108
CCL_NAMESPACE_BEGIN ccl_device float fade(const float t)
Definition noise.h:18
ccl_device_inline float snoise_3d(float3 p)
Definition noise.h:725
ccl_device_inline float negate_if(const float val, const int condition)
Definition noise.h:23
ccl_device float grad1(const int hash, const float x)
Definition noise.h:28
ccl_device_inline float snoise_4d(float4 p)
Definition noise.h:743
ccl_device_inline float noise_4d(const float4 p)
Definition noise.h:756
ccl_device_inline float noise_scale2(const float result)
Definition noise.h:674
ccl_device_inline float noise_scale3(const float result)
Definition noise.h:679
ccl_device_noinline_cpu float perlin_1d(const float x)
Definition noise.h:35
ccl_device_inline float noise_scale1(const float result)
Definition noise.h:669
ccl_device_noinline_cpu float perlin_4d(const float x, const float y, float z, const float w)
Definition noise.h:209
ccl_device float tri_mix(const float v0, float v1, float v2, float v3, float v4, float v5, float v6, float v7, const float x, const float y, const float z)
Definition noise.h:89
ccl_device_inline float noise_scale4(const float result)
Definition noise.h:684
ccl_device float grad2(const int hash, const float x, float y)
Definition noise.h:134
ccl_device_inline float noise_2d(const float2 p)
Definition noise.h:720
ccl_device_inline float noise_1d(const float p)
Definition noise.h:702
ccl_device float grad4(const int hash, const float x, float y, const float z, float w)
Definition noise.h:151
#define hash
Definition noise_c.cc:154
BLI_INLINE float grad(int hash_val, float x, float y, float z)
Definition noise_c.cc:271
#define mix
#define fabsf
#define ccl_device
#define make_float2
#define make_float4
float x
float y
float z
Definition sky_math.h:136
float y
Definition sky_math.h:136
float x
Definition sky_math.h:136
float y
Definition sky_math.h:225
float z
Definition sky_math.h:225
float x
Definition sky_math.h:225
float w
Definition sky_math.h:225
ccl_device_inline vfloat8 make_vfloat8(const float f)
ccl_device_inline vint8 make_vint8(const vfloat8 f)