Blender V4.3
noise.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2009-2010 Sony Pictures Imageworks Inc., et al. All Rights Reserved.
2 * SPDX-FileCopyrightText: 2011-2022 Blender Foundation
3 *
4 * SPDX-License-Identifier: BSD-3-Clause
5 *
6 * Adapted code from Open Shading Language. */
7
8#pragma once
9
11
12/* **** Perlin Noise **** */
13
14ccl_device float fade(float t)
15{
16 return t * t * t * (t * (t * 6.0f - 15.0f) + 10.0f);
17}
18
19ccl_device_inline float negate_if(float val, int condition)
20{
21 return (condition) ? -val : val;
22}
23
24ccl_device float grad1(int hash, float x)
25{
26 int h = hash & 15;
27 float g = 1 + (h & 7);
28 return negate_if(g, h & 8) * x;
29}
30
32{
33 int X;
34 float fx = floorfrac(x, &X);
35 float u = fade(fx);
36
37 return mix(grad1(hash_uint(X), fx), grad1(hash_uint(X + 1), fx - 1.0f), u);
38}
39
40/* 2D, 3D, and 4D noise can be accelerated using SSE, so we first check if
41 * SSE is supported, that is, if __KERNEL_SSE__ is defined. If it is not
42 * supported, we do a standard implementation, but if it is supported, we
43 * do an implementation using SSE intrinsics.
44 */
45#if !defined(__KERNEL_SSE__)
46
47/* ** Standard Implementation ** */
48
49/* Bilinear Interpolation:
50 *
51 * v2 v3
52 * @ + + + + @ y
53 * + + ^
54 * + + |
55 * + + |
56 * @ + + + + @ @------> x
57 * v0 v1
58 *
59 */
60ccl_device float bi_mix(float v0, float v1, float v2, float v3, float x, float y)
61{
62 float x1 = 1.0f - x;
63 return (1.0f - y) * (v0 * x1 + v1 * x) + y * (v2 * x1 + v3 * x);
64}
65
66/* Trilinear Interpolation:
67 *
68 * v6 v7
69 * @ + + + + + + @
70 * +\ +\
71 * + \ + \
72 * + \ + \
73 * + \ v4 + \ v5
74 * + @ + + + +++ + @ z
75 * + + + + y ^
76 * v2 @ + +++ + + + @ v3 + \ |
77 * \ + \ + \ |
78 * \ + \ + \|
79 * \ + \ + +---------> x
80 * \+ \+
81 * @ + + + + + + @
82 * v0 v1
83 */
84ccl_device float tri_mix(float v0,
85 float v1,
86 float v2,
87 float v3,
88 float v4,
89 float v5,
90 float v6,
91 float v7,
92 float x,
93 float y,
94 float z)
95{
96 float x1 = 1.0f - x;
97 float y1 = 1.0f - y;
98 float z1 = 1.0f - z;
99 return z1 * (y1 * (v0 * x1 + v1 * x) + y * (v2 * x1 + v3 * x)) +
100 z * (y1 * (v4 * x1 + v5 * x) + y * (v6 * x1 + v7 * x));
101}
102
103ccl_device float quad_mix(float v0,
104 float v1,
105 float v2,
106 float v3,
107 float v4,
108 float v5,
109 float v6,
110 float v7,
111 float v8,
112 float v9,
113 float v10,
114 float v11,
115 float v12,
116 float v13,
117 float v14,
118 float v15,
119 float x,
120 float y,
121 float z,
122 float w)
123{
124 return mix(tri_mix(v0, v1, v2, v3, v4, v5, v6, v7, x, y, z),
125 tri_mix(v8, v9, v10, v11, v12, v13, v14, v15, x, y, z),
126 w);
127}
128
129ccl_device float grad2(int hash, float x, float y)
130{
131 int h = hash & 7;
132 float u = h < 4 ? x : y;
133 float v = 2.0f * (h < 4 ? y : x);
134 return negate_if(u, h & 1) + negate_if(v, h & 2);
135}
136
137ccl_device float grad3(int hash, float x, float y, float z)
138{
139 int h = hash & 15;
140 float u = h < 8 ? x : y;
141 float vt = ((h == 12) || (h == 14)) ? x : z;
142 float v = h < 4 ? y : vt;
143 return negate_if(u, h & 1) + negate_if(v, h & 2);
144}
145
146ccl_device float grad4(int hash, float x, float y, float z, float w)
147{
148 int h = hash & 31;
149 float u = h < 24 ? x : y;
150 float v = h < 16 ? y : z;
151 float s = h < 8 ? z : w;
152 return negate_if(u, h & 1) + negate_if(v, h & 2) + negate_if(s, h & 4);
153}
154
155ccl_device_noinline_cpu float perlin_2d(float x, float y)
156{
157 int X;
158 int Y;
159
160 float fx = floorfrac(x, &X);
161 float fy = floorfrac(y, &Y);
162
163 float u = fade(fx);
164 float v = fade(fy);
165
166 float r = bi_mix(grad2(hash_uint2(X, Y), fx, fy),
167 grad2(hash_uint2(X + 1, Y), fx - 1.0f, fy),
168 grad2(hash_uint2(X, Y + 1), fx, fy - 1.0f),
169 grad2(hash_uint2(X + 1, Y + 1), fx - 1.0f, fy - 1.0f),
170 u,
171 v);
172
173 return r;
174}
175
176ccl_device_noinline_cpu float perlin_3d(float x, float y, float z)
177{
178 int X;
179 int Y;
180 int Z;
181
182 float fx = floorfrac(x, &X);
183 float fy = floorfrac(y, &Y);
184 float fz = floorfrac(z, &Z);
185
186 float u = fade(fx);
187 float v = fade(fy);
188 float w = fade(fz);
189
190 float r = tri_mix(grad3(hash_uint3(X, Y, Z), fx, fy, fz),
191 grad3(hash_uint3(X + 1, Y, Z), fx - 1.0f, fy, fz),
192 grad3(hash_uint3(X, Y + 1, Z), fx, fy - 1.0f, fz),
193 grad3(hash_uint3(X + 1, Y + 1, Z), fx - 1.0f, fy - 1.0f, fz),
194 grad3(hash_uint3(X, Y, Z + 1), fx, fy, fz - 1.0f),
195 grad3(hash_uint3(X + 1, Y, Z + 1), fx - 1.0f, fy, fz - 1.0f),
196 grad3(hash_uint3(X, Y + 1, Z + 1), fx, fy - 1.0f, fz - 1.0f),
197 grad3(hash_uint3(X + 1, Y + 1, Z + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f),
198 u,
199 v,
200 w);
201 return r;
202}
203
204ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
205{
206 int X;
207 int Y;
208 int Z;
209 int W;
210
211 float fx = floorfrac(x, &X);
212 float fy = floorfrac(y, &Y);
213 float fz = floorfrac(z, &Z);
214 float fw = floorfrac(w, &W);
215
216 float u = fade(fx);
217 float v = fade(fy);
218 float t = fade(fz);
219 float s = fade(fw);
220
221 float r = quad_mix(
222 grad4(hash_uint4(X, Y, Z, W), fx, fy, fz, fw),
223 grad4(hash_uint4(X + 1, Y, Z, W), fx - 1.0f, fy, fz, fw),
224 grad4(hash_uint4(X, Y + 1, Z, W), fx, fy - 1.0f, fz, fw),
225 grad4(hash_uint4(X + 1, Y + 1, Z, W), fx - 1.0f, fy - 1.0f, fz, fw),
226 grad4(hash_uint4(X, Y, Z + 1, W), fx, fy, fz - 1.0f, fw),
227 grad4(hash_uint4(X + 1, Y, Z + 1, W), fx - 1.0f, fy, fz - 1.0f, fw),
228 grad4(hash_uint4(X, Y + 1, Z + 1, W), fx, fy - 1.0f, fz - 1.0f, fw),
229 grad4(hash_uint4(X + 1, Y + 1, Z + 1, W), fx - 1.0f, fy - 1.0f, fz - 1.0f, fw),
230 grad4(hash_uint4(X, Y, Z, W + 1), fx, fy, fz, fw - 1.0f),
231 grad4(hash_uint4(X + 1, Y, Z, W + 1), fx - 1.0f, fy, fz, fw - 1.0f),
232 grad4(hash_uint4(X, Y + 1, Z, W + 1), fx, fy - 1.0f, fz, fw - 1.0f),
233 grad4(hash_uint4(X + 1, Y + 1, Z, W + 1), fx - 1.0f, fy - 1.0f, fz, fw - 1.0f),
234 grad4(hash_uint4(X, Y, Z + 1, W + 1), fx, fy, fz - 1.0f, fw - 1.0f),
235 grad4(hash_uint4(X + 1, Y, Z + 1, W + 1), fx - 1.0f, fy, fz - 1.0f, fw - 1.0f),
236 grad4(hash_uint4(X, Y + 1, Z + 1, W + 1), fx, fy - 1.0f, fz - 1.0f, fw - 1.0f),
237 grad4(hash_uint4(X + 1, Y + 1, Z + 1, W + 1), fx - 1.0f, fy - 1.0f, fz - 1.0f, fw - 1.0f),
238 u,
239 v,
240 t,
241 s);
242
243 return r;
244}
245
246#else /* SSE is supported. */
247
248/* ** SSE Implementation ** */
249
250/* SSE Bilinear Interpolation:
251 *
252 * The function takes two float4 inputs:
253 * - p : Contains the values at the points (v0, v1, v2, v3).
254 * - f : Contains the values (x, y, _, _). The third and fourth values are unused.
255 *
256 * The interpolation is done in two steps:
257 * 1. Interpolate (v0, v1) and (v2, v3) along the x axis to get g (g0, g1).
258 * (v2, v3) is generated by moving v2 and v3 to the first and second
259 * places of the float4 using the shuffle mask <2, 3, 2, 3>. The third and
260 * fourth values are unused.
261 * 2. Interpolate g0 and g1 along the y axis to get the final value.
 *    g1 is generated by populating a float4 with the second value of g.
263 * Only the first value is important in the final float4.
264 *
265 * v1 v3 g1
266 * @ + + + + @ @ y
267 * + + (1) + (2) ^
268 * + + ---> + ---> final |
269 * + + + |
270 * @ + + + + @ @ @------> x
271 * v0 v2 g0
272 *
273 */
274ccl_device_inline float4 bi_mix(float4 p, float4 f)
275{
276 float4 g = mix(p, shuffle<2, 3, 2, 3>(p), shuffle<0>(f));
277 return mix(g, shuffle<1>(g), shuffle<1>(f));
278}
279
280ccl_device_inline float4 fade(const float4 t)
281{
282 float4 a = madd(t, make_float4(6.0f), make_float4(-15.0f));
283 float4 b = madd(t, a, make_float4(10.0f));
284 return (t * t) * (t * b);
285}
286
/* Negate val in every lane where the nth bit of h is 1: the bit is isolated,
 * shifted up to bit 31 and XOR-ed onto val.
 * NOTE(review): presumably cast() reinterprets the integer lanes as float
 * lanes so that bit 31 lands on the sign bit - confirm. */
#  define negate_if_nth_bit(val, h, n) \
    ((val) ^ cast(((h) & (1 << (n))) << (31 - (n))))
289
290ccl_device_inline float4 grad(const int4 hash, const float4 x, const float4 y)
291{
292 int4 h = hash & 7;
293 float4 u = select(h < 4, x, y);
294 float4 v = 2.0f * select(h < 4, y, x);
295 return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
296}
297
298/* We use SSE to compute and interpolate 4 gradients at once:
299 *
300 * Point Offset from v0
301 * v0 (0, 0)
302 * v1 (0, 1)
303 * v2 (1, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(V, V + 1))
304 * v3 (1, 1) ^
305 * | |__________| (0, 0, 1, 1) = shuffle<0, 0, 0, 0>(V, V + 1)
306 * | ^
307 * |__________________________|
308 *
309 */
310ccl_device_noinline_cpu float perlin_2d(float x, float y)
311{
312 int4 XY;
313 float4 fxy = floorfrac(make_float4(x, y, 0.0f, 0.0f), &XY);
314 float4 uv = fade(fxy);
315
316 int4 XY1 = XY + make_int4(1);
317 int4 X = shuffle<0, 0, 0, 0>(XY, XY1);
318 int4 Y = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(XY, XY1));
319
320 int4 h = hash_int4_2(X, Y);
321
322 float4 fxy1 = fxy - make_float4(1.0f);
323 float4 fx = shuffle<0, 0, 0, 0>(fxy, fxy1);
324 float4 fy = shuffle<0, 2, 0, 2>(shuffle<1, 1, 1, 1>(fxy, fxy1));
325
326 float4 g = grad(h, fx, fy);
327
328 return extract<0>(bi_mix(g, uv));
329}
330
331/* SSE Trilinear Interpolation:
332 *
333 * The function takes three float4 inputs:
334 * - p : Contains the values at the points (v0, v1, v2, v3).
335 * - q : Contains the values at the points (v4, v5, v6, v7).
336 * - f : Contains the values (x, y, z, _). The fourth value is unused.
337 *
338 * The interpolation is done in three steps:
339 * 1. Interpolate p and q along the x axis to get s (s0, s1, s2, s3).
340 * 2. Interpolate (s0, s1) and (s2, s3) along the y axis to get g (g0, g1).
 *    (s2, s3) is generated by moving s2 and s3 to the first and second
 *    places of the float4 using the shuffle mask <2, 3, 2, 3>. The third and
 *    fourth values are unused.
 * 3. Interpolate g0 and g1 along the z axis to get the final value.
 *    g1 is generated by populating a float4 with the second value of g.
346 * Only the first value is important in the final float4.
347 *
348 * v3 v7
349 * @ + + + + + + @ s3 @
350 * +\ +\ +\
351 * + \ + \ + \
352 * + \ + \ + \ g1
353 * + \ v1 + \ v5 + \ s1 @
354 * + @ + + + +++ + @ + @ + z
355 * + + + + (1) + + (2) + (3) y ^
356 * v2 @ + +++ + + + @ v6 + ---> s2 @ + ---> + ---> final \ |
357 * \ + \ + \ + + \ |
358 * \ + \ + \ + + \|
359 * \ + \ + \ + @ +---------> x
360 * \+ \+ \+ g0
361 * @ + + + + + + @ @
362 * v0 v4 s0
363 */
364ccl_device_inline float4 tri_mix(float4 p, float4 q, float4 f)
365{
366 float4 s = mix(p, q, shuffle<0>(f));
367 float4 g = mix(s, shuffle<2, 3, 2, 3>(s), shuffle<1>(f));
368 return mix(g, shuffle<1>(g), shuffle<2>(f));
369}
370
/* 3D and 4D noise can be accelerated using AVX2, so we first check if AVX2
 * is supported, that is, if __KERNEL_AVX2__ is defined. If it is not
 * supported, we do an SSE implementation, but if it is supported,
 * we do an implementation using AVX intrinsics.
 */
376# if !defined(__KERNEL_AVX2__)
377
378ccl_device_inline float4 grad(const int4 hash, const float4 x, const float4 y, const float4 z)
379{
380 int4 h = hash & 15;
381 float4 u = select(h < 8, x, y);
382 float4 vt = select((h == 12) | (h == 14), x, z);
383 float4 v = select(h < 4, y, vt);
384 return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
385}
386
388grad(const int4 hash, const float4 x, const float4 y, const float4 z, const float4 w)
389{
390 int4 h = hash & 31;
391 float4 u = select(h < 24, x, y);
392 float4 v = select(h < 16, y, z);
393 float4 s = select(h < 8, z, w);
394 return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1) + negate_if_nth_bit(s, h, 2);
395}
396
397/* SSE Quadrilinear Interpolation:
398 *
399 * Quadrilinear interpolation is as simple as a linear interpolation
400 * between two trilinear interpolations.
401 *
402 */
403ccl_device_inline float4 quad_mix(float4 p, float4 q, float4 r, float4 s, float4 f)
404{
405 return mix(tri_mix(p, q, f), tri_mix(r, s, f), shuffle<3>(f));
406}
407
408/* We use SSE to compute and interpolate 4 gradients at once. Since we have 8
409 * gradients in 3D, we need to compute two sets of gradients at the points:
410 *
411 * Point Offset from v0
412 * v0 (0, 0, 0)
413 * v1 (0, 0, 1)
414 * v2 (0, 1, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1))
415 * v3 (0, 1, 1) ^
416 * | |__________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1)
417 * | ^
418 * |__________________________|
419 *
420 * Point Offset from v0
421 * v4 (1, 0, 0)
422 * v5 (1, 0, 1)
423 * v6 (1, 1, 0)
424 * v7 (1, 1, 1)
425 *
426 */
427ccl_device_noinline_cpu float perlin_3d(float x, float y, float z)
428{
429 int4 XYZ;
430 float4 fxyz = floorfrac(make_float4(x, y, z, 0.0f), &XYZ);
431 float4 uvw = fade(fxyz);
432
433 int4 XYZ1 = XYZ + make_int4(1);
434 int4 Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1);
435 int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1));
436
437 int4 h1 = hash_int4_3(shuffle<0>(XYZ), Y, Z);
438 int4 h2 = hash_int4_3(shuffle<0>(XYZ1), Y, Z);
439
440 float4 fxyz1 = fxyz - make_float4(1.0f);
441 float4 fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1);
442 float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1));
443
444 float4 g1 = grad(h1, shuffle<0>(fxyz), fy, fz);
445 float4 g2 = grad(h2, shuffle<0>(fxyz1), fy, fz);
446
447 return extract<0>(tri_mix(g1, g2, uvw));
448}
449
450/* We use SSE to compute and interpolate 4 gradients at once. Since we have 16
451 * gradients in 4D, we need to compute four sets of gradients at the points:
452 *
453 * Point Offset from v0
454 * v0 (0, 0, 0, 0)
455 * v1 (0, 0, 1, 0)
456 * v2 (0, 1, 0, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1))
457 * v3 (0, 1, 1, 0) ^
458 * | |________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1)
459 * | ^
460 * |_______________________|
461 *
462 * Point Offset from v0
463 * v4 (1, 0, 0, 0)
464 * v5 (1, 0, 1, 0)
465 * v6 (1, 1, 0, 0)
466 * v7 (1, 1, 1, 0)
467 *
468 * Point Offset from v0
469 * v8 (0, 0, 0, 1)
470 * v9 (0, 0, 1, 1)
471 * v10 (0, 1, 0, 1)
472 * v11 (0, 1, 1, 1)
473 *
474 * Point Offset from v0
475 * v12 (1, 0, 0, 1)
476 * v13 (1, 0, 1, 1)
477 * v14 (1, 1, 0, 1)
478 * v15 (1, 1, 1, 1)
479 *
480 */
481ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
482{
483 int4 XYZW;
484 float4 fxyzw = floorfrac(make_float4(x, y, z, w), &XYZW);
485 float4 uvws = fade(fxyzw);
486
487 int4 XYZW1 = XYZW + make_int4(1);
488 int4 Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1);
489 int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1));
490
491 int4 h1 = hash_int4_4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW));
492 int4 h2 = hash_int4_4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW));
493
494 int4 h3 = hash_int4_4(shuffle<0>(XYZW), Y, Z, shuffle<3>(XYZW1));
495 int4 h4 = hash_int4_4(shuffle<0>(XYZW1), Y, Z, shuffle<3>(XYZW1));
496
497 float4 fxyzw1 = fxyzw - make_float4(1.0f);
498 float4 fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1);
499 float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1));
500
501 float4 g1 = grad(h1, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw));
502 float4 g2 = grad(h2, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw));
503
504 float4 g3 = grad(h3, shuffle<0>(fxyzw), fy, fz, shuffle<3>(fxyzw1));
505 float4 g4 = grad(h4, shuffle<0>(fxyzw1), fy, fz, shuffle<3>(fxyzw1));
506
507 return extract<0>(quad_mix(g1, g2, g3, g4, uvws));
508}
509
510# else /* AVX is supported. */
511
512/* AVX Implementation */
513
514ccl_device_inline vfloat8 grad(const vint8 hash, const vfloat8 x, const vfloat8 y, const vfloat8 z)
515{
516 vint8 h = hash & 15;
517 vfloat8 u = select(h < 8, x, y);
518 vfloat8 vt = select((h == 12) | (h == 14), x, z);
519 vfloat8 v = select(h < 4, y, vt);
520 return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1);
521}
522
523ccl_device_inline vfloat8
524grad(const vint8 hash, const vfloat8 x, const vfloat8 y, const vfloat8 z, const vfloat8 w)
525{
526 vint8 h = hash & 31;
527 vfloat8 u = select(h < 24, x, y);
528 vfloat8 v = select(h < 16, y, z);
529 vfloat8 s = select(h < 8, z, w);
530 return negate_if_nth_bit(u, h, 0) + negate_if_nth_bit(v, h, 1) + negate_if_nth_bit(s, h, 2);
531}
532
/* AVX Quadrilinear Interpolation:
 *
 * The interpolation is done in two steps:
 * 1. Interpolate p and q along the w axis to get s.
 * 2. Trilinearly interpolate (s0, s1, s2, s3) and (s4, s5, s6, s7) to get the final
 *    value. (s0, s1, s2, s3) and (s4, s5, s6, s7) are generated by extracting the
 *    low and high float4 from s.
 *
 */
542ccl_device_inline float4 quad_mix(vfloat8 p, vfloat8 q, float4 f)
543{
544 float4 fv = shuffle<3>(f);
545 vfloat8 s = mix(p, q, make_vfloat8(fv, fv));
546 return tri_mix(low(s), high(s), f);
547}
548
549/* We use AVX to compute and interpolate 8 gradients at once.
550 *
551 * Point Offset from v0
552 * v0 (0, 0, 0)
553 * v1 (0, 0, 1) The full AVX type is computed by inserting the following
554 * v2 (0, 1, 0) SSE types into both the low and high parts of the AVX.
555 * v3 (0, 1, 1)
556 * v4 (1, 0, 0)
557 * v5 (1, 0, 1) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1))
558 * v6 (1, 1, 0) ^
559 * v7 (1, 1, 1) |
560 * | |__________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1)
561 * | ^
562 * |__________________________|
563 *
564 */
565ccl_device_noinline_cpu float perlin_3d(float x, float y, float z)
566{
567 int4 XYZ;
568 float4 fxyz = floorfrac(make_float4(x, y, z, 0.0f), &XYZ);
569 float4 uvw = fade(fxyz);
570
571 int4 XYZ1 = XYZ + make_int4(1);
572 int4 X = shuffle<0>(XYZ);
573 int4 X1 = shuffle<0>(XYZ1);
574 int4 Y = shuffle<1, 1, 1, 1>(XYZ, XYZ1);
575 int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZ, XYZ1));
576
577 vint8 h = hash_int8_3(make_vint8(X, X1), make_vint8(Y, Y), make_vint8(Z, Z));
578
579 float4 fxyz1 = fxyz - make_float4(1.0f);
580 float4 fx = shuffle<0>(fxyz);
581 float4 fx1 = shuffle<0>(fxyz1);
582 float4 fy = shuffle<1, 1, 1, 1>(fxyz, fxyz1);
583 float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyz, fxyz1));
584
585 vfloat8 g = grad(h, make_vfloat8(fx, fx1), make_vfloat8(fy, fy), make_vfloat8(fz, fz));
586
587 return extract<0>(tri_mix(low(g), high(g), uvw));
588}
589
590/* We use AVX to compute and interpolate 8 gradients at once. Since we have 16
591 * gradients in 4D, we need to compute two sets of gradients at the points:
592 *
593 * Point Offset from v0
594 * v0 (0, 0, 0, 0)
595 * v1 (0, 0, 1, 0) The full AVX type is computed by inserting the following
596 * v2 (0, 1, 0, 0) SSE types into both the low and high parts of the AVX.
597 * v3 (0, 1, 1, 0)
598 * v4 (1, 0, 0, 0)
599 * v5 (1, 0, 1, 0) (0, 1, 0, 1) = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(V, V + 1))
600 * v6 (1, 1, 0, 0) ^
601 * v7 (1, 1, 1, 0) |
602 * | |________| (0, 0, 1, 1) = shuffle<1, 1, 1, 1>(V, V + 1)
603 * | ^
604 * |_______________________|
605 *
606 * Point Offset from v0
607 * v8 (0, 0, 0, 1)
608 * v9 (0, 0, 1, 1)
609 * v10 (0, 1, 0, 1)
610 * v11 (0, 1, 1, 1)
611 * v12 (1, 0, 0, 1)
612 * v13 (1, 0, 1, 1)
613 * v14 (1, 1, 0, 1)
614 * v15 (1, 1, 1, 1)
615 *
616 */
617ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
618{
619 int4 XYZW;
620 float4 fxyzw = floorfrac(make_float4(x, y, z, w), &XYZW);
621 float4 uvws = fade(fxyzw);
622
623 int4 XYZW1 = XYZW + make_int4(1);
624 int4 X = shuffle<0>(XYZW);
625 int4 X1 = shuffle<0>(XYZW1);
626 int4 Y = shuffle<1, 1, 1, 1>(XYZW, XYZW1);
627 int4 Z = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(XYZW, XYZW1));
628 int4 W = shuffle<3>(XYZW);
629 int4 W1 = shuffle<3>(XYZW1);
630
631 vint8 h1 = hash_int8_4(make_vint8(X, X1), make_vint8(Y, Y), make_vint8(Z, Z), make_vint8(W, W));
632 vint8 h2 = hash_int8_4(
633 make_vint8(X, X1), make_vint8(Y, Y), make_vint8(Z, Z), make_vint8(W1, W1));
634
635 float4 fxyzw1 = fxyzw - make_float4(1.0f);
636 float4 fx = shuffle<0>(fxyzw);
637 float4 fx1 = shuffle<0>(fxyzw1);
638 float4 fy = shuffle<1, 1, 1, 1>(fxyzw, fxyzw1);
639 float4 fz = shuffle<0, 2, 0, 2>(shuffle<2, 2, 2, 2>(fxyzw, fxyzw1));
640 float4 fw = shuffle<3>(fxyzw);
641 float4 fw1 = shuffle<3>(fxyzw1);
642
643 vfloat8 g1 = grad(
644 h1, make_vfloat8(fx, fx1), make_vfloat8(fy, fy), make_vfloat8(fz, fz), make_vfloat8(fw, fw));
645 vfloat8 g2 = grad(h2,
646 make_vfloat8(fx, fx1),
647 make_vfloat8(fy, fy),
648 make_vfloat8(fz, fz),
649 make_vfloat8(fw1, fw1));
650
651 return extract<0>(quad_mix(g1, g2, uvws));
652}
653# endif
654
655# undef negate_if_nth_bit
656
657#endif
658
659/* Remap the output of noise to a predictable range [-1, 1].
660 * The scale values were computed experimentally by the OSL developers.
661 */
662
664{
665 return 0.2500f * result;
666}
667
669{
670 return 0.6616f * result;
671}
672
674{
675 return 0.9820f * result;
676}
677
679{
680 return 0.8344f * result;
681}
682
683/* Safe Signed And Unsigned Noise */
684
686{
687 float precision_correction = 0.5f * float(fabsf(p) >= 1000000.0f);
688 /* Repeat Perlin noise texture every 100000.0 on each axis to prevent floating point
689 * representation issues. */
690 /* The 1D variant of fmod is called fmodf. */
691 p = fmodf(p, 100000.0f) + precision_correction;
692
693 return noise_scale1(perlin_1d(p));
694}
695
697{
698 return 0.5f * snoise_1d(p) + 0.5f;
699}
700
702{
703 float2 precision_correction = 0.5f * make_float2(float(fabsf(p.x) >= 1000000.0f),
704 float(fabsf(p.y) >= 1000000.0f));
705 /* Repeat Perlin noise texture every 100000.0f on each axis to prevent floating point
706 * representation issues. This causes discontinuities every 100000.0f, however at such scales
707 * this usually shouldn't be noticeable. */
708 p = fmod(p, 100000.0f) + precision_correction;
709
710 return noise_scale2(perlin_2d(p.x, p.y));
711}
712
714{
715 return 0.5f * snoise_2d(p) + 0.5f;
716}
717
719{
720 float3 precision_correction = 0.5f * make_float3(float(fabsf(p.x) >= 1000000.0f),
721 float(fabsf(p.y) >= 1000000.0f),
722 float(fabsf(p.z) >= 1000000.0f));
723 /* Repeat Perlin noise texture every 100000.0f on each axis to prevent floating point
724 * representation issues. This causes discontinuities every 100000.0f, however at such scales
725 * this usually shouldn't be noticeable. */
726 p = fmod(p, 100000.0f) + precision_correction;
727
728 return noise_scale3(perlin_3d(p.x, p.y, p.z));
729}
730
732{
733 return 0.5f * snoise_3d(p) + 0.5f;
734}
735
737{
738 float4 precision_correction = 0.5f * make_float4(float(fabsf(p.x) >= 1000000.0f),
739 float(fabsf(p.y) >= 1000000.0f),
740 float(fabsf(p.z) >= 1000000.0f),
741 float(fabsf(p.w) >= 1000000.0f));
742 /* Repeat Perlin noise texture every 100000.0f on each axis to prevent floating point
743 * representation issues. This causes discontinuities every 100000.0f, however at such scales
744 * this usually shouldn't be noticeable. */
745 p = fmod(p, 100000.0f) + precision_correction;
746
747 return noise_scale4(perlin_4d(p.x, p.y, p.z, p.w));
748}
749
751{
752 return 0.5f * snoise_4d(p) + 0.5f;
753}
754
#define X
#define Z
#define Y
in reality light always falls off quadratically Particle Retrieve the data of the particle that spawned the object for example to give variation to multiple instances of an object Point Retrieve information about points in a point cloud Retrieve the edges of an object as it appears to Cycles topology will always appear triangulated Convert a blackbody temperature to an RGB value Normal Generate a perturbed normal from an RGB normal map image Typically used for faking highly detailed surfaces Generate an OSL shader from a file or text data block Image Sample an image file as a texture Gabor Generate Gabor noise Gradient Generate interpolated color and intensity values based on the input vector Magic Generate a psychedelic color texture Voronoi Generate Worley noise based on the distance to random points Typically used to generate textures such as or biological cells Brick Generate a procedural texture producing bricks Texture Retrieve multiple types of texture coordinates nTypically used as inputs for texture nodes Vector Convert a or normal between and object coordinate space Combine Create a color from its and value channels Color Retrieve a color or the default fallback if none is specified Separate XYZ
#define X1
Definition RandGen.cpp:24
__forceinline float extract(const int4 &b)
Definition binning.cpp:27
ATTR_WARN_UNUSED_RESULT const BMVert * v2
ATTR_WARN_UNUSED_RESULT const BMVert * v
#define XY(_x, _y)
SIMD_FORCE_INLINE const btScalar & z() const
Return the z value.
Definition btQuadWord.h:117
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition btQuadWord.h:119
local_group_size(16, 16) .push_constant(Type b
#define ccl_device
#define ccl_device_noinline_cpu
#define ccl_device_inline
#define CCL_NAMESPACE_END
ccl_device_forceinline float4 make_float4(const float x, const float y, const float z, const float w)
ccl_device_forceinline float3 make_float3(const float x, const float y, const float z)
#define fmodf(x, y)
ccl_device_forceinline float2 make_float2(const float x, const float y)
#define fabsf(x)
ccl_device_forceinline int4 make_int4(const int x, const int y, const int z, const int w)
draw_view in_light_buf[] float
ccl_device_inline uint hash_uint2(uint kx, uint ky)
Definition hash.h:89
ccl_device_inline uint hash_uint3(uint kx, uint ky, uint kz)
Definition hash.h:101
ccl_device_inline uint hash_uint4(uint kx, uint ky, uint kz, uint kw)
Definition hash.h:114
ccl_device_inline uint hash_uint(uint kx)
Definition hash.h:78
#define mix(a, b, c)
Definition hash.h:36
CCL_NAMESPACE_BEGIN ccl_device_inline float madd(const float a, const float b, const float c)
Definition math_fast.h:29
ccl_device_inline float2 fmod(const float2 a, const float b)
ccl_device_inline float4 select(const int4 mask, const float4 a, const float4 b)
VecBase< float, 4 > float4
#define hash
Definition noise.c:154
BLI_INLINE float grad(int hash_val, float x, float y, float z)
Definition noise.c:271
ccl_device float grad4(int hash, float x, float y, float z, float w)
Definition noise.h:146
ccl_device float quad_mix(float v0, float v1, float v2, float v3, float v4, float v5, float v6, float v7, float v8, float v9, float v10, float v11, float v12, float v13, float v14, float v15, float x, float y, float z, float w)
Definition noise.h:103
ccl_device_inline float snoise_1d(float p)
Definition noise.h:685
ccl_device float grad3(int hash, float x, float y, float z)
Definition noise.h:137
ccl_device_noinline_cpu float perlin_2d(float x, float y)
Definition noise.h:155
ccl_device_inline float noise_scale4(float result)
Definition noise.h:678
ccl_device float bi_mix(float v0, float v1, float v2, float v3, float x, float y)
Definition noise.h:60
ccl_device_inline float noise_2d(float2 p)
Definition noise.h:713
ccl_device_inline float snoise_2d(float2 p)
Definition noise.h:701
ccl_device float tri_mix(float v0, float v1, float v2, float v3, float v4, float v5, float v6, float v7, float x, float y, float z)
Definition noise.h:84
ccl_device_inline float snoise_3d(float3 p)
Definition noise.h:718
ccl_device_inline float snoise_4d(float4 p)
Definition noise.h:736
ccl_device_inline float noise_3d(float3 p)
Definition noise.h:731
ccl_device_inline float noise_4d(float4 p)
Definition noise.h:750
ccl_device_inline float noise_scale3(float result)
Definition noise.h:673
ccl_device_inline float noise_scale2(float result)
Definition noise.h:668
CCL_NAMESPACE_BEGIN ccl_device float fade(float t)
Definition noise.h:14
ccl_device float grad2(int hash, float x, float y)
Definition noise.h:129
ccl_device_inline float noise_1d(float p)
Definition noise.h:696
ccl_device_inline float negate_if(float val, int condition)
Definition noise.h:19
ccl_device_noinline_cpu float perlin_4d(float x, float y, float z, float w)
Definition noise.h:204
ccl_device float grad1(int hash, float x)
Definition noise.h:24
ccl_device_inline float noise_scale1(float result)
Definition noise.h:663
ccl_device_noinline_cpu float perlin_1d(float x)
Definition noise.h:31
ccl_device_noinline_cpu float perlin_3d(float x, float y, float z)
Definition noise.h:176
float x
float y
float z
Definition sky_float3.h:27
float y
Definition sky_float3.h:27
float x
Definition sky_float3.h:27
ccl_device_inline vfloat8 make_vfloat8(float f)
ccl_device_inline vint8 make_vint8(int a, int b, int c, int d, int e, int f, int g, int h)
ccl_device_inline float floorfrac(float x, ccl_private int *i)
Definition util/math.h:434