Blender V4.3
cycles/kernel/device/gpu/image.h
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2017-2022 Blender Foundation
2 *
3 * SPDX-License-Identifier: Apache-2.0 */
4
5#pragma once
6
8
9#if !defined __KERNEL_METAL__
10# ifdef WITH_NANOVDB
11# include "kernel/util/nanovdb.h"
12# endif
13#endif
14
15ccl_device_inline float frac(float x, ccl_private int *ix)
16{
17 int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0);
18 *ix = i;
19 return x - (float)i;
20}
21
22/* w0, w1, w2, and w3 are the four cubic B-spline basis functions. */
23ccl_device float cubic_w0(float a)
24{
25 return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f);
26}
27ccl_device float cubic_w1(float a)
28{
29 return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f);
30}
31ccl_device float cubic_w2(float a)
32{
33 return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f);
34}
35ccl_device float cubic_w3(float a)
36{
37 return (1.0f / 6.0f) * (a * a * a);
38}
39
40/* g0 and g1 are the two amplitude functions. */
41ccl_device float cubic_g0(float a)
42{
43 return cubic_w0(a) + cubic_w1(a);
44}
45ccl_device float cubic_g1(float a)
46{
47 return cubic_w2(a) + cubic_w3(a);
48}
49
/* h0 and h1 are the two offset functions. */
51ccl_device float cubic_h0(float a)
52{
53 return (cubic_w1(a) / cubic_g0(a)) - 1.0f;
54}
55ccl_device float cubic_h1(float a)
56{
57 return (cubic_w3(a) / cubic_g1(a)) + 1.0f;
58}
59
60/* Fast bicubic texture lookup using 4 bilinear lookups, adapted from CUDA samples. */
61template<typename T>
63 float x,
64 float y)
65{
67
68 x = (x * info.width) - 0.5f;
69 y = (y * info.height) - 0.5f;
70
71 float px = floorf(x);
72 float py = floorf(y);
73 float fx = x - px;
74 float fy = y - py;
75
76 float g0x = cubic_g0(fx);
77 float g1x = cubic_g1(fx);
78 /* Note +0.5 offset to compensate for CUDA linear filtering convention. */
79 float x0 = (px + cubic_h0(fx) + 0.5f) / info.width;
80 float x1 = (px + cubic_h1(fx) + 0.5f) / info.width;
81 float y0 = (py + cubic_h0(fy) + 0.5f) / info.height;
82 float y1 = (py + cubic_h1(fy) + 0.5f) / info.height;
83
84 return cubic_g0(fy) * (g0x * ccl_gpu_tex_object_read_2D<T>(tex, x0, y0) +
85 g1x * ccl_gpu_tex_object_read_2D<T>(tex, x1, y0)) +
86 cubic_g1(fy) * (g0x * ccl_gpu_tex_object_read_2D<T>(tex, x0, y1) +
87 g1x * ccl_gpu_tex_object_read_2D<T>(tex, x1, y1));
88}
89
90/* Fast tricubic texture lookup using 8 trilinear lookups. */
91template<typename T>
93kernel_tex_image_interp_tricubic(ccl_global const TextureInfo &info, float x, float y, float z)
94{
96
97 x = (x * info.width) - 0.5f;
98 y = (y * info.height) - 0.5f;
99 z = (z * info.depth) - 0.5f;
100
101 float px = floorf(x);
102 float py = floorf(y);
103 float pz = floorf(z);
104 float fx = x - px;
105 float fy = y - py;
106 float fz = z - pz;
107
108 float g0x = cubic_g0(fx);
109 float g1x = cubic_g1(fx);
110 float g0y = cubic_g0(fy);
111 float g1y = cubic_g1(fy);
112 float g0z = cubic_g0(fz);
113 float g1z = cubic_g1(fz);
114
115 /* Note +0.5 offset to compensate for CUDA linear filtering convention. */
116 float x0 = (px + cubic_h0(fx) + 0.5f) / info.width;
117 float x1 = (px + cubic_h1(fx) + 0.5f) / info.width;
118 float y0 = (py + cubic_h0(fy) + 0.5f) / info.height;
119 float y1 = (py + cubic_h1(fy) + 0.5f) / info.height;
120 float z0 = (pz + cubic_h0(fz) + 0.5f) / info.depth;
121 float z1 = (pz + cubic_h1(fz) + 0.5f) / info.depth;
122
123 return g0z * (g0y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y0, z0) +
124 g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y0, z0)) +
125 g1y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y1, z0) +
126 g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y1, z0))) +
127 g1z * (g0y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y0, z1) +
128 g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y0, z1)) +
129 g1y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y1, z1) +
130 g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y1, z1)));
131}
132
133#ifdef WITH_NANOVDB
134template<typename OutT, typename Acc>
135ccl_device OutT
136kernel_tex_image_interp_trilinear_nanovdb(ccl_private Acc &acc, float x, float y, float z)
137{
138 int ix, iy, iz;
139 const float tx = frac(x - 0.5f, &ix);
140 const float ty = frac(y - 0.5f, &iy);
141 const float tz = frac(z - 0.5f, &iz);
142
143 return mix(mix(mix(OutT(acc.getValue(nanovdb::Coord(ix, iy, iz))),
144 OutT(acc.getValue(nanovdb::Coord(ix, iy, iz + 1))),
145 tz),
146 mix(OutT(acc.getValue(nanovdb::Coord(ix, iy + 1, iz + 1))),
147 OutT(acc.getValue(nanovdb::Coord(ix, iy + 1, iz))),
148 1.0f - tz),
149 ty),
150 mix(mix(OutT(acc.getValue(nanovdb::Coord(ix + 1, iy + 1, iz))),
151 OutT(acc.getValue(nanovdb::Coord(ix + 1, iy + 1, iz + 1))),
152 tz),
153 mix(OutT(acc.getValue(nanovdb::Coord(ix + 1, iy, iz + 1))),
154 OutT(acc.getValue(nanovdb::Coord(ix + 1, iy, iz))),
155 1.0f - tz),
156 1.0f - ty),
157 tx);
158}
159
160template<typename OutT, typename Acc>
161ccl_device OutT
162kernel_tex_image_interp_tricubic_nanovdb(ccl_private Acc &acc, float x, float y, float z)
163{
164 int ix, iy, iz;
165 int nix, niy, niz;
166 int pix, piy, piz;
167 int nnix, nniy, nniz;
168
169 /* A -0.5 offset is used to center the cubic samples around the sample point. */
170 const float tx = frac(x - 0.5f, &ix);
171 const float ty = frac(y - 0.5f, &iy);
172 const float tz = frac(z - 0.5f, &iz);
173
174 pix = ix - 1;
175 piy = iy - 1;
176 piz = iz - 1;
177 nix = ix + 1;
178 niy = iy + 1;
179 niz = iz + 1;
180 nnix = ix + 2;
181 nniy = iy + 2;
182 nniz = iz + 2;
183
184 const int xc[4] = {pix, ix, nix, nnix};
185 const int yc[4] = {piy, iy, niy, nniy};
186 const int zc[4] = {piz, iz, niz, nniz};
187 float u[4], v[4], w[4];
188
189 /* Some helper macros to keep code size reasonable.
190 * Lets the compiler inline all the matrix multiplications.
191 */
192# define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
193 { \
194 u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \
195 u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \
196 u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \
197 u[3] = (1.0f / 6.0f) * t * t * t; \
198 } \
199 (void)0
200
201# define DATA(x, y, z) (OutT(acc.getValue(nanovdb::Coord(xc[x], yc[y], zc[z]))))
202# define COL_TERM(col, row) \
203 (v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \
204 u[3] * DATA(3, col, row)))
205# define ROW_TERM(row) \
206 (w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row)))
207
211
212 /* Actual interpolation. */
213 return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
214
215# undef COL_TERM
216# undef ROW_TERM
217# undef DATA
218# undef SET_CUBIC_SPLINE_WEIGHTS
219}
220
221# if defined(__KERNEL_METAL__)
222template<typename OutT, typename T>
223__attribute__((noinline)) OutT kernel_tex_image_interp_nanovdb(
224 ccl_global const TextureInfo &info, float x, float y, float z, uint interpolation)
225# else
226template<typename OutT, typename T>
227ccl_device_noinline OutT kernel_tex_image_interp_nanovdb(
228 ccl_global const TextureInfo &info, float x, float y, float z, uint interpolation)
229# endif
230{
231 using namespace nanovdb;
232
233 ccl_global NanoGrid<T> *const grid = (ccl_global NanoGrid<T> *)info.data;
234
235 switch (interpolation) {
237 ReadAccessor<T> acc(grid->tree().root());
238 const nanovdb::Coord coord((int32_t)floorf(x), (int32_t)floorf(y), (int32_t)floorf(z));
239 return OutT(acc.getValue(coord));
240 }
242 CachedReadAccessor<T> acc(grid->tree().root());
243 return kernel_tex_image_interp_trilinear_nanovdb<OutT>(acc, x, y, z);
244 }
245 default: {
246 CachedReadAccessor<T> acc(grid->tree().root());
247 return kernel_tex_image_interp_tricubic_nanovdb<OutT>(acc, x, y, z);
248 }
249 }
250}
251#endif
252
253ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, int id, float x, float y)
254{
255 ccl_global const TextureInfo &info = kernel_data_fetch(texture_info, id);
256
257 /* float4, byte4, ushort4 and half4 */
258 const int texture_type = info.data_type;
259 if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
260 texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4)
261 {
262 if (info.interpolation == INTERPOLATION_CUBIC || info.interpolation == INTERPOLATION_SMART) {
264 }
265 else {
268 }
269 }
270 /* float, byte and half */
271 else {
272 float f;
273
274 if (info.interpolation == INTERPOLATION_CUBIC || info.interpolation == INTERPOLATION_SMART) {
276 }
277 else {
280 }
281
282 return make_float4(f, f, f, 1.0f);
283 }
284}
285
287 int id,
288 float3 P,
290{
291 ccl_global const TextureInfo &info = kernel_data_fetch(texture_info, id);
292
293 if (info.use_transform_3d) {
294 P = transform_point(&info.transform_3d, P);
295 }
296
297 const float x = P.x;
298 const float y = P.y;
299 const float z = P.z;
300
301 uint interpolation = (interp == INTERPOLATION_NONE) ? info.interpolation : interp;
302 const int texture_type = info.data_type;
303
304#ifdef WITH_NANOVDB
305 if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT) {
306 float f = kernel_tex_image_interp_nanovdb<float, float>(info, x, y, z, interpolation);
307 return make_float4(f, f, f, 1.0f);
308 }
309 if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) {
310 float3 f = kernel_tex_image_interp_nanovdb<float3, packed_float3>(
311 info, x, y, z, interpolation);
312 return make_float4(f.x, f.y, f.z, 1.0f);
313 }
314 if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FPN) {
315 float f = kernel_tex_image_interp_nanovdb<float, nanovdb::FpN>(info, x, y, z, interpolation);
316 return make_float4(f, f, f, 1.0f);
317 }
318 if (texture_type == IMAGE_DATA_TYPE_NANOVDB_FP16) {
319 float f = kernel_tex_image_interp_nanovdb<float, nanovdb::Fp16>(info, x, y, z, interpolation);
320 return make_float4(f, f, f, 1.0f);
321 }
322#endif
323 if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
324 texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4)
325 {
326 if (interpolation == INTERPOLATION_CUBIC || interpolation == INTERPOLATION_SMART) {
328 }
329 else {
332 }
333 }
334 else {
335 float f;
336
337 if (interpolation == INTERPOLATION_CUBIC || interpolation == INTERPOLATION_SMART) {
339 }
340 else {
343 }
344
345 return make_float4(f, f, f, 1.0f);
346 }
347}
348
unsigned int uint
ATTR_WARN_UNUSED_RESULT const BMVert * v
SIMD_FORCE_INLINE const btScalar & z() const
Return the z value.
Definition btQuadWord.h:117
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition btQuadWord.h:119
static __attribute__((constructor)) void cpu_check()
Definition cpu_check.cc:94
#define SET_CUBIC_SPLINE_WEIGHTS(u, t)
#define ROW_TERM(row)
ccl_device float cubic_w2(float a)
ccl_device float cubic_h0(float a)
ccl_device float cubic_g0(float a)
ccl_device float cubic_w3(float a)
ccl_device float cubic_w0(float a)
CCL_NAMESPACE_BEGIN ccl_device_inline float frac(float x, ccl_private int *ix)
ccl_device float cubic_w1(float a)
ccl_device_noinline T kernel_tex_image_interp_bicubic(ccl_global const TextureInfo &info, float x, float y)
ccl_device float cubic_g1(float a)
ccl_device float cubic_h1(float a)
ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg, int id, float3 P, InterpolationType interp)
ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, int id, float x, float y)
ccl_device_noinline T kernel_tex_image_interp_tricubic(ccl_global const TextureInfo &info, float x, float y, float z)
const KernelGlobalsCPU *ccl_restrict KernelGlobals
#define kernel_data_fetch(name, index)
ccl_device_forceinline T ccl_gpu_tex_object_read_2D(const ccl_gpu_tex_object_2D texobj, const float x, const float y)
CUtexObject ccl_gpu_tex_object_3D
#define ccl_device
#define ccl_private
#define ccl_device_inline
ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D texobj, const float x, const float y, const float z)
#define ccl_global
#define ccl_device_noinline
#define CCL_NAMESPACE_END
CUtexObject ccl_gpu_tex_object_2D
ccl_device_forceinline float4 make_float4(const float x, const float y, const float z, const float w)
#define floorf(x)
draw_view in_light_buf[] float
#define mix(a, b, c)
Definition hash.h:36
ccl_device_inline float2 interp(const float2 a, const float2 b, float t)
signed int int32_t
Definition stdint.h:77
float z
Definition sky_float3.h:27
float y
Definition sky_float3.h:27
float x
Definition sky_float3.h:27
CCL_NAMESPACE_END CCL_NAMESPACE_BEGIN ccl_device_inline float3 transform_point(ccl_private const Transform *t, const float3 a)
Definition transform.h:63
ccl_device_inline int float_to_int(float f)
Definition util/math.h:424
@ IMAGE_DATA_TYPE_NANOVDB_FP16
@ IMAGE_DATA_TYPE_FLOAT4
@ IMAGE_DATA_TYPE_USHORT4
@ IMAGE_DATA_TYPE_NANOVDB_FLOAT
@ IMAGE_DATA_TYPE_NANOVDB_FLOAT3
@ IMAGE_DATA_TYPE_BYTE4
@ IMAGE_DATA_TYPE_HALF4
@ IMAGE_DATA_TYPE_NANOVDB_FPN
InterpolationType
@ INTERPOLATION_LINEAR
@ INTERPOLATION_SMART
@ INTERPOLATION_NONE
@ INTERPOLATION_CLOSEST
@ INTERPOLATION_CUBIC