vk_texture.cc
/* SPDX-FileCopyrightText: 2022 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

/** \file
 * \ingroup gpu
 */

#include "GPU_capabilities.hh"

/* vk_common needs to be included first to ensure the win32 Vulkan API is fully initialized
 * before working with it. */
#include "vk_common.hh"

#include "vk_texture.hh"

#include "vk_buffer.hh"
#include "vk_context.hh"
#include "vk_data_conversion.hh"
#include "vk_framebuffer.hh"
#include "vk_memory_layout.hh"
#include "vk_pixel_buffer.hh"
#include "vk_shader.hh"
#include "vk_state_manager.hh"
#include "vk_vertex_buffer.hh"

#include "BLI_math_vector.hh"

#include "BKE_global.hh"

namespace blender::gpu {

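/* Copy regions for buffer <-> image transfers may only reference a single image aspect, so a
 * combined depth/stencil aspect mask has to be reduced to one bit before it is used below. */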
static VkImageAspectFlags to_vk_image_aspect_single_bit(const VkImageAspectFlags format,
                                                        bool stencil)
{
  switch (format) {
    case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
      return (stencil) ? VK_IMAGE_ASPECT_STENCIL_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
    default:
      break;
  }
  return format;
}

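/* Destruction is deferred: the image handle and its allocation are handed to the discard pool so
 * they are only released once the device has finished any work that may still reference them. */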
VKTexture::~VKTexture()
{
  if (vk_image_ != VK_NULL_HANDLE && allocation_ != VK_NULL_HANDLE) {
    VKDiscardPool::discard_pool_get().discard_image(vk_image_, allocation_);
    vk_image_ = VK_NULL_HANDLE;
    allocation_ = VK_NULL_HANDLE;
  }
}

void VKTexture::generate_mipmap()
{
  BLI_assert(!is_texture_view());
  if (mipmaps_ <= 1) {
    return;
  }
  /* Allow users to provide mipmaps stored in compressed textures.
   * Skip generating mipmaps to avoid overriding the existing ones. */
  if (format_flag_ & GPU_FORMAT_COMPRESSED) {
    return;
  }

  VKContext &context = *VKContext::get();
  render_graph::VKUpdateMipmapsNode::Data update_mipmaps = {};
  update_mipmaps.vk_image = vk_image_handle();
  update_mipmaps.l0_size = int3(1);
  mip_size_get(0, update_mipmaps.l0_size);
  if (ELEM(this->type_get(), GPU_TEXTURE_1D_ARRAY)) {
    update_mipmaps.l0_size.y = 1;
    update_mipmaps.l0_size.z = 1;
  }
  else if (ELEM(this->type_get(), GPU_TEXTURE_2D_ARRAY)) {
    update_mipmaps.l0_size.z = 1;
  }
  update_mipmaps.vk_image_aspect = to_vk_image_aspect_flag_bits(device_format_);
  update_mipmaps.mipmaps = mipmaps_;
  update_mipmaps.layer_count = vk_layer_count(1);
  context.render_graph().add_node(update_mipmaps);
}

void VKTexture::copy_to(VKTexture &dst_texture, VkImageAspectFlags vk_image_aspect)
{
  render_graph::VKCopyImageNode::CreateInfo copy_image = {};
  copy_image.node_data.src_image = vk_image_handle();
  copy_image.node_data.dst_image = dst_texture.vk_image_handle();
  copy_image.node_data.region.srcSubresource.aspectMask = vk_image_aspect;
  copy_image.node_data.region.srcSubresource.mipLevel = 0;
  copy_image.node_data.region.srcSubresource.layerCount = vk_layer_count(1);
  copy_image.node_data.region.dstSubresource.aspectMask = vk_image_aspect;
  copy_image.node_data.region.dstSubresource.mipLevel = 0;
  copy_image.node_data.region.dstSubresource.layerCount = vk_layer_count(1);
  copy_image.node_data.region.extent = vk_extent_3d(0);

  VKContext &context = *VKContext::get();
  context.render_graph().add_node(copy_image);
}

void VKTexture::copy_to(Texture *tex)
{
  VKTexture *dst = unwrap(tex);
  VKTexture *src = this;
  BLI_assert(dst);
  BLI_assert(src->w_ == dst->w_ && src->h_ == dst->h_ && src->d_ == dst->d_);
  BLI_assert(src->device_format_ == dst->device_format_);
  BLI_assert(!is_texture_view());
  UNUSED_VARS_NDEBUG(src);

  copy_to(*dst, to_vk_image_aspect_flag_bits(device_format_));
}

void VKTexture::clear(eGPUDataFormat format, const void *data)
{
  if (format == GPU_DATA_UINT_24_8_DEPRECATED) {
    float clear_depth = 0.0f;
    convert_host_to_device(&clear_depth,
                           data,
                           1,
                           format,
                           TextureFormat::SFLOAT_32_DEPTH_UINT_8,
                           TextureFormat::SFLOAT_32_DEPTH_UINT_8);
    clear_depth_stencil(GPU_DEPTH_BIT | GPU_STENCIL_BIT, clear_depth, 0u, std::nullopt);
    return;
  }

  render_graph::VKClearColorImageNode::CreateInfo clear_color_image = {};
  clear_color_image.vk_clear_color_value = to_vk_clear_color_value(format, data);
  clear_color_image.vk_image = vk_image_handle();
  clear_color_image.vk_image_subresource_range.aspectMask = to_vk_image_aspect_flag_bits(
      device_format_);

  IndexRange layers = layer_range();
  clear_color_image.vk_image_subresource_range.baseArrayLayer = layers.start();
  clear_color_image.vk_image_subresource_range.layerCount = layers.size();
  IndexRange levels = mip_map_range();
  clear_color_image.vk_image_subresource_range.baseMipLevel = levels.start();
  clear_color_image.vk_image_subresource_range.levelCount = levels.size();

  VKContext &context = *VKContext::get();

  context.render_graph().add_node(clear_color_image);
}

void VKTexture::clear_depth_stencil(const GPUFrameBufferBits buffer,
                                    float clear_depth,
                                    uint clear_stencil,
                                    std::optional<int> layer)
{
  VkImageAspectFlags vk_image_aspect_device = to_vk_image_aspect_flag_bits(device_format_get());
  VkImageAspectFlags vk_image_aspect = to_vk_image_aspect_flag_bits(buffer) &
                                       vk_image_aspect_device;
  if (vk_image_aspect == VK_IMAGE_ASPECT_NONE) {
    /* Early exit: texture doesn't have any aspect that needs to be cleared. */
    return;
  }

  render_graph::VKClearDepthStencilImageNode::CreateInfo clear_depth_stencil_image = {};
  clear_depth_stencil_image.node_data.vk_image = vk_image_handle();
  clear_depth_stencil_image.vk_image_aspects = vk_image_aspect_device;
  clear_depth_stencil_image.node_data.vk_clear_depth_stencil_value.depth = clear_depth;
  clear_depth_stencil_image.node_data.vk_clear_depth_stencil_value.stencil = clear_stencil;
  clear_depth_stencil_image.node_data.vk_image_subresource_range.aspectMask = vk_image_aspect;
  clear_depth_stencil_image.node_data.vk_image_subresource_range.layerCount =
      VK_REMAINING_ARRAY_LAYERS;
  if (layer.has_value()) {
    clear_depth_stencil_image.node_data.vk_image_subresource_range.baseArrayLayer = *layer;
    clear_depth_stencil_image.node_data.vk_image_subresource_range.layerCount = 1;
  }
  clear_depth_stencil_image.node_data.vk_image_subresource_range.levelCount =
      VK_REMAINING_MIP_LEVELS;

  VKContext &context = *VKContext::get();
  context.render_graph().add_node(clear_depth_stencil_image);
}

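/* The swizzle is only stored here; it is applied later as the component mapping of the image
 * views created in image_view_get(). */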
void VKTexture::swizzle_set(const char swizzle_mask[4])
{
  memcpy(swizzle_, swizzle_mask, 4);
}

void VKTexture::mip_range_set(int min, int max)
{
  mip_min_ = min;
  mip_max_ = max;
}

void VKTexture::read_sub(
    int mip, eGPUDataFormat format, const int region[6], const IndexRange layers, void *r_data)
{
  const int3 offset = int3(region[0], region[1], region[2]);
  const int3 extent = int3(region[3] - region[0], region[4] - region[1], region[5] - region[2]);
  TransferRegion full_transfer_region({offset, extent, layers});
  const VkDeviceSize sample_bytesize = to_bytesize(device_format_);
  const uint64_t x_bytesize = sample_bytesize * extent.x;
  const uint64_t xy_bytesize = x_bytesize * extent.y;
  const uint64_t xyz_bytesize = xy_bytesize * extent.z;
  const uint64_t xyzl_bytesize = xyz_bytesize * layers.size();
  /* #144887: Using a max transfer size of 2GB. NVIDIA doesn't seem to allocate transfer buffers
   * larger than 4GB. */
  constexpr uint64_t max_transferbuffer_bytesize = 2ul * 1024ul * 1024ul * 1024ul;
  BLI_assert_msg(x_bytesize < max_transferbuffer_bytesize,
                 "Transfer buffer should at least fit all pixels of a single row.");

  /* Build a list of transfer regions to transfer the data back to the CPU, where the data can
   * still be read as a continuous stream of data. This will reduce complexity during conversion.
   */
  Vector<TransferRegion> transfer_regions;
  if (xyzl_bytesize <= max_transferbuffer_bytesize) {
    /* All data fits in a single transfer buffer. */
    transfer_regions.append(full_transfer_region);
  }
  else {
    /* Always split by layer. */
    for (int layer : layers) {
      if (xyz_bytesize <= max_transferbuffer_bytesize) {
        /* xyz data fits in a single transfer buffer. */
        transfer_regions.append({offset, extent, IndexRange(layer, 1)});
      }
      else {
        if (xy_bytesize <= max_transferbuffer_bytesize) {
          /* Split by depth, transfer multiple depths at a time */
          int64_t xy_in_single_transfer = max_transferbuffer_bytesize / xy_bytesize;
          int depths_added = 0;
          while (depths_added < extent.z) {
            int3 offset_region(offset.x, offset.y, offset.z + depths_added);
            int3 extent_region(
                extent.x, extent.y, min_ii(xy_in_single_transfer, extent.z - depths_added));
            transfer_regions.append({offset_region, extent_region, IndexRange(layer, 1)});
            depths_added += extent_region.z;
          }
        }
        else {
          /* Split by depth and rows, transfer multiple rows at a time. */
          int64_t x_in_single_transfer = max_transferbuffer_bytesize / x_bytesize;
          for (int z = 0; z < extent.z; z++) {
            int rows_added = 0;
            while (rows_added < extent.y) {
              int3 offset_region(offset.x, offset.y + rows_added, offset.z + z);
              int3 extent_region(extent.x, min_ii(x_in_single_transfer, extent.y - rows_added), 1);
              transfer_regions.append({offset_region, extent_region, IndexRange(layer, 1)});
              rows_added += extent_region.y;
            }
          }
        }
      }
    }
  }

  /* Create and schedule transfer regions. */
  Array<VKBuffer> staging_buffers(transfer_regions.size());
  VKContext &context = *VKContext::get();
  context.rendering_end();
  for (int index : transfer_regions.index_range()) {
    const TransferRegion &transfer_region = transfer_regions[index];
    VKBuffer &staging_buffer = staging_buffers[index];
    size_t sample_len = transfer_region.sample_count();
    size_t device_memory_size = sample_len * to_bytesize(device_format_);
    staging_buffer.create(device_memory_size,
                          VK_BUFFER_USAGE_TRANSFER_DST_BIT,
                          VMA_MEMORY_USAGE_AUTO_PREFER_HOST,
                          /* Although we are only reading, we need to set the host access random
                           * bit to improve the performance on AMD GPUs. */
                          VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT |
                              VMA_ALLOCATION_CREATE_MAPPED_BIT,
                          0.2f);

    render_graph::VKCopyImageToBufferNode::CreateInfo copy_image_to_buffer = {};
    render_graph::VKCopyImageToBufferNode::Data &node_data = copy_image_to_buffer.node_data;
    node_data.src_image = vk_image_handle();
    node_data.dst_buffer = staging_buffer.vk_handle();
    node_data.region.imageOffset.x = transfer_region.offset.x;
    node_data.region.imageOffset.y = transfer_region.offset.y;
    node_data.region.imageOffset.z = transfer_region.offset.z;
    node_data.region.imageExtent.width = transfer_region.extent.x;
    node_data.region.imageExtent.height = transfer_region.extent.y;
    node_data.region.imageExtent.depth = transfer_region.extent.z;
    VkImageAspectFlags vk_image_aspects = to_vk_image_aspect_flag_bits(device_format_);
    copy_image_to_buffer.vk_image_aspects = vk_image_aspects;
    node_data.region.imageSubresource.aspectMask = to_vk_image_aspect_single_bit(vk_image_aspects,
                                                                                 false);
    node_data.region.imageSubresource.mipLevel = mip;
    node_data.region.imageSubresource.baseArrayLayer = transfer_region.layers.start();
    node_data.region.imageSubresource.layerCount = transfer_region.layers.size();

    context.render_graph().add_node(copy_image_to_buffer);
  }

  /* Submit and wait for the transfers to be completed. */
  context.flush_render_graph(RenderGraphFlushFlags::SUBMIT |
                             RenderGraphFlushFlags::WAIT_FOR_COMPLETION);

  /* Convert the data to r_data. */
  for (int index : transfer_regions.index_range()) {
    const TransferRegion &transfer_region = transfer_regions[index];
    const VKBuffer &staging_buffer = staging_buffers[index];
    size_t sample_len = transfer_region.sample_count();

    size_t data_offset = full_transfer_region.result_offset(transfer_region.offset,
                                                            transfer_region.layers.start()) *
                         sample_bytesize;
    convert_device_to_host(static_cast<void *>(static_cast<uint8_t *>(r_data) + data_offset),
                           staging_buffer.mapped_memory_get(),
                           sample_len,
                           format,
                           format_,
                           device_format_);
  }
}

void *VKTexture::read(int mip, eGPUDataFormat format)
{
  int mip_size[3] = {1, 1, 1};
  VkImageType vk_image_type = to_vk_image_type(type_);
  mip_size_get(mip, mip_size);
  switch (vk_image_type) {
    case VK_IMAGE_TYPE_1D: {
      mip_size[1] = 1;
      mip_size[2] = 1;
    } break;
    case VK_IMAGE_TYPE_2D: {
      mip_size[2] = 1;
    } break;
    case VK_IMAGE_TYPE_3D:
    default:
      break;
  }

  if (mip_size[2] == 0) {
    mip_size[2] = 1;
  }
  IndexRange layers = IndexRange(layer_offset_, vk_layer_count(1));
  size_t sample_len = mip_size[0] * mip_size[1] * mip_size[2] * layers.size();
  size_t host_memory_size = sample_len * to_bytesize(format_, format);

  void *data = MEM_mallocN(host_memory_size, __func__);
  int region[6] = {0, 0, 0, mip_size[0], mip_size[1], mip_size[2]};
  read_sub(mip, format, region, layers, data);
  return data;
}

void VKTexture::update_sub(int mip,
                           int offset_[3],
                           int extent_[3],
                           eGPUDataFormat format,
                           const void *data,
                           VKPixelBuffer *pixel_buffer)
{
  BLI_assert(!is_texture_view());

  const bool is_compressed = (format_flag_ & GPU_FORMAT_COMPRESSED);

  int3 extent = int3(extent_[0], max_ii(extent_[1], 1), max_ii(extent_[2], 1));
  int3 offset = int3(offset_[0], offset_[1], offset_[2]);
  int layers = 1;
  int start_layer = 0;
  if (type_ & GPU_TEXTURE_1D) {
    layers = extent.y;
    start_layer = offset.y;
    extent.y = 1;
    extent.z = 1;
    offset.y = 0;
    offset.z = 0;
  }
  else if (type_ & (GPU_TEXTURE_2D | GPU_TEXTURE_CUBE)) {
    layers = extent.z;
    start_layer = offset.z;
    extent.z = 1;
    offset.z = 0;
  }
  BLI_assert(offset.x + extent.x <= width_get());
  BLI_assert(offset.y + extent.y <= max_ii(height_get(), 1));
  BLI_assert(offset.z + extent.z <= max_ii(depth_get(), 1));

  /* Vulkan images cannot be directly mapped to host memory and require a staging buffer. */
  VKContext &context = *VKContext::get();
  size_t sample_len = size_t(extent.x) * extent.y * extent.z * layers;
  size_t device_memory_size = sample_len * to_bytesize(device_format_);

  if (is_compressed) {
    BLI_assert_msg(extent.z == 1, "Compressed 3D textures are not supported");
    size_t block_size = to_block_size(device_format_);
    size_t blocks_x = divide_ceil_u(extent.x, 4);
    size_t blocks_y = divide_ceil_u(extent.y, 4);
    device_memory_size = blocks_x * blocks_y * block_size;
    /* `convert_buffer` later on will use `sample_len * to_bytesize(device_format_)`
     * as the total memory size calculation. Make that work for the compressed case. */
    sample_len = device_memory_size / to_bytesize(device_format_);
  }

  VKBuffer staging_buffer;
  VkBuffer vk_buffer = VK_NULL_HANDLE;
  if (data) {
    staging_buffer.create(device_memory_size,
                          VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
                          VMA_MEMORY_USAGE_AUTO_PREFER_HOST,
                          VMA_ALLOCATION_CREATE_MAPPED_BIT |
                              VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT,
                          0.4f);
    vk_buffer = staging_buffer.vk_handle();
    /* Rows are sequentially stored when the unpack row length is 0, or equal to the extent width.
     * In other cases we unpack the rows to reduce the size of the staging buffer and the data
     * transfer. */
    const uint texture_unpack_row_length =
        context.state_manager_get().texture_unpack_row_length_get();
    if (ELEM(texture_unpack_row_length, 0, extent.x)) {
      convert_host_to_device(
          staging_buffer.mapped_memory_get(), data, sample_len, format, format_, device_format_);
    }
    else {
      BLI_assert_msg(!is_compressed,
                     "Compressed data with texture_unpack_row_length != 0 is not supported.");
      BLI_assert_msg(extent[2] <= 1,
                     "3D texture data with texture_unpack_row_length != 0 is not supported.");
      size_t dst_row_stride = extent.x * to_bytesize(device_format_);
      size_t src_row_stride = texture_unpack_row_length * to_bytesize(format_, format);
      uint8_t *dst_ptr = static_cast<uint8_t *>(staging_buffer.mapped_memory_get());
      const uint8_t *src_ptr = static_cast<const uint8_t *>(data);
      for (int y = 0; y < extent.y; y++) {
        convert_host_to_device(dst_ptr, src_ptr, extent.x, format, format_, device_format_);
        src_ptr += src_row_stride;
        dst_ptr += dst_row_stride;
      }
    }
  }
  else {
    BLI_assert(pixel_buffer);
    vk_buffer = pixel_buffer->buffer_get().vk_handle();
  }

  render_graph::VKCopyBufferToImageNode::CreateInfo copy_buffer_to_image = {};
  render_graph::VKCopyBufferToImageNode::Data &node_data = copy_buffer_to_image.node_data;
  node_data.src_buffer = vk_buffer;
  node_data.dst_image = vk_image_handle();
  node_data.region.imageExtent.width = extent.x;
  node_data.region.imageExtent.height = extent.y;
  node_data.region.imageExtent.depth = extent.z;
  node_data.region.imageOffset.x = offset.x;
  node_data.region.imageOffset.y = offset.y;
  node_data.region.imageOffset.z = offset.z;
  VkImageAspectFlags vk_image_aspects = to_vk_image_aspect_flag_bits(device_format_);
  copy_buffer_to_image.vk_image_aspects = vk_image_aspects;
  node_data.region.imageSubresource.aspectMask = to_vk_image_aspect_single_bit(vk_image_aspects,
                                                                               false);
  node_data.region.imageSubresource.mipLevel = mip;
  node_data.region.imageSubresource.baseArrayLayer = start_layer;
  node_data.region.imageSubresource.layerCount = layers;

  context.render_graph().add_node(copy_buffer_to_image);
}

void VKTexture::update_sub(
    int mip, int offset[3], int extent[3], eGPUDataFormat format, const void *data)
{
  update_sub(mip, offset, extent, format, data, nullptr);
}

void VKTexture::update_sub(int offset[3],
                           int extent[3],
                           eGPUDataFormat format,
                           GPUPixelBuffer *pixbuf)
{
  VKPixelBuffer &pixel_buffer = *unwrap(unwrap(pixbuf));
  update_sub(0, offset, extent, format, nullptr, &pixel_buffer);
}

VKMemoryExport VKTexture::export_memory(VkExternalMemoryHandleTypeFlagBits handle_type)
{
  const VKDevice &device = VKBackend::get().device;
  BLI_assert_msg(
      bool(usage_get() & GPU_TEXTURE_USAGE_MEMORY_EXPORT),
      "Can only import external memory when usage flag contains GPU_TEXTURE_USAGE_MEMORY_EXPORT.");
  BLI_assert_msg(allocation_ != nullptr,
                 "Cannot export memory when the texture is not backed by any device memory.");
  BLI_assert_msg(device.extensions_get().external_memory,
                 "Requested to export memory, but isn't supported by the device");
  if (handle_type == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT) {
    VkMemoryGetFdInfoKHR vk_memory_get_fd_info = {VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
                                                  nullptr,
                                                  allocation_info_.deviceMemory,
                                                  handle_type};
    int fd_handle = 0;
    device.functions.vkGetMemoryFd(device.vk_handle(), &vk_memory_get_fd_info, &fd_handle);
    return {uint64_t(fd_handle), allocation_info_.size, allocation_info_.offset};
  }

#ifdef _WIN32
  if (handle_type == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT) {
    VkMemoryGetWin32HandleInfoKHR vk_memory_get_win32_handle_info = {
        VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
        nullptr,
        allocation_info_.deviceMemory,
        handle_type};
    HANDLE win32_handle = nullptr;
    device.functions.vkGetMemoryWin32Handle(
        device.vk_handle(), &vk_memory_get_win32_handle_info, &win32_handle);
    return {uint64_t(win32_handle), allocation_info_.size, allocation_info_.offset};
  }
#endif

  return {};
}

bool VKTexture::init_internal()
{
  device_format_ = format_;
  /* R16G16F16 formats are typically not supported (<1%). */
  if (device_format_ == TextureFormat::SFLOAT_16_16_16) {
    device_format_ = TextureFormat::SFLOAT_16_16_16_16;
  }
  if (device_format_ == TextureFormat::SFLOAT_32_32_32) {
    device_format_ = TextureFormat::SFLOAT_32_32_32_32;
  }

  if (!allocate()) {
    return false;
  }
  this->mip_range_set(0, mipmaps_ - 1);

  return true;
}

bool VKTexture::init_internal(VertBuf *vbo)
{
  BLI_assert(source_buffer_ == nullptr);
  device_format_ = format_;
  source_buffer_ = unwrap(vbo);
  return true;
}

bool VKTexture::init_internal(GPUTexture *src,
                              int mip_offset,
                              int layer_offset,
                              bool use_stencil)
{
  BLI_assert(source_texture_ == nullptr);
  BLI_assert(src);

  VKTexture *texture = unwrap(unwrap(src));
  source_texture_ = texture;
  device_format_ = texture->device_format_;
  mip_min_ = mip_offset;
  mip_max_ = mip_offset;
  layer_offset_ = layer_offset;
  use_stencil_ = use_stencil;

  return true;
}

void VKTexture::init_swapchain(VkImage vk_image, TextureFormat format)
{
  device_format_ = format_ = format;
  vk_image_ = vk_image;
}

bool VKTexture::is_texture_view() const
{
  return source_texture_ != nullptr;
}

static VkImageUsageFlags to_vk_image_usage(const eGPUTextureUsage usage,
                                           const GPUTextureFormatFlag format_flag)
{
  const VKDevice &device = VKBackend::get().device;
  const bool supports_local_read = device.extensions_get().dynamic_rendering_local_read;

  VkImageUsageFlags result = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
                             VK_IMAGE_USAGE_SAMPLED_BIT;
  if (usage & GPU_TEXTURE_USAGE_SHADER_READ) {
    result |= VK_IMAGE_USAGE_STORAGE_BIT;
  }
  if (usage & GPU_TEXTURE_USAGE_SHADER_WRITE) {
    result |= VK_IMAGE_USAGE_STORAGE_BIT;
  }
  if (usage & GPU_TEXTURE_USAGE_ATTACHMENT) {
    if (format_flag & GPU_FORMAT_COMPRESSED) {
      /* These formats aren't supported as an attachment. When using GPU_TEXTURE_USAGE_DEFAULT
       * they would still be evaluated as attachable, so we need to skip them here. */
    }
    else {
      if (format_flag & (GPU_FORMAT_DEPTH | GPU_FORMAT_STENCIL)) {
        result |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
      }
      else {
        result |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
        if (supports_local_read) {
          result |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
        }
      }
    }
  }
  if (usage & GPU_TEXTURE_USAGE_HOST_READ) {
    result |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
  }

  /* Disable some usages based on the given format flag to support more devices. */
  if (format_flag & GPU_FORMAT_SRGB) {
    /* NVIDIA devices don't create SRGB textures when the storage bit is set. */
    result &= ~VK_IMAGE_USAGE_STORAGE_BIT;
  }
  if (format_flag & (GPU_FORMAT_DEPTH | GPU_FORMAT_STENCIL)) {
    /* NVIDIA devices don't create depth textures when the storage bit is set. */
    result &= ~VK_IMAGE_USAGE_STORAGE_BIT;
  }

  return result;
}

static VkImageCreateFlags to_vk_image_create(const GPUTextureType texture_type,
                                             const GPUTextureFormatFlag format_flag,
                                             const eGPUTextureUsage usage)
{
  VkImageCreateFlags result = 0;

  if (ELEM(texture_type, GPU_TEXTURE_CUBE, GPU_TEXTURE_CUBE_ARRAY)) {
    result |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
  }

  /* sRGB textures need to be mutable as they can be used as non-sRGB frame-buffer attachments. */
  if (usage & GPU_TEXTURE_USAGE_ATTACHMENT && format_flag & GPU_FORMAT_SRGB) {
    result |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
  }

  return result;
}

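/* Allocation priority forwarded to VMA (VK_EXT_memory_priority): higher values make it less
 * likely that the allocation is demoted from device-local memory under memory pressure. */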
static float memory_priority(const eGPUTextureUsage texture_usage)
{
  if (bool(texture_usage & GPU_TEXTURE_USAGE_MEMORY_EXPORT)) {
    return 0.8f;
  }
  if (bool(texture_usage & GPU_TEXTURE_USAGE_ATTACHMENT)) {
    return 1.0f;
  }
  return 0.5f;
}

bool VKTexture::allocate()
{
  BLI_assert(vk_image_ == VK_NULL_HANDLE);
  BLI_assert(!is_texture_view());

  VkExtent3D vk_extent = vk_extent_3d(0);
  const uint32_t limit = (type_ == GPU_TEXTURE_3D) ? GPU_max_texture_3d_size() :
                                                     GPU_max_texture_size();
  if (vk_extent.width > limit || vk_extent.height > limit || vk_extent.depth > limit) {
    return false;
  }

  const eGPUTextureUsage texture_usage = usage_get();

  VKDevice &device = VKBackend::get().device;
  VkImageCreateInfo image_info = {};
  image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
  image_info.flags = to_vk_image_create(type_, format_flag_, texture_usage);
  image_info.imageType = to_vk_image_type(type_);
  image_info.extent = vk_extent;
  image_info.mipLevels = max_ii(mipmaps_, 1);
  image_info.arrayLayers = vk_layer_count(1);
  image_info.format = to_vk_format(device_format_);
  /* Some platforms (NVIDIA) require that attached textures are always tiled optimal.
   *
   * As image data is always accessed via a staging buffer we can enable optimal tiling for all
   * textures. Tiling based on actual usage should be done in `VKFramebuffer`.
   */
  image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
  image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
  image_info.usage = to_vk_image_usage(texture_usage, format_flag_);
  image_info.samples = VK_SAMPLE_COUNT_1_BIT;

  VkResult result;
  if (G.debug & G_DEBUG_GPU) {
    VkImageFormatProperties image_format = {};
    result = vkGetPhysicalDeviceImageFormatProperties(device.physical_device_get(),
                                                      image_info.format,
                                                      image_info.imageType,
                                                      image_info.tiling,
                                                      image_info.usage,
                                                      image_info.flags,
                                                      &image_format);
    if (result != VK_SUCCESS) {
      printf("Image type not supported on device.\n");
      return false;
    }
  }

  VkExternalMemoryImageCreateInfo external_memory_create_info = {
      VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, nullptr, 0};

  VmaAllocationCreateInfo allocCreateInfo = {};
  allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
  allocCreateInfo.priority = memory_priority(texture_usage);

  if (bool(texture_usage & GPU_TEXTURE_USAGE_MEMORY_EXPORT)) {
    image_info.pNext = &external_memory_create_info;
    external_memory_create_info.handleTypes = vk_external_memory_handle_type();
    allocCreateInfo.pool = device.vma_pools.external_memory_image.pool;
  }
  result = vmaCreateImage(device.mem_allocator_get(),
                          &image_info,
                          &allocCreateInfo,
                          &vk_image_,
                          &allocation_,
                          &allocation_info_);
  if (result != VK_SUCCESS) {
    return false;
  }
  debug::object_label(vk_image_, name_);

  const bool use_subresource_tracking = image_info.arrayLayers > 1 || image_info.mipLevels > 1;
  device.resources.add_image(vk_image_, use_subresource_tracking, name_);

  return result == VK_SUCCESS;
}

/* -------------------------------------------------------------------- */

IndexRange VKTexture::mip_map_range() const
{
  return IndexRange(mip_min_, mip_max_ - mip_min_ + 1);
}

IndexRange VKTexture::layer_range() const
{
  if (is_texture_view()) {
    return IndexRange(layer_offset_, layer_count());
  }
  else {
    return IndexRange(
        0, ELEM(type_, GPU_TEXTURE_CUBE, GPU_TEXTURE_CUBE_ARRAY) ? d_ : VK_REMAINING_ARRAY_LAYERS);
  }
}

int VKTexture::vk_layer_count(int non_layered_value) const
{
  if (is_texture_view()) {
    return layer_count();
  }
  return type_ == GPU_TEXTURE_CUBE     ? d_ :
         (type_ & GPU_TEXTURE_ARRAY)   ? layer_count() :
                                         non_layered_value;
}

VkExtent3D VKTexture::vk_extent_3d(int mip_level) const
{
  int extent[3] = {1, 1, 1};
  mip_size_get(mip_level, extent);
  if (ELEM(type_, GPU_TEXTURE_CUBE, GPU_TEXTURE_CUBE_ARRAY, GPU_TEXTURE_2D_ARRAY)) {
    extent[2] = 1;
  }
  if (ELEM(type_, GPU_TEXTURE_1D_ARRAY)) {
    extent[1] = 1;
    extent[2] = 1;
  }

  VkExtent3D result{uint32_t(extent[0]), uint32_t(extent[1]), uint32_t(extent[2])};
  return result;
}

const VKImageView &VKTexture::image_view_get(const VKImageViewInfo &info)
{
  if (is_texture_view()) {
    /* TODO: API should be improved as we don't support image view specialization.
     * In the current API this is still possible to set up when using attachments. */
  }
  for (const VKImageView &image_view : image_views_) {
    if (image_view.info == info) {
      return image_view;
    }
  }

  image_views_.append(VKImageView(*this, info, name_));
  return image_views_.last();
}

const VKImageView &VKTexture::image_view_get(VKImageViewArrayed arrayed, VKImageViewFlags flags)
{
  image_view_info_.mip_range = mip_map_range();
  image_view_info_.use_srgb = true;
  image_view_info_.use_stencil = use_stencil_;
  image_view_info_.arrayed = arrayed;
  image_view_info_.layer_range = layer_range();

  if (arrayed == VKImageViewArrayed::NOT_ARRAYED) {
    image_view_info_.layer_range = image_view_info_.layer_range.slice(0, 1);
  }

  if (bool(flags & VKImageViewFlags::NO_SWIZZLING)) {
    image_view_info_.swizzle[0] = 'r';
    image_view_info_.swizzle[1] = 'g';
    image_view_info_.swizzle[2] = 'b';
    image_view_info_.swizzle[3] = 'a';
  }
  else {
    image_view_info_.swizzle[0] = swizzle_[0];
    image_view_info_.swizzle[1] = swizzle_[1];
    image_view_info_.swizzle[2] = swizzle_[2];
    image_view_info_.swizzle[3] = swizzle_[3];
  }

  if (is_texture_view()) {
    return source_texture_->image_view_get(image_view_info_);
  }
  return image_view_get(image_view_info_);
}

}  // namespace blender::gpu