mtl_texture.mm
/* SPDX-FileCopyrightText: 2022-2023 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

/** \file
 * \ingroup gpu
 */

#include "BKE_global.hh"
#include "BLI_math_half.hh"

#include "DNA_userdef_types.h"

#include "GPU_batch.hh"
#include "GPU_batch_presets.hh"
#include "GPU_capabilities.hh"
#include "GPU_framebuffer.hh"
#include "GPU_immediate.hh"
#include "GPU_platform.hh"
#include "GPU_state.hh"

#include "mtl_backend.hh"
#include "mtl_common.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_texture.hh"
#include "mtl_vertex_buffer.hh"

#include "GHOST_C-api.h"

namespace blender::gpu {

/* -------------------------------------------------------------------- */
/** \name Creation & Deletion
 * \{ */

void gpu::MTLTexture::mtl_texture_init()
{
  BLI_assert(MTLContext::get() != nullptr);

  /* Status. */
  is_baked_ = false;
  is_dirty_ = false;
  resource_mode_ = MTL_TEXTURE_MODE_DEFAULT;
  mtl_max_mips_ = 1;

  /* Metal properties. */
  texture_ = nil;
  mip_swizzle_view_ = nil;

  /* Binding information. */
  is_bound_ = false;

  /* VBO. */
  vert_buffer_ = nullptr;
  vert_buffer_mtl_ = nil;

  /* Default Swizzle. */
  tex_swizzle_mask_[0] = 'r';
  tex_swizzle_mask_[1] = 'g';
  tex_swizzle_mask_[2] = 'b';
  tex_swizzle_mask_[3] = 'a';
  mtl_swizzle_mask_ = MTLTextureSwizzleChannelsMake(
      MTLTextureSwizzleRed, MTLTextureSwizzleGreen, MTLTextureSwizzleBlue, MTLTextureSwizzleAlpha);
}

gpu::MTLTexture::MTLTexture(const char *name) : Texture(name)
{
  /* Common Initialization. */
  mtl_texture_init();
}

gpu::MTLTexture::MTLTexture(const char *name,
                            TextureFormat format,
                            GPUTextureType type,
                            id<MTLTexture> metal_texture)
    : Texture(name)
{
  /* Common Initialization. */
  mtl_texture_init();

  /* Prep texture from METAL handle. */
  BLI_assert(metal_texture != nil);
  BLI_assert(type == GPU_TEXTURE_2D);
  type_ = type;
  init_2D((int)metal_texture.width, (int)metal_texture.height, 0, 1, format);

  /* Assign MTLTexture. */
  texture_ = metal_texture;
  [texture_ retain];
  internal_gpu_image_usage_flags_ = gpu_usage_from_mtl(metal_texture.usage);
  gpu_image_usage_flags_ = internal_gpu_image_usage_flags_;

  /* Flag as Baked. */
  is_baked_ = true;
  is_dirty_ = false;
  resource_mode_ = MTL_TEXTURE_MODE_EXTERNAL;
}

gpu::MTLTexture::~MTLTexture()
{
  /* Unbind if bound. */
  if (is_bound_) {
    MTLContext *ctx = MTLContext::get();
    if (ctx != nullptr) {
      ctx->state_manager->texture_unbind(this);
    }
  }

  /* Free memory. */
  this->reset();
}

/** \} */

/* -------------------------------------------------------------------- */
void gpu::MTLTexture::bake_mip_swizzle_view()
{
  if (texture_view_dirty_flags_) {

    /* Optimization: only generate texture view for mipmapped textures if base level > 0
     * and max level does not match the existing number of mips.
     * Only apply this if mipmap is the only change, and we have not previously generated
     * a texture view. For textures which are created as views, this should also be skipped. */
    if (resource_mode_ != MTL_TEXTURE_MODE_TEXTURE_VIEW &&
        texture_view_dirty_flags_ == TEXTURE_VIEW_MIP_DIRTY && mip_swizzle_view_ == nil)
    {

      if (mip_texture_base_level_ == 0 && mip_texture_max_level_ == mtl_max_mips_) {
        texture_view_dirty_flags_ = TEXTURE_VIEW_NOT_DIRTY;
        return;
      }
    }

    /* If a texture view was previously created, release it. */
    if (mip_swizzle_view_ != nil) {
      [mip_swizzle_view_ release];
      mip_swizzle_view_ = nil;
    }

    /* Use source texture to determine range limits. If we are using a GPU texture view, the
     * range check should only validate the range. */
    const gpu::Texture *tex_view_src = this;
    if (resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) {
      tex_view_src = source_texture_;
    }

    /* Determine num slices. */
    int max_slices = 1;
    int num_slices = 1;
    switch (type_) {
      case GPU_TEXTURE_1D_ARRAY:
        max_slices = tex_view_src->height_get();
        num_slices = h_;
        break;
      case GPU_TEXTURE_2D_ARRAY:
        max_slices = tex_view_src->depth_get();
        num_slices = d_;
        break;
      case GPU_TEXTURE_CUBE:
        max_slices = 6;
        num_slices = 6;
        break;
      case GPU_TEXTURE_CUBE_ARRAY:
        /* d_ is equal to array levels * 6, including face count. */
        max_slices = tex_view_src->depth_get();
        num_slices = d_;
        break;
      default:
        num_slices = 1;
        break;
    }

    /* Determine texture view format. If texture view is used as a stencil view, we instead
     * provide the equivalent format for performing stencil reads/samples. */
    MTLPixelFormat texture_view_pixel_format = gpu_texture_format_to_metal(format_);
    if (texture_view_stencil_) {
      switch (texture_view_pixel_format) {
        case MTLPixelFormatDepth24Unorm_Stencil8:
          texture_view_pixel_format = MTLPixelFormatX24_Stencil8;
          break;
        case MTLPixelFormatDepth32Float_Stencil8:
          texture_view_pixel_format = MTLPixelFormatX32_Stencil8;
          break;
        default:
          BLI_assert_msg(false, "Texture format does not support stencil views.");
          break;
      }
    }

    /* NOTE: Texture type for cube maps can be overridden as a 2D array. This is done
     * via modifying this texture's type flags. */
    MTLTextureType texture_view_texture_type = to_metal_type(type_);

    /* Ensure we have texture view usage flagged.
     * NOTE: This check exists in the high level GPU API, however it does not cover internal
     * Metal backend uses of texture views, such as when required to support the SRGB enablement
     * toggle during rendering. */
    BLI_assert_msg(
        (texture_view_pixel_format == texture_.pixelFormat) ||
            (internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW),
        "Usage Flag GPU_TEXTURE_USAGE_FORMAT_VIEW must be specified if a texture view is "
        "created with a different format to its source texture.");
    int range_len = min_ii((mip_texture_max_level_ - mip_texture_base_level_) + 1,
                           (int)texture_.mipmapLevelCount - mip_texture_base_level_);
    BLI_assert(range_len > 0);
    BLI_assert(mip_texture_base_level_ < texture_.mipmapLevelCount);
    BLI_assert(mip_texture_base_layer_ < max_slices);
    UNUSED_VARS_NDEBUG(max_slices);
    mip_swizzle_view_ = [texture_
        newTextureViewWithPixelFormat:texture_view_pixel_format
                          textureType:texture_view_texture_type
                               levels:NSMakeRange(mip_texture_base_level_, range_len)
                               slices:NSMakeRange(mip_texture_base_layer_, num_slices)
                              swizzle:mtl_swizzle_mask_];
    MTL_LOG_INFO(
        "Updating texture view - MIP TEXTURE BASE LEVEL: %d, MAX LEVEL: %d (Range len: %d)",
        mip_texture_base_level_,
        min_ii(mip_texture_max_level_, (int)texture_.mipmapLevelCount),
        range_len);
#ifndef NDEBUG
    mip_swizzle_view_.label = [NSString
        stringWithFormat:
            @"MipSwizzleView_%s__format=%u_type=%u_baselevel=%u_numlevels=%u_swizzle='%c%c%c%c'",
            [[texture_ label] UTF8String],
            (uint)texture_view_pixel_format,
            (uint)texture_view_texture_type,
            (uint)mip_texture_base_level_,
            (uint)range_len,
            tex_swizzle_mask_[0],
            tex_swizzle_mask_[1],
            tex_swizzle_mask_[2],
            tex_swizzle_mask_[3]];
#else
    mip_swizzle_view_.label = [texture_ label];
#endif
    texture_view_dirty_flags_ = TEXTURE_VIEW_NOT_DIRTY;
  }
}

id<MTLTexture> gpu::MTLTexture::get_metal_handle()
{

  /* Verify VBO texture shares same buffer. */
  if (resource_mode_ == MTL_TEXTURE_MODE_VBO) {
    id<MTLBuffer> buf = vert_buffer_->get_metal_buffer();

    /* Source vertex buffer has been re-generated, require re-initialization. */
    if (buf != vert_buffer_mtl_) {
      MTL_LOG_INFO(
          "MTLTexture '%p' using MTL_TEXTURE_MODE_VBO requires re-generation due to updated "
          "Vertex-Buffer.",
          this);
      /* Clear state. */
      this->reset();

      /* Re-initialize. */
      this->init_internal(vert_buffer_);

      /* Update for assertion check below. */
      buf = vert_buffer_->get_metal_buffer();
    }

    /* Ensure buffer is valid.
     * Fetch vert-buffer handle directly in case it changed above. */
    BLI_assert(vert_buffer_mtl_ != nil);
    BLI_assert(vert_buffer_->get_metal_buffer() == vert_buffer_mtl_);
  }

  /* Ensure up to date and baked. */
  this->ensure_baked();

  if (is_baked_) {
    /* For explicit texture views, ensure we always return the texture view. */
    if (resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) {
      BLI_assert_msg(mip_swizzle_view_, "Texture view should always have a valid handle.");
    }

    if (mip_swizzle_view_ != nil || texture_view_dirty_flags_) {
      bake_mip_swizzle_view();

      /* Optimization: If the texture view does not change mip parameters, no texture view will
       * be baked. This is because texture views remove the ability to perform lossless
       * compression. */
      if (mip_swizzle_view_ != nil) {
        return mip_swizzle_view_;
      }
    }
    return texture_;
  }
  return nil;
}

id<MTLTexture> gpu::MTLTexture::get_metal_handle_base()
{

  /* Ensure up to date and baked. */
  this->ensure_baked();

  /* For explicit texture views, always return the texture view. */
  if (resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) {
    BLI_assert_msg(mip_swizzle_view_, "Texture view should always have a valid handle.");
    if (mip_swizzle_view_ != nil || texture_view_dirty_flags_) {
      bake_mip_swizzle_view();
    }
    BLI_assert(mip_swizzle_view_ != nil);
    return mip_swizzle_view_;
  }

  /* Return base handle. */
  if (is_baked_) {
    return texture_;
  }
  return nil;
}

void gpu::MTLTexture::blit(id<MTLBlitCommandEncoder> blit_encoder,
                           uint src_x_offset,
                           uint src_y_offset,
                           uint src_z_offset,
                           uint src_slice,
                           uint src_mip,
                           gpu::MTLTexture *dst,
                           uint dst_x_offset,
                           uint dst_y_offset,
                           uint dst_z_offset,
                           uint dst_slice,
                           uint dst_mip,
                           uint width,
                           uint height,
                           uint depth)
{

  BLI_assert(dst);
  BLI_assert(width > 0 && height > 0 && depth > 0);
  MTLSize src_size = MTLSizeMake(width, height, depth);
  MTLOrigin src_origin = MTLOriginMake(src_x_offset, src_y_offset, src_z_offset);
  MTLOrigin dst_origin = MTLOriginMake(dst_x_offset, dst_y_offset, dst_z_offset);

  if (this->format_get() != dst->format_get()) {
    MTL_LOG_WARNING(
        "gpu::MTLTexture: Cannot copy between two textures of different formats using a "
        "blit encoder. TODO: Support this operation");
    return;
  }

  /* TODO(Metal): Verify if we want to use the one with modified base-level/texture view
   * or not. */
  [blit_encoder copyFromTexture:this->get_metal_handle_base()
                    sourceSlice:src_slice
                    sourceLevel:src_mip
                   sourceOrigin:src_origin
                     sourceSize:src_size
                      toTexture:dst->get_metal_handle_base()
               destinationSlice:dst_slice
               destinationLevel:dst_mip
              destinationOrigin:dst_origin];
}

void gpu::MTLTexture::blit(gpu::MTLTexture *dst,
                           uint src_x_offset,
                           uint src_y_offset,
                           uint dst_x_offset,
                           uint dst_y_offset,
                           uint src_mip,
                           uint dst_mip,
                           uint dst_slice,
                           int width,
                           int height)
{
  BLI_assert(this->type_get() == dst->type_get());

  gpu::Shader *shader = fullscreen_blit_sh_get();
  BLI_assert(shader != nullptr);

  /* Fetch restore framebuffer and blit target framebuffer from destination texture. */
  gpu::FrameBuffer *restore_fb = GPU_framebuffer_active_get();
  gpu::FrameBuffer *blit_target_fb = dst->get_blit_framebuffer(dst_slice, dst_mip);
  BLI_assert(blit_target_fb);
  GPU_framebuffer_bind(blit_target_fb);

  /* Execute graphics draw call to perform the blit. */
  Batch *quad = GPU_batch_preset_quad();
  GPU_batch_set_shader(quad, shader);

  float w = dst->width_get();
  float h = dst->height_get();

  GPU_shader_uniform_2f(shader, "fullscreen", w, h);
  GPU_shader_uniform_2f(shader, "src_offset", src_x_offset, src_y_offset);
  GPU_shader_uniform_2f(shader, "dst_offset", dst_x_offset, dst_y_offset);
  GPU_shader_uniform_2f(shader, "size", width, height);

  GPU_shader_uniform_1i(shader, "mip", src_mip);
  GPU_batch_texture_bind(quad, "imageTexture", this);

  /* Cache previous pipeline state. */
  bool depth_write_prev = GPU_depth_mask_get();
  uint stencil_mask_prev = GPU_stencil_mask_get();
  GPUStencilTest stencil_test_prev = GPU_stencil_test_get();
  GPUFaceCullTest culling_test_prev = GPU_face_culling_get();
  GPUBlend blend_prev = GPU_blend_get();
  GPUDepthTest depth_test_prev = GPU_depth_test_get();
  GPU_scissor_test(false);

  /* Apply state for blit draw call. */
  GPU_stencil_write_mask_set(0xFF);
  GPU_stencil_test(GPU_STENCIL_ALWAYS);
  GPU_face_culling(GPU_CULL_NONE);
  GPU_blend(GPU_BLEND_NONE);
  GPU_depth_mask(false);
  GPU_depth_test(GPU_DEPTH_ALWAYS);

  GPU_batch_draw(quad);

  /* Restore old pipeline state. */
  GPU_depth_mask(depth_write_prev);
  GPU_stencil_write_mask_set(stencil_mask_prev);
  GPU_stencil_test(stencil_test_prev);
  GPU_face_culling(culling_test_prev);
  GPU_blend(blend_prev);
  GPU_depth_test(depth_test_prev);

  if (restore_fb != nullptr) {
    GPU_framebuffer_bind(restore_fb);
  }
  else {
    GPU_framebuffer_restore();
  }
}

gpu::FrameBuffer *gpu::MTLTexture::get_blit_framebuffer(int dst_slice, uint dst_mip)
{

  /* Check if layer has changed. */
  bool update_attachments = false;
  if (!blit_fb_) {
    blit_fb_ = GPU_framebuffer_create("gpu_blit");
    update_attachments = true;
  }

  /* Check if current blit FB has the correct attachment properties. */
  if (blit_fb_) {
    if (blit_fb_slice_ != dst_slice || blit_fb_mip_ != dst_mip) {
      update_attachments = true;
    }
  }

  if (update_attachments) {
    if (format_flag_ & GPU_FORMAT_DEPTH || format_flag_ & GPU_FORMAT_STENCIL) {
      /* DEPTH TEX */
      GPU_framebuffer_ensure_config(
          &blit_fb_,
          {GPU_ATTACHMENT_TEXTURE_LAYER_MIP(this, int(dst_slice), int(dst_mip)),
           GPU_ATTACHMENT_NONE});
    }
    else {
      /* COLOR TEX */
      GPU_framebuffer_ensure_config(
          &blit_fb_,
          {GPU_ATTACHMENT_NONE,
           GPU_ATTACHMENT_TEXTURE_LAYER_MIP(this, int(dst_slice), int(dst_mip))});
    }
    blit_fb_slice_ = dst_slice;
    blit_fb_mip_ = dst_mip;
  }

  BLI_assert(blit_fb_);
  return blit_fb_;
}

MTLSamplerState gpu::MTLTexture::get_sampler_state()
{
  MTLSamplerState sampler_state;
  sampler_state.state = this->sampler_state;
  /* Add more parameters as needed. */
  return sampler_state;
}

void gpu::MTLTexture::update_sub(
    int mip, int offset[3], int extent[3], eGPUDataFormat type, const void *data)
{
  /* Fetch active context. */
  MTLContext *ctx = MTLContext::get();
  BLI_assert(ctx);

  /* Do not update texture view. */
  BLI_assert(resource_mode_ != MTL_TEXTURE_MODE_TEXTURE_VIEW);

  /* Ensure mipmaps. */
  this->ensure_mipmaps(mip);

  /* Ensure texture is baked. */
  this->ensure_baked();

  /* Safety checks. */
  BLI_assert(mip >= mip_min_ && mip <= mip_max_);
  BLI_assert(mip < texture_.mipmapLevelCount);
  BLI_assert(texture_.mipmapLevelCount >= mip_max_);

  std::unique_ptr<uint16_t, MEM_freeN_smart_ptr_deleter> clamped_half_buffer = nullptr;

  if (data != nullptr && type == GPU_DATA_FLOAT && is_half_float(format_)) {
    size_t pixel_count = max_ii(extent[0], 1) * max_ii(extent[1], 1) * max_ii(extent[2], 1);
    size_t total_component_count = to_component_len(format_) * pixel_count;

    clamped_half_buffer.reset(
        (uint16_t *)MEM_mallocN_aligned(sizeof(uint16_t) * total_component_count, 128, __func__));

    Span<float> src(static_cast<const float *>(data), total_component_count);
    MutableSpan<uint16_t> dst(static_cast<uint16_t *>(clamped_half_buffer.get()),
                              total_component_count);

    constexpr int64_t chunk_size = 4 * 1024 * 1024;

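    /* Convert in fixed-size chunks so the work can be split across threads; chunk_size counts
     * components (not pixels), matching the flat span layout used below. */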
    threading::parallel_for(
        IndexRange(total_component_count), chunk_size, [&](const IndexRange range) {
          /* Doing float to half conversion manually to avoid implementation specific behavior
           * regarding Inf and NaNs. Use the make-finite version to avoid unexpected black pixels
           * on certain implementations. For platform parity we clamp these infinite values to
           * finite values. */
          math::float_to_half_array_make_finite(
              src.slice(range).data(), dst.slice(range).data(), range.size());
        });
    data = clamped_half_buffer.get();
    type = GPU_DATA_HALF_FLOAT;
  }

  /* DEPTH FLAG - Depth formats cannot use direct BLIT - pass off to their own routine which will
   * do a depth-only render. */
  bool is_depth_format = (format_flag_ & GPU_FORMAT_DEPTH);
  if (is_depth_format) {
    switch (type_) {

      case GPU_TEXTURE_2D:
        update_sub_depth_2d(mip, offset, extent, type, data);
        return;
      default:
        BLI_assert_msg(false,
                       "gpu::MTLTexture::update_sub not yet supported for other depth "
                       "configurations");
        return;
    }
  }

  const bool is_compressed = (format_flag_ & GPU_FORMAT_COMPRESSED);

  @autoreleasepool {
    /* Determine total size of INPUT data. */
    int num_channels = to_component_len(format_);
    size_t input_bytes_per_pixel = to_bytesize(format_, type);
    size_t totalsize = 0;

    /* If unpack row length is used, the size of the input data uses the unpack row length rather
     * than the image length. */
    size_t expected_update_w = ((ctx->pipeline_state.unpack_row_length == 0) ?
                                    extent[0] :
                                    ctx->pipeline_state.unpack_row_length);

    /* Ensure calculated total size isn't larger than remaining image data size. */
    if (is_compressed) {
      /* Calculate size requirement for incoming compressed texture data. */
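      /* Compressed data is encoded in 4x4 texel blocks, so each dimension is rounded up to a
       * whole block count before multiplying by the per-block byte size. */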
      totalsize = ((expected_update_w + 3) / 4) * ((extent[1] + 3) / 4) * to_block_size(format_);
    }
    else {
      switch (this->dimensions_count()) {
        case 1:
          totalsize = input_bytes_per_pixel * max_ulul(expected_update_w, 1);
          break;
        case 2:
          totalsize = input_bytes_per_pixel * max_ulul(expected_update_w, 1) * (size_t)extent[1];
          break;
        case 3:
          totalsize = input_bytes_per_pixel * max_ulul(expected_update_w, 1) * (size_t)extent[1] *
                      (size_t)extent[2];
          break;
        default:
          BLI_assert(false);
          break;
      }
    }

    /* Early exit if update size is zero. update_sub sometimes has a zero-sized
     * extent when called from texture painting. */
    if (totalsize <= 0 || extent[0] <= 0) {
      MTL_LOG_WARNING(
          "MTLTexture::update_sub called with extent size of zero for one or more dimensions. "
          "(%d, %d, %d) - DimCount: %u",
          extent[0],
          extent[1],
          extent[2],
          this->dimensions_count());
      return;
    }

    /* When unpack row length is used, provided data does not necessarily contain padding for the
     * last row, so we only include up to the end of the updated data. */
    if (ctx->pipeline_state.unpack_row_length > 0) {
      BLI_assert(ctx->pipeline_state.unpack_row_length >= extent[0]);
      totalsize -= (ctx->pipeline_state.unpack_row_length - extent[0]) * input_bytes_per_pixel;
    }

    /* Check. */
    BLI_assert(totalsize > 0);

    /* Determine expected destination data size. */
    MTLPixelFormat destination_format = gpu_texture_format_to_metal(format_);
    size_t expected_dst_bytes_per_pixel = get_mtl_format_bytesize(destination_format);
    int destination_num_channels = get_mtl_format_num_components(destination_format);

    /* Prepare specialization struct (For texture update routine). */
    TextureUpdateRoutineSpecialisation compute_specialization_kernel = {
        tex_data_format_to_msl_type_str(type),              /* INPUT DATA FORMAT */
        tex_data_format_to_msl_texture_template_type(type), /* TEXTURE DATA FORMAT */
        num_channels,
        destination_num_channels,
        false /* Not a clear. */
    };

    /* Determine whether we can do a direct BLIT or not. For compressed textures,
     * always assume a direct blit (input data pretends to be float, but it is
     * not). */
    bool can_use_direct_blit = true;
    if (!is_compressed && (expected_dst_bytes_per_pixel != input_bytes_per_pixel ||
                           num_channels != destination_num_channels))
    {
      can_use_direct_blit = false;
    }

    if (is_depth_format) {
      if (GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_MAC, GPU_DRIVER_OFFICIAL)) {
        /* Workaround for crash in validation layer when blitting to sampler2DDepth target with
         * dimensions (1, 1, 1). */
        if (extent[0] == 1 && extent[1] == 1 && extent[2] == 1 && totalsize == 4) {
          can_use_direct_blit = false;
        }
      }
    }

    if (format_ == TextureFormat::SRGBA_8_8_8_8 && !can_use_direct_blit) {
      MTL_LOG_WARNING(
          "SRGB data upload does not work correctly using compute upload. "
          "texname '%s'",
          name_);
    }

    /* Safety Checks. */
    if (type == GPU_DATA_UINT_24_8_DEPRECATED || type == GPU_DATA_10_11_11_REV ||
        type == GPU_DATA_2_10_10_10_REV || is_compressed)
    {
      BLI_assert(can_use_direct_blit &&
                 "Special input data type must be a 1-1 mapping with destination texture as it "
                 "cannot easily be split");
    }

    /* Debug and verification. */
    if (!can_use_direct_blit) {
      /* Check mip compatibility. */
      if (mip != 0) {
        MTL_LOG_ERROR(
            "Updating texture layers other than mip=0 when data is mismatched is not "
            "possible in METAL on macOS using texture->write\n");
        return;
      }

      /* Check format write-ability. */
      if (mtl_format_get_writeable_view_format(destination_format) == MTLPixelFormatInvalid) {
        MTL_LOG_ERROR(
            "Updating texture -- destination MTLPixelFormat '%d' does not support write "
            "operations, and no suitable TextureView format exists.\n",
            *(int *)(&destination_format));
        return;
      }
    }

    /* Common Properties. */
    MTLPixelFormat compatible_write_format = mtl_format_get_writeable_view_format(
        destination_format);

    /* Some texture formats are not writeable, so we need to use a texture view. */
    if (compatible_write_format == MTLPixelFormatInvalid) {
      MTL_LOG_ERROR("Cannot use compute update blit with texture-view format: %d\n",
                    *((int *)&compatible_write_format));
      return;
    }

    /* Fetch allocation from memory pool. */
    gpu::MTLBuffer *temp_allocation = MTLContext::get_global_memory_manager()->allocate_with_data(
        totalsize, true, data);
    id<MTLBuffer> staging_buffer = temp_allocation->get_metal_buffer();
    BLI_assert(staging_buffer != nil);

    /* Prepare command encoders. */
    id<MTLBlitCommandEncoder> blit_encoder = nil;
    id<MTLComputeCommandEncoder> compute_encoder = nil;
    id<MTLTexture> staging_texture = nil;
    id<MTLTexture> texture_handle = nil;

    /* Use staging texture. */
    bool use_staging_texture = false;

    if (can_use_direct_blit) {
      blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
      BLI_assert(blit_encoder != nil);

      /* If we need to use a texture view to write texture data as the source
       * format is unwritable, and our texture has not been initialized with
       * texture view support, use a staging texture. */
      if ((compatible_write_format != destination_format) &&
          !(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW))
      {
        use_staging_texture = true;
      }
    }
    else {
      compute_encoder = ctx->main_command_buffer.ensure_begin_compute_encoder();
      BLI_assert(compute_encoder != nil);

      /* For compute, we should use a staging texture to avoid texture write usage,
       * if it has not been specified for the texture. Using shader-write disables
       * lossless texture compression, so this is best to avoid where possible. */
      if (!(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_SHADER_WRITE)) {
        use_staging_texture = true;
      }
      if (compatible_write_format != destination_format) {
        if (!(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW)) {
          use_staging_texture = true;
        }
      }
    }

    /* Allocate staging texture if needed. */
    if (use_staging_texture) {
      /* Create staging texture to avoid shader-write limiting optimization. */
      BLI_assert(texture_descriptor_ != nullptr);
      MTLTextureUsage original_usage = texture_descriptor_.usage;
      texture_descriptor_.usage = original_usage | MTLTextureUsageShaderWrite |
                                  MTLTextureUsagePixelFormatView;
      staging_texture = [ctx->device newTextureWithDescriptor:texture_descriptor_];
      staging_texture.label = @"Staging texture";
      texture_descriptor_.usage = original_usage;

      /* Create texture view if needed. */
      texture_handle = ((compatible_write_format == destination_format)) ?
                           [staging_texture retain] :
                           [staging_texture newTextureViewWithPixelFormat:compatible_write_format];
    }
    else {
      /* Use texture view. */
      if (compatible_write_format != destination_format) {
        BLI_assert(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW);
        texture_handle = [texture_ newTextureViewWithPixelFormat:compatible_write_format];
      }
      else {
        texture_handle = texture_;
        [texture_handle retain];
      }
    }
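
    /* Both paths above return texture_handle with a +1 reference (either via [... retain] or a
     * newly created texture view), so the single release after the upload balances either case. */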

    switch (type_) {

      /* 1D */
      case GPU_TEXTURE_1D:
      case GPU_TEXTURE_1D_ARRAY: {
        if (can_use_direct_blit) {
          /* Use Blit based update. */
          size_t bytes_per_row = expected_dst_bytes_per_pixel *
                                 ((ctx->pipeline_state.unpack_row_length == 0) ?
                                      extent[0] :
                                      ctx->pipeline_state.unpack_row_length);
          size_t bytes_per_image = bytes_per_row;
          if (is_compressed) {
            size_t block_size = to_block_size(format_);
            size_t blocks_x = divide_ceil_u(extent[0], 4);
            bytes_per_row = blocks_x * block_size;
            bytes_per_image = bytes_per_row;
          }
          int max_array_index = ((type_ == GPU_TEXTURE_1D_ARRAY) ? extent[1] : 1);
          for (int array_index = 0; array_index < max_array_index; array_index++) {

            size_t buffer_array_offset = (bytes_per_image * (size_t)array_index);
            [blit_encoder
                     copyFromBuffer:staging_buffer
                       sourceOffset:buffer_array_offset
                  sourceBytesPerRow:bytes_per_row
                sourceBytesPerImage:bytes_per_image
                         sourceSize:MTLSizeMake(extent[0], 1, 1)
                          toTexture:texture_handle
                   destinationSlice:((type_ == GPU_TEXTURE_1D_ARRAY) ? (array_index + offset[1]) :
                                                                       0)
                   destinationLevel:mip
                  destinationOrigin:MTLOriginMake(offset[0], 0, 0)];
          }
        }
        else {
          /* Use Compute Based update. */
          if (type_ == GPU_TEXTURE_1D) {
            id<MTLComputePipelineState> pso = texture_update_1d_get_kernel(
                compute_specialization_kernel);
            TextureUpdateParams params = {mip,
                                          {extent[0], 1, 1},
                                          {offset[0], 0, 0},
                                          ((ctx->pipeline_state.unpack_row_length == 0) ?
                                               extent[0] :
                                               ctx->pipeline_state.unpack_row_length)};

            /* Bind resources via compute state for optimal state caching performance. */
            MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
            cs.bind_pso(pso);
            cs.bind_compute_bytes(&params, sizeof(params), 0);
            cs.bind_compute_buffer(staging_buffer, 0, 1);
            cs.bind_compute_texture(texture_handle, 0);
            [compute_encoder
                      dispatchThreads:MTLSizeMake(extent[0], 1, 1) /* Width, Height, Layer */
                threadsPerThreadgroup:MTLSizeMake(64, 1, 1)];
          }
          else if (type_ == GPU_TEXTURE_1D_ARRAY) {
            id<MTLComputePipelineState> pso = texture_update_1d_array_get_kernel(
                compute_specialization_kernel);
            TextureUpdateParams params = {mip,
                                          {extent[0], extent[1], 1},
                                          {offset[0], offset[1], 0},
                                          ((ctx->pipeline_state.unpack_row_length == 0) ?
                                               extent[0] :
                                               ctx->pipeline_state.unpack_row_length)};

            /* Bind resources via compute state for optimal state caching performance. */
            MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
            cs.bind_pso(pso);
            cs.bind_compute_bytes(&params, sizeof(params), 0);
            cs.bind_compute_buffer(staging_buffer, 0, 1);
            cs.bind_compute_texture(texture_handle, 0);
            [compute_encoder
                      dispatchThreads:MTLSizeMake(extent[0], extent[1], 1) /* Width, layers. */
                threadsPerThreadgroup:MTLSizeMake(8, 8, 1)];
          }
        }
      } break;

      /* 2D */
      case GPU_TEXTURE_2D:
      case GPU_TEXTURE_2D_ARRAY: {
        if (can_use_direct_blit) {
          /* Use Blit encoder update. */
          size_t bytes_per_row = expected_dst_bytes_per_pixel *
                                 ((ctx->pipeline_state.unpack_row_length == 0) ?
                                      extent[0] :
                                      ctx->pipeline_state.unpack_row_length);
          size_t bytes_per_image = bytes_per_row * extent[1];
          if (is_compressed) {
            size_t block_size = to_block_size(format_);
            size_t blocks_x = divide_ceil_u(extent[0], 4);
            size_t blocks_y = divide_ceil_u(extent[1], 4);
            bytes_per_row = blocks_x * block_size;
            bytes_per_image = bytes_per_row * blocks_y;
          }

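          /* Slices are tightly packed in the staging buffer, one image after another, so the
           * source offset advances by bytes_per_image per slice. */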
          size_t texture_array_relative_offset = 0;
          int base_slice = (type_ == GPU_TEXTURE_2D_ARRAY) ? offset[2] : 0;
          int final_slice = base_slice + ((type_ == GPU_TEXTURE_2D_ARRAY) ? extent[2] : 1);

          for (int array_slice = base_slice; array_slice < final_slice; array_slice++) {

            if (array_slice > 0) {
              BLI_assert(type_ == GPU_TEXTURE_2D_ARRAY);
              BLI_assert(array_slice < d_);
            }

            [blit_encoder copyFromBuffer:staging_buffer
                            sourceOffset:texture_array_relative_offset
                       sourceBytesPerRow:bytes_per_row
                     sourceBytesPerImage:bytes_per_image
                              sourceSize:MTLSizeMake(extent[0], extent[1], 1)
                               toTexture:texture_handle
                        destinationSlice:array_slice
                        destinationLevel:mip
                       destinationOrigin:MTLOriginMake(offset[0], offset[1], 0)];

            texture_array_relative_offset += bytes_per_image;
          }
        }
        else {
          /* Use Compute texture update. */
          if (type_ == GPU_TEXTURE_2D) {
            id<MTLComputePipelineState> pso = texture_update_2d_get_kernel(
                compute_specialization_kernel);
            TextureUpdateParams params = {mip,
                                          {extent[0], extent[1], 1},
                                          {offset[0], offset[1], 0},
                                          ((ctx->pipeline_state.unpack_row_length == 0) ?
                                               extent[0] :
                                               ctx->pipeline_state.unpack_row_length)};

            /* Bind resources via compute state for optimal state caching performance. */
            MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
            cs.bind_pso(pso);
            cs.bind_compute_bytes(&params, sizeof(params), 0);
            cs.bind_compute_buffer(staging_buffer, 0, 1);
            cs.bind_compute_texture(texture_handle, 0);
            [compute_encoder
                      dispatchThreads:MTLSizeMake(
                                          extent[0], extent[1], 1) /* Width, Height, Layer */
                threadsPerThreadgroup:MTLSizeMake(8, 8, 1)];
          }
          else if (type_ == GPU_TEXTURE_2D_ARRAY) {
            id<MTLComputePipelineState> pso = texture_update_2d_array_get_kernel(
                compute_specialization_kernel);
            TextureUpdateParams params = {mip,
                                          {extent[0], extent[1], extent[2]},
                                          {offset[0], offset[1], offset[2]},
                                          ((ctx->pipeline_state.unpack_row_length == 0) ?
                                               extent[0] :
                                               ctx->pipeline_state.unpack_row_length)};

            /* Bind resources via compute state for optimal state caching performance. */
            MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
            cs.bind_pso(pso);
            cs.bind_compute_bytes(&params, sizeof(params), 0);
            cs.bind_compute_buffer(staging_buffer, 0, 1);
            cs.bind_compute_texture(texture_handle, 0);
            [compute_encoder dispatchThreads:MTLSizeMake(extent[0],
                                                         extent[1],
                                                         extent[2]) /* Width, Height, Layer */
                       threadsPerThreadgroup:MTLSizeMake(4, 4, 4)];
          }
        }

      } break;

      /* 3D */
      case GPU_TEXTURE_3D: {
        if (can_use_direct_blit) {
          size_t bytes_per_row = expected_dst_bytes_per_pixel *
                                 ((ctx->pipeline_state.unpack_row_length == 0) ?
                                      extent[0] :
                                      ctx->pipeline_state.unpack_row_length);
          size_t bytes_per_image = bytes_per_row * extent[1];
          [blit_encoder copyFromBuffer:staging_buffer
                          sourceOffset:0
                     sourceBytesPerRow:bytes_per_row
                   sourceBytesPerImage:bytes_per_image
                            sourceSize:MTLSizeMake(extent[0], extent[1], extent[2])
                             toTexture:texture_handle
                      destinationSlice:0
                      destinationLevel:mip
                     destinationOrigin:MTLOriginMake(offset[0], offset[1], offset[2])];
        }
        else {
          id<MTLComputePipelineState> pso = texture_update_3d_get_kernel(
              compute_specialization_kernel);
          TextureUpdateParams params = {mip,
                                        {extent[0], extent[1], extent[2]},
                                        {offset[0], offset[1], offset[2]},
                                        ((ctx->pipeline_state.unpack_row_length == 0) ?
                                             extent[0] :
                                             ctx->pipeline_state.unpack_row_length)};

          /* Bind resources via compute state for optimal state caching performance. */
          MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
          cs.bind_pso(pso);
          cs.bind_compute_bytes(&params, sizeof(params), 0);
          cs.bind_compute_buffer(staging_buffer, 0, 1);
          cs.bind_compute_texture(texture_handle, 0);
          [compute_encoder
                    dispatchThreads:MTLSizeMake(
                                        extent[0], extent[1], extent[2]) /* Width, Height, Depth */
              threadsPerThreadgroup:MTLSizeMake(4, 4, 4)];
        }
      } break;

      /* CUBE */
      case GPU_TEXTURE_CUBE: {
        if (can_use_direct_blit) {
          size_t bytes_per_row = expected_dst_bytes_per_pixel *
                                 ((ctx->pipeline_state.unpack_row_length == 0) ?
                                      extent[0] :
                                      ctx->pipeline_state.unpack_row_length);
          size_t bytes_per_image = bytes_per_row * extent[1];
          size_t texture_array_relative_offset = 0;

          /* Iterate over all cube faces in range (offset[2], offset[2] + extent[2]). */
          for (int i = 0; i < extent[2]; i++) {
            int face_index = offset[2] + i;

            [blit_encoder copyFromBuffer:staging_buffer
                            sourceOffset:texture_array_relative_offset
                       sourceBytesPerRow:bytes_per_row
                     sourceBytesPerImage:bytes_per_image
                              sourceSize:MTLSizeMake(extent[0], extent[1], 1)
                               toTexture:texture_handle
                        destinationSlice:face_index /* = cubeFace + arrayIndex * 6 */
                        destinationLevel:mip
                       destinationOrigin:MTLOriginMake(offset[0], offset[1], 0)];
            texture_array_relative_offset += bytes_per_image;
          }
        }
        else {
          MTL_LOG_ERROR(
              "TODO(Metal): Support compute texture update for GPU_TEXTURE_CUBE %d, %d, %d\n",
              w_,
              h_,
              d_);
        }
      } break;

      case GPU_TEXTURE_CUBE_ARRAY: {
        if (can_use_direct_blit) {

          size_t bytes_per_row = expected_dst_bytes_per_pixel *
                                 ((ctx->pipeline_state.unpack_row_length == 0) ?
                                      extent[0] :
                                      ctx->pipeline_state.unpack_row_length);
          size_t bytes_per_image = bytes_per_row * extent[1];

          /* Upload to all faces between offset[2] (which is zero in most cases) AND extent[2]. */
          size_t texture_array_relative_offset = 0;
          for (int i = 0; i < extent[2]; i++) {
            int face_index = offset[2] + i;
            [blit_encoder copyFromBuffer:staging_buffer
                            sourceOffset:texture_array_relative_offset
                       sourceBytesPerRow:bytes_per_row
                     sourceBytesPerImage:bytes_per_image
                              sourceSize:MTLSizeMake(extent[0], extent[1], 1)
                               toTexture:texture_handle
                        destinationSlice:face_index /* = cubeFace + arrayIndex * 6. */
                        destinationLevel:mip
                       destinationOrigin:MTLOriginMake(offset[0], offset[1], 0)];
            texture_array_relative_offset += bytes_per_image;
          }
        }
        else {
          MTL_LOG_ERROR(
              "TODO(Metal): Support compute texture update for GPU_TEXTURE_CUBE_ARRAY %d, %d, "
              "%d\n",
              w_,
              h_,
              d_);
        }
      } break;

      case GPU_TEXTURE_BUFFER: {
        /* TODO(Metal): Support data upload to TEXTURE BUFFER.
         * Data uploads generally happen via VertBuf instead. */
        BLI_assert(false);
      } break;

      case GPU_TEXTURE_ARRAY:
        /* Not an actual format - modifier flag for others. */
        return;
    }

    /* If staging texture was used, copy contents to original texture. */
    if (use_staging_texture) {
      /* When using staging texture, copy results into existing texture. */
      BLI_assert(staging_texture != nil);
      blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();

      /* Copy modified staging texture region back to original texture.
       * Differing blit dimensions based on type. */
      switch (type_) {
        case GPU_TEXTURE_1D:
        case GPU_TEXTURE_1D_ARRAY: {
          int base_slice = (type_ == GPU_TEXTURE_1D_ARRAY) ? offset[1] : 0;
          int final_slice = base_slice + ((type_ == GPU_TEXTURE_1D_ARRAY) ? extent[1] : 1);
          for (int array_index = base_slice; array_index < final_slice; array_index++) {
            [blit_encoder copyFromTexture:staging_texture
                              sourceSlice:array_index
                              sourceLevel:mip
                             sourceOrigin:MTLOriginMake(offset[0], 0, 0)
                               sourceSize:MTLSizeMake(extent[0], 1, 1)
                                toTexture:texture_
                         destinationSlice:array_index
                         destinationLevel:mip
                        destinationOrigin:MTLOriginMake(offset[0], 0, 0)];
          }
        } break;
        case GPU_TEXTURE_2D:
        case GPU_TEXTURE_2D_ARRAY: {
          int base_slice = (type_ == GPU_TEXTURE_2D_ARRAY) ? offset[2] : 0;
          int final_slice = base_slice + ((type_ == GPU_TEXTURE_2D_ARRAY) ? extent[2] : 1);
          for (int array_index = base_slice; array_index < final_slice; array_index++) {
            [blit_encoder copyFromTexture:staging_texture
                              sourceSlice:array_index
                              sourceLevel:mip
                             sourceOrigin:MTLOriginMake(offset[0], offset[1], 0)
                               sourceSize:MTLSizeMake(extent[0], extent[1], 1)
                                toTexture:texture_
                         destinationSlice:array_index
                         destinationLevel:mip
                        destinationOrigin:MTLOriginMake(offset[0], offset[1], 0)];
          }
        } break;
        case GPU_TEXTURE_3D: {
          [blit_encoder copyFromTexture:staging_texture
                            sourceSlice:0
                            sourceLevel:mip
                           sourceOrigin:MTLOriginMake(offset[0], offset[1], offset[2])
                             sourceSize:MTLSizeMake(extent[0], extent[1], extent[2])
                              toTexture:texture_
                       destinationSlice:0
                       destinationLevel:mip
                      destinationOrigin:MTLOriginMake(offset[0], offset[1], offset[2])];
        } break;
        case GPU_TEXTURE_CUBE:
        case GPU_TEXTURE_CUBE_ARRAY: {
          /* Iterate over all cube faces in range (offset[2], offset[2] + extent[2]). */
          for (int i = 0; i < extent[2]; i++) {
            int face_index = offset[2] + i;
            [blit_encoder copyFromTexture:staging_texture
                              sourceSlice:face_index
                              sourceLevel:mip
                             sourceOrigin:MTLOriginMake(offset[0], offset[1], 0)
                               sourceSize:MTLSizeMake(extent[0], extent[1], 1)
                                toTexture:texture_
                         destinationSlice:face_index
                         destinationLevel:mip
                        destinationOrigin:MTLOriginMake(offset[0], offset[1], 0)];
          }
        } break;
        case GPU_TEXTURE_ARRAY:
        case GPU_TEXTURE_BUFFER:
          BLI_assert_unreachable();
          break;
      }

      [staging_texture release];
    }

    /* Finalize Blit Encoder. */
    if (can_use_direct_blit) {
      /* Textures which use MTLStorageModeManaged need to have updated contents
       * synced back to CPU to avoid an automatic flush overwriting contents. */
      if (texture_.storageMode == MTLStorageModeManaged) {
        [blit_encoder synchronizeResource:texture_];
      }
      [blit_encoder optimizeContentsForGPUAccess:texture_];
    }
    else {
      /* Textures which use MTLStorageModeManaged need to have updated contents
       * synced back to CPU to avoid an automatic flush overwriting contents. */
      blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
      if (texture_.storageMode == MTLStorageModeManaged) {
        [blit_encoder synchronizeResource:texture_];
      }
      [blit_encoder optimizeContentsForGPUAccess:texture_];
    }

    /* Decrement texture reference counts. This ensures temporary texture views are released. */
    [texture_handle release];

    ctx->main_command_buffer.submit(false);

    /* Release temporary staging buffer allocation.
     * NOTE: Allocation will be tracked with command submission and released once no longer in
     * use. */
    temp_allocation->free();
  }
}

void MTLTexture::update_sub(int offset[3],
                            int extent[3],
                            eGPUDataFormat format,
                            GPUPixelBuffer *pixbuf)
{
  /* Update texture from pixel buffer. */
  BLI_assert(validate_data_format(format_, format));
  BLI_assert(pixbuf != nullptr);

  /* Fetch pixel buffer metal buffer. */
  MTLPixelBuffer *mtl_pix_buf = static_cast<MTLPixelBuffer *>(unwrap(pixbuf));
  id<MTLBuffer> buffer = mtl_pix_buf->get_metal_buffer();
  BLI_assert(buffer != nil);
  if (buffer == nil) {
    return;
  }

  /* Ensure texture is ready. */
  this->ensure_baked();
  BLI_assert(texture_ != nil);

  /* Calculate dimensions. */
  int num_image_channels = to_component_len(format_);

  size_t bytes_per_pixel = num_image_channels * to_bytesize(format);
  size_t bytes_per_row = bytes_per_pixel * extent[0];
  size_t bytes_per_image = bytes_per_row * extent[1];

  /* Currently only required for 2D textures. */
  if (type_ == GPU_TEXTURE_2D) {

    /* Create blit command encoder to copy data. */
    MTLContext *ctx = MTLContext::get();
    BLI_assert(ctx);

    id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
    [blit_encoder copyFromBuffer:buffer
                    sourceOffset:0
               sourceBytesPerRow:bytes_per_row
             sourceBytesPerImage:bytes_per_image
                      sourceSize:MTLSizeMake(extent[0], extent[1], 1)
                       toTexture:texture_
                destinationSlice:0
                destinationLevel:0
               destinationOrigin:MTLOriginMake(offset[0], offset[1], 0)];

    if (texture_.storageMode == MTLStorageModeManaged) {
      [blit_encoder synchronizeResource:texture_];
    }
    [blit_encoder optimizeContentsForGPUAccess:texture_];
  }
  else {
    BLI_assert(false);
  }
}

void gpu::MTLTexture::ensure_mipmaps(int miplvl)
{

  /* Do not update texture view. */
  BLI_assert(resource_mode_ != MTL_TEXTURE_MODE_TEXTURE_VIEW);

  /* Clamp level to maximum. */
  int effective_h = (type_ == GPU_TEXTURE_1D_ARRAY) ? 0 : h_;
  int effective_d = (type_ != GPU_TEXTURE_3D) ? 0 : d_;
  int max_dimension = max_iii(w_, effective_h, effective_d);
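  /* A full mip chain for a largest dimension N contains floor(log2(N)) levels below the base
   * level, so floor(log2(N)) is the highest valid mip index. */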
  int max_miplvl = floor(log2(max_dimension));
  miplvl = min_ii(max_miplvl, miplvl);

  /* Increase mipmap level. */
  if (mipmaps_ < miplvl) {
    mipmaps_ = miplvl;

    /* Check if baked. */
    if (is_baked_ && mipmaps_ > mtl_max_mips_) {
      BLI_assert_msg(false,
                     "Texture requires a higher mipmap level count. Please specify the required "
                     "amount upfront.");
      is_dirty_ = true;
      MTL_LOG_WARNING("Texture requires regenerating due to increase in mip-count");
    }
  }
  this->mip_range_set(0, mipmaps_);
}

void gpu::MTLTexture::generate_mipmap()
{
  /* Compressed textures allow users to provide their own custom mipmaps. And
   * we can't generate them at runtime anyway. */
  if (format_flag_ & GPU_FORMAT_COMPRESSED) {
    return;
  }

  /* Fetch Active Context. */
  MTLContext *ctx = MTLContext::get();
  BLI_assert(ctx);

  if (!ctx->device) {
    MTL_LOG_ERROR("Cannot generate mip-maps -- metal device invalid\n");
    BLI_assert(false);
    return;
  }

  /* Ensure mipmaps. */
  this->ensure_mipmaps(mtl_max_mips_);

  /* Ensure texture is baked. */
  this->ensure_baked();
  BLI_assert_msg(is_baked_ && texture_, "MTLTexture is not valid");

  if (mipmaps_ == 1 || mtl_max_mips_ == 1) {
    /* Nothing to do. */
    return;
  }

  /* Verify if we can perform mipmap generation. */
  if (format_ == TextureFormat::SFLOAT_32_DEPTH || format_ == TextureFormat::UNORM_16_DEPTH ||
      format_ == TextureFormat::SFLOAT_32_DEPTH_UINT_8)
  {
    MTL_LOG_WARNING("Cannot generate mipmaps for textures using DEPTH formats");
    return;
  }

  @autoreleasepool {
    /* Fetch active BlitCommandEncoder. */
    id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
    if (G.debug & G_DEBUG_GPU) {
      [enc insertDebugSignpost:@"Generate MipMaps"];
    }
    [enc generateMipmapsForTexture:texture_];
    has_generated_mips_ = true;
  }
}

void gpu::MTLTexture::copy_to(Texture *dst)
{
  /* Safety Checks. */
  gpu::MTLTexture *mt_src = this;
  gpu::MTLTexture *mt_dst = static_cast<gpu::MTLTexture *>(dst);
  BLI_assert((mt_dst->w_ == mt_src->w_) && (mt_dst->h_ == mt_src->h_) &&
             (mt_dst->d_ == mt_src->d_));
  BLI_assert(mt_dst->format_ == mt_src->format_);
  BLI_assert(mt_dst->type_ == mt_src->type_);

  UNUSED_VARS_NDEBUG(mt_src);

  /* Fetch active context. */
  MTLContext *ctx = MTLContext::get();
  BLI_assert(ctx);

  /* Ensure texture is baked. */
  this->ensure_baked();

  @autoreleasepool {
    /* Setup blit encoder. */
    id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
    BLI_assert(blit_encoder != nil);

    /* TODO(Metal): Consider supporting multiple mip levels IF the GL implementation
     * follows, currently it does not. */
    int mip = 0;

    /* NOTE: mip_size_get() won't override any dimension that is equal to 0. */
    int extent[3] = {1, 1, 1};
    this->mip_size_get(mip, extent);

    switch (mt_dst->type_) {
      case GPU_TEXTURE_2D_ARRAY:
      case GPU_TEXTURE_CUBE_ARRAY:
      case GPU_TEXTURE_3D: {
        /* Do full texture copy for 3D textures. */
        BLI_assert(mt_dst->d_ == d_);
        [blit_encoder copyFromTexture:this->get_metal_handle_base()
                            toTexture:mt_dst->get_metal_handle_base()];
        [blit_encoder optimizeContentsForGPUAccess:mt_dst->get_metal_handle_base()];
      } break;
      default: {
        int slice = 0;
        this->blit(blit_encoder,
                   0,
                   0,
                   0,
                   slice,
                   mip,
                   mt_dst,
                   0,
                   0,
                   0,
                   slice,
                   mip,
                   extent[0],
                   extent[1],
                   extent[2]);
      } break;
    }
  }
}

void gpu::MTLTexture::clear(eGPUDataFormat data_format, const void *data)
{
  /* Ensure texture is baked. */
  this->ensure_baked();

  /* If render-pass clear is not supported, use compute-based clear. */
  bool do_render_pass_clear = true;
  if (!(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATTACHMENT)) {
    do_render_pass_clear = false;
  }
  /* If texture is buffer-backed, clear directly on the buffer.
   * NOTE: This is currently only true for fallback atomic textures. */
  if (backing_buffer_ != nullptr) {
    uint channel_len = to_component_len(format_);
    uint channel_size = to_bytesize(data_format);
    bool fast_buf_clear = true;
    const uchar *val = reinterpret_cast<const uchar *>(data);
    for (int i = 1; i < channel_size * channel_len; i++) {
      fast_buf_clear = fast_buf_clear && (val[i] == val[0]);
    }
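    /* fillBuffer:range:value: writes a single repeated byte, so the fast path is only valid when
     * every byte of the clear value is identical (e.g. a zero-clear of any format). */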
    if (fast_buf_clear) {
      /* Fetch active context. */
      MTLContext *ctx = MTLContext::get();
      BLI_assert(ctx);

      /* Begin blit encoder. */
      id<MTLBlitCommandEncoder> blit_encoder =
          ctx->main_command_buffer.ensure_begin_blit_encoder();
      [blit_encoder fillBuffer:backing_buffer_->get_metal_buffer()
                         range:NSMakeRange(0, backing_buffer_->get_size())
                         value:val[0]];
    }
    else {
      BLI_assert_msg(false,
                     "Non-repeating-byte-pattern clear for buffer-backed textures not supported!");
    }
    return;
  }

  if (do_render_pass_clear) {
    /* Create clear frame-buffer for fast clear. */
    FrameBuffer *prev_fb = GPU_framebuffer_active_get();
    FrameBuffer *fb = this->get_blit_framebuffer(-1, 0);
    fb->bind(true);
    fb->clear_attachment(this->attachment_type(0), data_format, data);
    GPU_framebuffer_bind(prev_fb);
  }
  else {
    /* Prepare specialization struct (For texture clear routine). */
    int num_channels = to_component_len(format_);
    TextureUpdateRoutineSpecialisation compute_specialization_kernel = {
        tex_data_format_to_msl_type_str(data_format),              /* INPUT DATA FORMAT */
        tex_data_format_to_msl_texture_template_type(data_format), /* TEXTURE DATA FORMAT */
        num_channels,
        num_channels,
        true /* Operation is a clear. */
    };

    /* Determine size of source data clear. */
    uint clear_data_size = to_bytesize(format_, data_format);

    /* Fetch active context. */
    MTLContext *ctx = MTLContext::get();
    BLI_assert(ctx);

    /* Determine writeable texture handle. */
    id<MTLTexture> texture_handle = texture_;

    /* Begin compute encoder. */
    id<MTLComputeCommandEncoder> compute_encoder =
        ctx->main_command_buffer.ensure_begin_compute_encoder();

    /* Perform clear operation based on texture type. */
    switch (type_) {
      case GPU_TEXTURE_1D: {
        id<MTLComputePipelineState> pso = texture_update_1d_get_kernel(
            compute_specialization_kernel);
        TextureUpdateParams params = {0,
                                      {w_, 1, 1},
                                      {0, 0, 0},
                                      ((ctx->pipeline_state.unpack_row_length == 0) ?
                                           w_ :
                                           ctx->pipeline_state.unpack_row_length)};

        /* Bind resources via compute state for optimal state caching performance. */
        MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
        cs.bind_pso(pso);
        cs.bind_compute_bytes(&params, sizeof(params), 0);
        cs.bind_compute_bytes(data, clear_data_size, 1);
        cs.bind_compute_texture(texture_handle, 0);
        [compute_encoder dispatchThreads:MTLSizeMake(w_, 1, 1) /* Width, Height, Layer */
                   threadsPerThreadgroup:MTLSizeMake(64, 1, 1)];
      } break;
      case GPU_TEXTURE_1D_ARRAY: {
        id<MTLComputePipelineState> pso = texture_update_1d_array_get_kernel(
            compute_specialization_kernel);
        TextureUpdateParams params = {0,
                                      {w_, h_, 1},
                                      {0, 0, 0},
                                      ((ctx->pipeline_state.unpack_row_length == 0) ?
                                           w_ :
                                           ctx->pipeline_state.unpack_row_length)};

        /* Bind resources via compute state for optimal state caching performance. */
        MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
        cs.bind_pso(pso);
        cs.bind_compute_bytes(&params, sizeof(params), 0);
        cs.bind_compute_bytes(data, clear_data_size, 1);
        cs.bind_compute_texture(texture_handle, 0);
        [compute_encoder dispatchThreads:MTLSizeMake(w_, h_, 1) /* Width, layers. */
                   threadsPerThreadgroup:MTLSizeMake(8, 8, 1)];
      } break;
      default: {
        MTL_LOG_ERROR(
            "gpu::MTLTexture::clear requires a compute pass for texture "
            "type: %d, but this is not yet supported",
            (int)type_);
      } break;
    }

    /* Textures which use MTLStorageModeManaged need to have updated contents
     * synced back to CPU to avoid an automatic flush overwriting contents. */
    id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
    if (texture_.storageMode == MTLStorageModeManaged) {
      [blit_encoder synchronizeResource:texture_];
    }
    [blit_encoder optimizeContentsForGPUAccess:texture_];
  }
}

static MTLTextureSwizzle swizzle_to_mtl(const char swizzle)
{
  switch (swizzle) {
    default:
    case 'x':
    case 'r':
      return MTLTextureSwizzleRed;
    case 'y':
    case 'g':
      return MTLTextureSwizzleGreen;
    case 'z':
    case 'b':
      return MTLTextureSwizzleBlue;
    case 'w':
    case 'a':
      return MTLTextureSwizzleAlpha;
    case '0':
      return MTLTextureSwizzleZero;
    case '1':
      return MTLTextureSwizzleOne;
  }
}

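/* Example: a swizzle mask of "111r" maps R/G/B to constant one and alpha to the texture's red
 * channel, while "rgba" restores the identity mapping set up in mtl_texture_init(). */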
void gpu::MTLTexture::swizzle_set(const char swizzle_mask[4])
{
  if (memcmp(tex_swizzle_mask_, swizzle_mask, 4) != 0) {
    memcpy(tex_swizzle_mask_, swizzle_mask, 4);

    /* Create the swizzle mask and flag as dirty if changed. */
    MTLTextureSwizzleChannels new_swizzle_mask = MTLTextureSwizzleChannelsMake(
        swizzle_to_mtl(swizzle_mask[0]),
        swizzle_to_mtl(swizzle_mask[1]),
        swizzle_to_mtl(swizzle_mask[2]),
        swizzle_to_mtl(swizzle_mask[3]));

    mtl_swizzle_mask_ = new_swizzle_mask;
    texture_view_dirty_flags_ |= TEXTURE_VIEW_SWIZZLE_DIRTY;
  }
}

void gpu::MTLTexture::mip_range_set(int min, int max)
{
  BLI_assert(min <= max && min >= 0 && max <= mipmaps_);

  /* NOTE:
   * - mip_min_ and mip_max_ are used to Clamp LODs during sampling.
   * - For the time being, we are going to just need to generate a FULL mipmap chain
   *   as we do not know ahead of time whether mipmaps will be used.
   *
   * TODO(Metal): Add texture initialization flag to determine whether mipmaps are used
   * or not. Will be important for saving memory for big textures. */
  mip_min_ = min;
  mip_max_ = max;

  if ((type_ == GPU_TEXTURE_1D_ARRAY || type_ == GPU_TEXTURE_BUFFER) &&
      max > 1)
  {

    MTL_LOG_ERROR(
        " MTLTexture of type TEXTURE_1D_ARRAY or TEXTURE_BUFFER cannot have a mipcount "
        "greater than 1\n");
    mip_min_ = 0;
    mip_max_ = 0;
    mipmaps_ = 0;
    BLI_assert(false);
  }

  /* Mip range for texture view. */
  mip_texture_base_level_ = mip_min_;
  mip_texture_max_level_ = mip_max_;
  texture_view_dirty_flags_ |= TEXTURE_VIEW_MIP_DIRTY;
}

void *gpu::MTLTexture::read(int mip, eGPUDataFormat type)
{
  /* Prepare Array for return data. */
  BLI_assert(!(format_flag_ & GPU_FORMAT_COMPRESSED));
  BLI_assert(mip <= mipmaps_);
  BLI_assert(validate_data_format(format_, type));

  /* NOTE: mip_size_get() won't override any dimension that is equal to 0. */
  int extent[3] = {1, 1, 1};
  this->mip_size_get(mip, extent);

  size_t sample_len = extent[0] * max_ii(extent[1], 1) * max_ii(extent[2], 1);
  size_t sample_size = to_bytesize(format_, type);
  size_t texture_size = sample_len * sample_size;
  int num_channels = to_component_len(format_);

  void *data = MEM_mallocN(texture_size + 8, "GPU_texture_read");
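  /* The 8 bytes of slack are counted in the size handed to read_internal() below, which uses it
   * as the upper bound for its debug size check. */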

  /* Ensure texture is baked. */
  if (is_baked_) {
    this->read_internal(
        mip, 0, 0, 0, extent[0], extent[1], extent[2], type, num_channels, texture_size + 8, data);
  }
  else {
    /* Clear return values? */
    MTL_LOG_WARNING("MTLTexture::read - reading from texture with no image data");
  }

  return data;
}

/* Fetch the raw buffer data from a texture and copy to CPU host ptr. */
void gpu::MTLTexture::read_internal(int mip,
                                    int x_off,
                                    int y_off,
                                    int z_off,
                                    int width,
                                    int height,
                                    int depth,
                                    eGPUDataFormat desired_output_format,
                                    int num_output_components,
                                    size_t debug_data_size,
                                    void *r_data)
{
  /* Verify textures are baked. */
  if (!is_baked_) {
    MTL_LOG_WARNING("gpu::MTLTexture::read_internal - Trying to read from a non-baked texture!");
    return;
  }
  /* Fetch active context. */
  MTLContext *ctx = MTLContext::get();
  BLI_assert(ctx);

  /* Calculate desired output size. */
  int num_channels = to_component_len(format_);
  BLI_assert(num_output_components <= num_channels);
  size_t desired_output_bpp = num_output_components * to_bytesize(desired_output_format);

  /* Calculate Metal data output for trivial copy. */
  size_t image_bpp = get_mtl_format_bytesize(texture_.pixelFormat);
  uint image_components = get_mtl_format_num_components(texture_.pixelFormat);
  bool is_depth_format = (format_flag_ & GPU_FORMAT_DEPTH);

  /* Verify if we need to use compute read. */
  eGPUDataFormat data_format = to_texture_data_format(this->format_get());
  bool format_conversion_needed = (data_format != desired_output_format);
  bool can_use_simple_read = (desired_output_bpp == image_bpp) && (!format_conversion_needed) &&
                             (num_output_components == image_components);

  /* Depth must be read using the compute shader -- Some safety checks to verify that params are
   * correct. */
  if (is_depth_format) {
    can_use_simple_read = false;
    /* TODO(Metal): Stencil data write not yet supported, so force components to one. */
    image_components = 1;
    BLI_assert(num_output_components == 1);
    BLI_assert(image_components == 1);
    BLI_assert(data_format == GPU_DATA_FLOAT || data_format == GPU_DATA_UINT_24_8_DEPRECATED);
    BLI_assert(validate_data_format(format_, data_format));
  }

  /* SPECIAL Workaround for R11G11B10, TextureFormat::UNORM_10_10_10_2,
   * TextureFormat::UINT_10_10_10_2 textures requesting a read using:
   * GPU_DATA_10_11_11_REV. */
  if (desired_output_format == GPU_DATA_10_11_11_REV ||
      desired_output_format == GPU_DATA_2_10_10_10_REV)
  {
    BLI_assert(format_ == TextureFormat::UFLOAT_11_11_10 ||
               format_ == TextureFormat::UNORM_10_10_10_2 ||
               format_ == TextureFormat::UINT_10_10_10_2);

    /* Override parameters - we'll be able to use simple copy, as bpp will match at 4 bytes. */
    image_bpp = sizeof(int);
    image_components = 1;
    desired_output_bpp = sizeof(int);
    num_output_components = 1;

    data_format = GPU_DATA_INT;
    format_conversion_needed = false;
    can_use_simple_read = true;
  }

  /* Determine size of output data. */
  size_t bytes_per_row = desired_output_bpp * width;
  size_t bytes_per_image = bytes_per_row * height;
  size_t total_bytes = bytes_per_image * max_ii(depth, 1);

  if (can_use_simple_read) {
    /* DEBUG check that if direct copy is being used, the expected output size matches the METAL
     * texture size. */
    BLI_assert(
        ((num_output_components * to_bytesize(desired_output_format)) == desired_output_bpp) &&
        (desired_output_bpp == image_bpp));
  }
  /* DEBUG check that the allocated data size matches the bytes we expect. */
  BLI_assert(total_bytes <= debug_data_size);
  UNUSED_VARS_NDEBUG(debug_data_size);

  /* Fetch allocation from scratch buffer. */
  gpu::MTLBuffer *dest_buf = MTLContext::get_global_memory_manager()->allocate_aligned(
      total_bytes, 256, true);
  BLI_assert(dest_buf != nullptr);

  id<MTLBuffer> destination_buffer = dest_buf->get_metal_buffer();
  BLI_assert(destination_buffer != nil);
  void *destination_buffer_host_ptr = dest_buf->get_host_ptr();
  BLI_assert(destination_buffer_host_ptr != nullptr);

  /* Prepare specialization struct (For non-trivial texture read routine). */
  int depth_format_mode = 0;
  if (is_depth_format) {
    depth_format_mode = 1;
    switch (desired_output_format) {
      case GPU_DATA_FLOAT:
        depth_format_mode = 1;
        break;
      case GPU_DATA_UINT_24_8_DEPRECATED:
        depth_format_mode = 2;
        break;
      case GPU_DATA_UINT:
        depth_format_mode = 4;
        break;
      default:
        BLI_assert_msg(false, "Unhandled depth read format case");
        break;
    }
  }
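
  /* depth_format_mode selects how the read kernel repacks depth output, mirroring the
   * eGPUDataFormat cases above: 1 = 32-bit float depth, 2 = combined 24-bit depth + 8-bit
   * stencil, 4 = unsigned integer depth. */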
1717
1718 TextureReadRoutineSpecialisation compute_specialization_kernel = {
1719 tex_data_format_to_msl_texture_template_type(data_format), /* TEXTURE DATA TYPE */
1720 tex_data_format_to_msl_type_str(desired_output_format), /* OUTPUT DATA TYPE */
1721 num_channels, /* TEXTURE COMPONENT COUNT */
1722 num_output_components, /* OUTPUT DATA COMPONENT COUNT */
1723 depth_format_mode};
1724
1725 bool copy_successful = false;
1726 @autoreleasepool {
1727
1728 /* TODO(Metal): Verify whether we need some form of barrier here to ensure reads
1729 * happen after work with associated texture is finished. */
1730 GPU_finish();
1731
1733 id<MTLTexture> read_texture = texture_;
1734 /* Use texture-view handle if reading from a GPU texture view. */
1735 if (resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) {
1736 read_texture = this->get_metal_handle();
1737 }
1738 /* Create Texture View for SRGB special case to bypass internal type conversion. */
1739 if (format_ == TextureFormat::SRGBA_8_8_8_8) {
1740 BLI_assert(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW);
1741 read_texture = [read_texture newTextureViewWithPixelFormat:MTLPixelFormatRGBA8Unorm];
1742 }
1743
1744 /* Perform per-texture type read. */
1745 switch (type_) {
1746 case GPU_TEXTURE_1D: {
1747 if (can_use_simple_read) {
1748 /* Use Blit Encoder READ. */
1749 id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
1750 if (G.debug & G_DEBUG_GPU) {
1751 [enc insertDebugSignpost:@"GPUTextureRead1D"];
1752 }
1753 [enc copyFromTexture:read_texture
1754 sourceSlice:0
1755 sourceLevel:mip
1756 sourceOrigin:MTLOriginMake(x_off, 0, 0)
1757 sourceSize:MTLSizeMake(width, 1, 1)
1758 toBuffer:destination_buffer
1759 destinationOffset:0
1760 destinationBytesPerRow:bytes_per_row
1761 destinationBytesPerImage:bytes_per_image];
1762 copy_successful = true;
1763 }
1764 else {
1765
1766 /* Use Compute READ. */
1767 id<MTLComputeCommandEncoder> compute_encoder =
1768 ctx->main_command_buffer.ensure_begin_compute_encoder();
1769 id<MTLComputePipelineState> pso = texture_read_1d_get_kernel(
1770 compute_specialization_kernel);
1771 TextureReadParams params = {
1772 mip,
1773 {width, 1, 1},
1774 {x_off, 0, 0},
1775 };
1776
1777 /* Bind resources via compute state for optimal state caching performance. */
1778 MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
1779 cs.bind_pso(pso);
1780 cs.bind_compute_bytes(&params, sizeof(params), 0);
1781 cs.bind_compute_buffer(destination_buffer, 0, 1);
1782 cs.bind_compute_texture(read_texture, 0);
1783 [compute_encoder dispatchThreads:MTLSizeMake(width, 1, 1) /* Width, Height, Layer */
1784 threadsPerThreadgroup:MTLSizeMake(8, 8, 1)];
1785 copy_successful = true;
1786 }
1787 } break;
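 /* The same two-path pattern repeats for every texture type below: a blit encoder copy
  * when source and destination layouts match byte-for-byte (can_use_simple_read),
  * otherwise a specialized compute kernel that performs per-texel format conversion
  * into the destination buffer. */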
1788
1789 case GPU_TEXTURE_1D_ARRAY: {
1790 if (can_use_simple_read) {
1791 /* Use Blit Encoder READ. */
1792 id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
1793 if (G.debug & G_DEBUG_GPU) {
1794 [enc insertDebugSignpost:@"GPUTextureRead1DArray"];
1795 }
1796
1797 int base_slice = y_off;
1798 int final_slice = base_slice + height;
1799 size_t texture_array_relative_offset = 0;
1800
1801 for (int array_slice = base_slice; array_slice < final_slice; array_slice++) {
1802 [enc copyFromTexture:read_texture
 1803 sourceSlice:array_slice
1804 sourceLevel:mip
1805 sourceOrigin:MTLOriginMake(x_off, 0, 0)
1806 sourceSize:MTLSizeMake(width, 1, 1)
1807 toBuffer:destination_buffer
1808 destinationOffset:texture_array_relative_offset
1809 destinationBytesPerRow:bytes_per_row
1810 destinationBytesPerImage:bytes_per_row];
1811 texture_array_relative_offset += bytes_per_row;
1812 }
1813 copy_successful = true;
1814 }
1815 else {
1816 /* Use Compute READ. */
1817 id<MTLComputeCommandEncoder> compute_encoder =
1818 ctx->main_command_buffer.ensure_begin_compute_encoder();
1819 id<MTLComputePipelineState> pso = texture_read_1d_array_get_kernel(
1820 compute_specialization_kernel);
1821 TextureReadParams params = {
1822 mip,
1823 {width, height, 1},
1824 {x_off, y_off, 0},
1825 };
1826
1827 /* Bind resources via compute state for optimal state caching performance. */
1828 MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
1829 cs.bind_pso(pso);
1830 cs.bind_compute_bytes(&params, sizeof(params), 0);
1831 cs.bind_compute_buffer(destination_buffer, 0, 1);
1832 cs.bind_compute_texture(read_texture, 0);
1833 [compute_encoder dispatchThreads:MTLSizeMake(width, height, 1) /* Width, Height, Layer */
1834 threadsPerThreadgroup:MTLSizeMake(8, 8, 1)];
1835 copy_successful = true;
1836 }
1837 } break;
1838
1839 case GPU_TEXTURE_2D: {
1840 if (can_use_simple_read) {
1841 /* Use Blit Encoder READ. */
1842 id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
1843 if (G.debug & G_DEBUG_GPU) {
1844 [enc insertDebugSignpost:@"GPUTextureRead2D"];
1845 }
1846 [enc copyFromTexture:read_texture
1847 sourceSlice:0
1848 sourceLevel:mip
1849 sourceOrigin:MTLOriginMake(x_off, y_off, 0)
1850 sourceSize:MTLSizeMake(width, height, 1)
1851 toBuffer:destination_buffer
1852 destinationOffset:0
1853 destinationBytesPerRow:bytes_per_row
1854 destinationBytesPerImage:bytes_per_image];
1855 copy_successful = true;
1856 }
1857 else {
1858
1859 /* Use Compute READ. */
1860 id<MTLComputeCommandEncoder> compute_encoder =
1861 ctx->main_command_buffer.ensure_begin_compute_encoder();
1862 id<MTLComputePipelineState> pso = texture_read_2d_get_kernel(
1863 compute_specialization_kernel);
1864 TextureReadParams params = {
1865 mip,
1866 {width, height, 1},
1867 {x_off, y_off, 0},
1868 };
1869
1870 /* Bind resources via compute state for optimal state caching performance. */
1871 MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
1872 cs.bind_pso(pso);
1873 cs.bind_compute_bytes(&params, sizeof(params), 0);
1874 cs.bind_compute_buffer(destination_buffer, 0, 1);
1875 cs.bind_compute_texture(read_texture, 0);
1876 [compute_encoder dispatchThreads:MTLSizeMake(width, height, 1) /* Width, Height, Layer */
1877 threadsPerThreadgroup:MTLSizeMake(8, 8, 1)];
1878 copy_successful = true;
1879 }
1880 } break;
1881
1882 case GPU_TEXTURE_2D_ARRAY: {
1883 if (can_use_simple_read) {
1884 /* Use Blit Encoder READ. */
1885 id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
1886 if (G.debug & G_DEBUG_GPU) {
1887 [enc insertDebugSignpost:@"GPUTextureRead2DArray"];
1888 }
1889 int base_slice = z_off;
1890 int final_slice = base_slice + depth;
1891 size_t texture_array_relative_offset = 0;
1892
1893 for (int array_slice = base_slice; array_slice < final_slice; array_slice++) {
1894 [enc copyFromTexture:read_texture
1895 sourceSlice:array_slice
1896 sourceLevel:mip
1897 sourceOrigin:MTLOriginMake(x_off, y_off, 0)
1898 sourceSize:MTLSizeMake(width, height, 1)
1899 toBuffer:destination_buffer
1900 destinationOffset:texture_array_relative_offset
1901 destinationBytesPerRow:bytes_per_row
1902 destinationBytesPerImage:bytes_per_image];
1903 texture_array_relative_offset += bytes_per_image;
1904 }
1905 copy_successful = true;
1906 }
1907 else {
1908
1909 /* Use Compute READ */
1910 id<MTLComputeCommandEncoder> compute_encoder =
1911 ctx->main_command_buffer.ensure_begin_compute_encoder();
1912 id<MTLComputePipelineState> pso = texture_read_2d_array_get_kernel(
1913 compute_specialization_kernel);
1914 TextureReadParams params = {
1915 mip,
1916 {width, height, depth},
1917 {x_off, y_off, z_off},
1918 };
1919
1920 /* Bind resources via compute state for optimal state caching performance. */
1921 MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
1922 cs.bind_pso(pso);
1923 cs.bind_compute_bytes(&params, sizeof(params), 0);
1924 cs.bind_compute_buffer(destination_buffer, 0, 1);
1925 cs.bind_compute_texture(read_texture, 0);
1926 [compute_encoder
1927 dispatchThreads:MTLSizeMake(width, height, depth) /* Width, Height, Layer */
1928 threadsPerThreadgroup:MTLSizeMake(8, 8, 1)];
1929 copy_successful = true;
1930 }
1931 } break;
1932
1933 case GPU_TEXTURE_3D: {
1934 if (can_use_simple_read) {
1935 /* Use Blit Encoder READ. */
1936 id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
1937 if (G.debug & G_DEBUG_GPU) {
1938 [enc insertDebugSignpost:@"GPUTextureRead3D"];
1939 }
1940 [enc copyFromTexture:read_texture
1941 sourceSlice:0
1942 sourceLevel:mip
1943 sourceOrigin:MTLOriginMake(x_off, y_off, z_off)
1944 sourceSize:MTLSizeMake(width, height, depth)
1945 toBuffer:destination_buffer
1946 destinationOffset:0
1947 destinationBytesPerRow:bytes_per_row
1948 destinationBytesPerImage:bytes_per_image];
1949 copy_successful = true;
1950 }
1951 else {
1952
1953 /* Use Compute READ. */
1954 id<MTLComputeCommandEncoder> compute_encoder =
1955 ctx->main_command_buffer.ensure_begin_compute_encoder();
1956 id<MTLComputePipelineState> pso = texture_read_3d_get_kernel(
1957 compute_specialization_kernel);
1958 TextureReadParams params = {
1959 mip,
1960 {width, height, depth},
1961 {x_off, y_off, z_off},
1962 };
1963
1964 /* Bind resources via compute state for optimal state caching performance. */
1965 MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
1966 cs.bind_pso(pso);
1967 cs.bind_compute_bytes(&params, sizeof(params), 0);
1968 cs.bind_compute_buffer(destination_buffer, 0, 1);
1969 cs.bind_compute_texture(read_texture, 0);
1970 [compute_encoder
1971 dispatchThreads:MTLSizeMake(width, height, depth) /* Width, Height, Layer */
1972 threadsPerThreadgroup:MTLSizeMake(4, 4, 4)];
1973 copy_successful = true;
1974 }
1975 } break;
1976
1977 case GPU_TEXTURE_CUBE:
 1978 case GPU_TEXTURE_CUBE_ARRAY: {
 1979 BLI_assert_msg(z_off == 0 || type_ == GPU_TEXTURE_CUBE_ARRAY,
 1980 "z_off > 0 is only supported by TEXTURE CUBE ARRAY reads.");
 1981 BLI_assert_msg(depth <= 6 || type_ == GPU_TEXTURE_CUBE_ARRAY,
 1982 "depth > 6 is only supported by TEXTURE CUBE ARRAY reads.");
1983 if (can_use_simple_read) {
1984 id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
1985 if (G.debug & G_DEBUG_GPU) {
1986 [enc insertDebugSignpost:@"GPUTextureReadCubeArray"];
1987 }
1988
1989 /* NOTE: Depth should have a minimum value of 1 as we read at least one slice. */
1990 int base_slice = z_off;
1991 int final_slice = base_slice + depth;
1992 size_t texture_array_relative_offset = 0;
1993
1994 for (int array_slice = base_slice; array_slice < final_slice; array_slice++) {
1995 [enc copyFromTexture:read_texture
1996 sourceSlice:array_slice
1997 sourceLevel:mip
1998 sourceOrigin:MTLOriginMake(x_off, y_off, 0)
1999 sourceSize:MTLSizeMake(width, height, 1)
2000 toBuffer:destination_buffer
2001 destinationOffset:texture_array_relative_offset
2002 destinationBytesPerRow:bytes_per_row
2003 destinationBytesPerImage:bytes_per_image];
2004
2005 texture_array_relative_offset += bytes_per_image;
2006 }
2007 MTL_LOG_DEBUG("Copying texture data to buffer GPU_TEXTURE_CUBE_ARRAY");
2008 copy_successful = true;
2009 }
2010 else {
2011 MTL_LOG_ERROR("TODO(Metal): unsupported compute copy of texture cube array");
2012 }
2013 } break;
2014
2015 default:
2017 "gpu::MTLTexture::read_internal simple-copy not yet supported for texture "
2018 "type: %d",
2019 (int)type_);
2020 break;
2021 }
2022
2023 if (copy_successful) {
2024
2025 /* Use Blit encoder to synchronize results back to CPU. */
2026 if (dest_buf->get_resource_options() == MTLResourceStorageModeManaged) {
2027 id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
2028 if (G.debug & G_DEBUG_GPU) {
2029 [enc insertDebugSignpost:@"GPUTextureRead-syncResource"];
2030 }
2031 [enc synchronizeResource:destination_buffer];
2032 }
2033
2034 /* Ensure GPU copy commands have completed. */
2035 GPU_finish();
2036
2037 /* Copy data from Shared Memory into ptr. */
2038 memcpy(r_data, destination_buffer_host_ptr, total_bytes);
2039 MTL_LOG_DEBUG("gpu::MTLTexture::read_internal success! %lu bytes read", total_bytes);
2040 }
2041 else {
2043 "gpu::MTLTexture::read_internal not yet supported for this config -- data "
2044 "format different (src %lu bytes, dst %lu bytes) (src format: %d, dst format: %d), or "
2045 "varying component counts (src %d, dst %d)",
2046 image_bpp,
2047 desired_output_bpp,
2048 (int)data_format,
2049 (int)desired_output_format,
2050 image_components,
2051 num_output_components);
2052 }
2053
2054 /* Release destination buffer. */
2055 dest_buf->free();
2056 }
2057}
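 /* Illustrative caller-side usage (a sketch, assuming the GPU_texture_read() entry
  * point from GPU_texture.hh, which routes into the read path above):
  *
  *   float *pixels = static_cast<float *>(GPU_texture_read(tex, GPU_DATA_FLOAT, 0));
  *   // ... consume pixels ...
  *   MEM_freeN(pixels);
  */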
2058
2059bool gpu::MTLTexture::init_internal()
2060{
2061 this->prepare_internal();
 2062 /* TODO(jbakker): Other limit checks should be added as well. When a texture exceeds a
 2063 * limit, it is not backed by an actual Metal texture and will crash when used. */
2064 const int limit = GPU_max_texture_3d_size();
2065 if ((type_ == GPU_TEXTURE_3D) && (w_ > limit || h_ > limit || d_ > limit)) {
2066 return false;
2067 }
2068 return true;
2069}
2070
2071bool gpu::MTLTexture::init_internal(VertBuf *vbo)
2072{
2073 MTLPixelFormat mtl_format = gpu_texture_format_to_metal(this->format_);
2074 mtl_max_mips_ = 1;
2075 mipmaps_ = 0;
2076 this->mip_range_set(0, 0);
2077
2078 /* Create texture from VertBuf's buffer. */
2079 MTLVertBuf *mtl_vbo = static_cast<MTLVertBuf *>(vbo);
2080 mtl_vbo->bind();
2081 mtl_vbo->flag_used();
2082
2083 /* Get Metal Buffer. */
2084 id<MTLBuffer> source_buffer = mtl_vbo->get_metal_buffer();
2085 BLI_assert(source_buffer);
2086
2087 /* Verify size. */
2088 if (w_ <= 0) {
2089 MTL_LOG_WARNING("Allocating texture buffer of width 0!");
2090 w_ = 1;
2091 }
2092
2093 /* Verify Texture and vertex buffer alignment. */
 2094 const GPUVertFormat *format = GPU_vertbuf_get_format(vbo);
 2095 size_t bytes_per_pixel = get_mtl_format_bytesize(mtl_format);
2096 size_t bytes_per_row = bytes_per_pixel * w_;
2097
2098 MTLContext *mtl_ctx = MTLContext::get();
2099 uint32_t align_requirement = uint32_t(
2100 [mtl_ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]);
2101
2102 /* If stride is larger than bytes per pixel, but format has multiple attributes,
2103 * split attributes across several pixels. */
2104 if (format->stride > bytes_per_pixel && format->attr_len > 1) {
2105
2106 /* We need to increase the number of pixels available to store additional attributes.
2107 * First ensure that the total stride of the vertex format fits uniformly into
2108 * multiple pixels. If these sizes are different, then attributes are of differing
2109 * sizes and this operation is unsupported. */
2110 if (bytes_per_pixel * format->attr_len != format->stride) {
2111 BLI_assert_msg(false,
2112 "Cannot split attributes across multiple pixels as attribute format sizes do "
2113 "not match.");
2114 return false;
2115 }
2116
2117 /* Provide a single pixel per attribute. */
2118 /* Increase bytes per row to ensure there are enough bytes for all vertex attribute data. */
2119 bytes_per_row *= format->attr_len;
2120 BLI_assert(bytes_per_row == format->stride * w_);
2121
2122 /* Multiply width of image to provide one attribute per pixel. */
2123 w_ *= format->attr_len;
2124 BLI_assert(bytes_per_row == bytes_per_pixel * w_);
2125 BLI_assert_msg(w_ == mtl_vbo->vertex_len * format->attr_len,
2126 "Image should contain one pixel for each attribute in every vertex.");
2127 }
2128 else {
2129 /* Verify per-vertex size aligns with texture size. */
2130 BLI_assert(bytes_per_pixel == format->stride &&
2131 "Pixel format stride MUST match the texture format stride -- These being different "
2132 "is likely caused by Metal's VBO padding to a minimum of 4-bytes per-vertex."
2133 " If multiple attributes are used. Each attribute is to be packed into its own "
2134 "individual pixel when stride length is exceeded. ");
2135 }
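 /* Worked example (illustrative): a vertex format with two float4 attributes has
  * stride = 32 bytes while an RGBA32F pixel holds 16 bytes, so attr_len = 2 doubles
  * both bytes_per_row and w_, giving each attribute its own pixel. */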
2136
2137 /* Create texture descriptor. */
 2138 BLI_assert(type_ == GPU_TEXTURE_BUFFER);
 2139 texture_descriptor_ = [[MTLTextureDescriptor alloc] init];
2140 texture_descriptor_.pixelFormat = mtl_format;
2141 texture_descriptor_.textureType = MTLTextureTypeTextureBuffer;
2142 texture_descriptor_.width = w_;
2143 texture_descriptor_.height = 1;
2144 texture_descriptor_.depth = 1;
2145 texture_descriptor_.arrayLength = 1;
2146 texture_descriptor_.mipmapLevelCount = mtl_max_mips_;
2147 texture_descriptor_.usage =
2148 MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite |
2149 MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimize usage flags. */
2150 texture_descriptor_.storageMode = [source_buffer storageMode];
2151 texture_descriptor_.sampleCount = 1;
2152 texture_descriptor_.cpuCacheMode = [source_buffer cpuCacheMode];
2153 texture_descriptor_.hazardTrackingMode = [source_buffer hazardTrackingMode];
2154
2155 texture_ = [source_buffer
2156 newTextureWithDescriptor:texture_descriptor_
2157 offset:0
2158 bytesPerRow:ceil_to_multiple_ul(bytes_per_row, align_requirement)];
2159 aligned_w_ = bytes_per_row / bytes_per_pixel;
2160
2161 BLI_assert(texture_);
2162 texture_.label = [NSString stringWithUTF8String:this->get_name()];
2163 is_baked_ = true;
2164 is_dirty_ = false;
2165 resource_mode_ = MTL_TEXTURE_MODE_VBO;
2166
2167 /* Track Status. */
2168 vert_buffer_ = mtl_vbo;
2169 vert_buffer_mtl_ = source_buffer;
2170
2171 return true;
2172}
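 /* Illustrative caller-side usage (a sketch, assuming GPU_texture_create_from_vertbuf()
  * from GPU_texture.hh, which ends up in the buffer-backed path above):
  *
  *   blender::gpu::Texture *tex = GPU_texture_create_from_vertbuf("vbo_tex", vbo);
  */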
2173
2174bool gpu::MTLTexture::init_internal(gpu::Texture *src,
 2175 int mip_offset,
2176 int layer_offset,
2177 bool use_stencil)
2178{
2179 BLI_assert(src);
2180
2181 /* Zero initialize. */
2182 this->prepare_internal();
2183
2184 /* Flag as using texture view. */
2185 resource_mode_ = MTL_TEXTURE_MODE_TEXTURE_VIEW;
2186 source_texture_ = src;
2187 mip_texture_base_level_ = mip_offset;
2188 mip_texture_base_layer_ = layer_offset;
2189 texture_view_dirty_flags_ |= TEXTURE_VIEW_MIP_DIRTY;
2190
2191 /* Assign usage. */
2192 internal_gpu_image_usage_flags_ = GPU_texture_usage(src);
2193
2194 /* Assign texture as view. */
2195 gpu::MTLTexture *mtltex = static_cast<gpu::MTLTexture *>(src);
2196 mtltex->ensure_baked();
2197 texture_ = mtltex->texture_;
2198 BLI_assert(texture_);
2199 [texture_ retain];
2200
2201 /* Flag texture as baked -- we do not need explicit initialization. */
2202 is_baked_ = true;
2203 is_dirty_ = false;
2204
2205 /* Stencil view support. */
2206 texture_view_stencil_ = false;
2207 if (use_stencil) {
2208 BLI_assert(ELEM(format_, TextureFormat::SFLOAT_32_DEPTH_UINT_8));
2209 texture_view_stencil_ = true;
2210 }
2211
2212 /* Bake mip swizzle view. */
2213 bake_mip_swizzle_view();
2214 return true;
2215}
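 /* Illustrative caller-side usage (a sketch, assuming GPU_texture_create_view() from
  * GPU_texture.hh; verify the parameter list against the header):
  *
  *   blender::gpu::Texture *view = GPU_texture_create_view(
  *       "stencil_view", src_tex, TextureFormat::SFLOAT_32_DEPTH_UINT_8,
  *       0, 1, 0, 1, false, true);
  */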
2216
2218
2219/* -------------------------------------------------------------------- */
2222
2223bool gpu::MTLTexture::texture_is_baked()
2224{
2225 return is_baked_;
2226}
2227
2228/* Prepare texture parameters after initialization, but before baking. */
2229void gpu::MTLTexture::prepare_internal()
2230{
 2231 /* Take a copy of the flags so that any modifications we make won't affect the texture
 2232 * cache/pool match-finding test. */
2233 internal_gpu_image_usage_flags_ = gpu_image_usage_flags_;
2234
2235 /* Metal: Texture clearing is done using frame-buffer clear. This has no performance impact or
2236 * bandwidth implications for lossless compression and is considered best-practice.
2237 *
 2238 * Attachment usage is also required for depth-stencil attachment targets, for depth-update support.
2239 * NOTE: Emulated atomic textures cannot support render-target usage. For clearing, the backing
2240 * buffer is cleared instead.
2241 */
2242 if (!((internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATOMIC) &&
2243 !MTLBackend::get_capabilities().supports_texture_atomics))
2244 {
2245 /* Force attachment usage - see comment above. */
2246 internal_gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_ATTACHMENT;
2247 }
2248
2249 /* Derive maximum number of mip levels by default.
2250 * TODO(Metal): This can be removed if max mip counts are specified upfront. */
2251 if (type_ == GPU_TEXTURE_1D || type_ == GPU_TEXTURE_1D_ARRAY || type_ == GPU_TEXTURE_BUFFER) {
2252 mtl_max_mips_ = 1;
2253 }
2254 else {
2255 /* Require correct explicit mipmap level counts. */
2256 mtl_max_mips_ = mipmaps_;
2257 }
2258}
2259
2260void gpu::MTLTexture::ensure_baked()
2261{
2262
2263 /* If properties have changed, re-bake. */
2264 id<MTLTexture> previous_texture = nil;
2265 bool copy_previous_contents = false;
2266
2267 if (is_baked_ && is_dirty_) {
2268 copy_previous_contents = true;
2269 previous_texture = texture_;
2270 [previous_texture retain];
2271 this->reset();
2272 }
2273
2274 if (!is_baked_) {
2275 MTLContext *ctx = MTLContext::get();
2276 BLI_assert(ctx);
2277
2278 /* Ensure texture mode is valid. */
2279 BLI_assert(resource_mode_ != MTL_TEXTURE_MODE_EXTERNAL);
2280 BLI_assert(resource_mode_ != MTL_TEXTURE_MODE_TEXTURE_VIEW);
2281 BLI_assert(resource_mode_ != MTL_TEXTURE_MODE_VBO);
2282
2283 /* Format and mip levels (TODO(Metal): Optimize mipmaps counts, specify up-front). */
2284 MTLPixelFormat mtl_format = gpu_texture_format_to_metal(format_);
2285
2286 /* SRGB textures require a texture view for reading data and when rendering with SRGB
2287 * disabled. Enabling the texture_view or texture_read usage flags disables lossless
2288 * compression, so the situations in which it is used should be limited. */
2289 if (format_ == TextureFormat::SRGBA_8_8_8_8) {
2290 internal_gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_FORMAT_VIEW;
2291 }
2292
2293 /* Create texture descriptor. */
2294 switch (type_) {
2295
2296 /* 1D */
2297 case GPU_TEXTURE_1D:
2298 case GPU_TEXTURE_1D_ARRAY: {
2299 BLI_assert(w_ > 0);
2300 texture_descriptor_ = [[MTLTextureDescriptor alloc] init];
2301 texture_descriptor_.pixelFormat = mtl_format;
2302 texture_descriptor_.textureType = (type_ == GPU_TEXTURE_1D_ARRAY) ? MTLTextureType1DArray :
2303 MTLTextureType1D;
2304 texture_descriptor_.width = w_;
2305 texture_descriptor_.height = 1;
2306 texture_descriptor_.depth = 1;
2307 texture_descriptor_.arrayLength = (type_ == GPU_TEXTURE_1D_ARRAY) ? h_ : 1;
2308 texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
2309 texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
2310 texture_descriptor_.storageMode = MTLStorageModePrivate;
2311 texture_descriptor_.sampleCount = 1;
2312 texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
2313 texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault;
2314 } break;
2315
2316 /* 2D */
2317 case GPU_TEXTURE_2D:
2318 case GPU_TEXTURE_2D_ARRAY: {
2319 BLI_assert(w_ > 0 && h_ > 0);
2320 texture_descriptor_ = [[MTLTextureDescriptor alloc] init];
2321 texture_descriptor_.pixelFormat = mtl_format;
2322 texture_descriptor_.textureType = (type_ == GPU_TEXTURE_2D_ARRAY) ? MTLTextureType2DArray :
2323 MTLTextureType2D;
2324 texture_descriptor_.width = w_;
2325 texture_descriptor_.height = h_;
2326 texture_descriptor_.depth = 1;
2327 texture_descriptor_.arrayLength = (type_ == GPU_TEXTURE_2D_ARRAY) ? d_ : 1;
2328 texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
2329 texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
2330 texture_descriptor_.storageMode = MTLStorageModePrivate;
2331 texture_descriptor_.sampleCount = 1;
2332 texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
2333 texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault;
2334 } break;
2335
2336 /* 3D */
2337 case GPU_TEXTURE_3D: {
2338 BLI_assert(w_ > 0 && h_ > 0 && d_ > 0);
2339 texture_descriptor_ = [[MTLTextureDescriptor alloc] init];
2340 texture_descriptor_.pixelFormat = mtl_format;
2341 texture_descriptor_.textureType = MTLTextureType3D;
2342 texture_descriptor_.width = w_;
2343 texture_descriptor_.height = h_;
2344 texture_descriptor_.depth = d_;
2345 texture_descriptor_.arrayLength = 1;
2346 texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
2347 texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
2348 texture_descriptor_.storageMode = MTLStorageModePrivate;
2349 texture_descriptor_.sampleCount = 1;
2350 texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
2351 texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault;
2352 } break;
2353
2354 /* CUBE TEXTURES */
2355 case GPU_TEXTURE_CUBE:
 2356 case GPU_TEXTURE_CUBE_ARRAY: {
 2357 /* NOTE: For a cube-map 'Texture::d_' refers to total number of faces,
2358 * not just array slices. */
2359 BLI_assert(w_ > 0 && h_ > 0);
2360 texture_descriptor_ = [[MTLTextureDescriptor alloc] init];
2361 texture_descriptor_.pixelFormat = mtl_format;
2362 texture_descriptor_.textureType = (type_ == GPU_TEXTURE_CUBE_ARRAY) ?
2363 MTLTextureTypeCubeArray :
2364 MTLTextureTypeCube;
2365 texture_descriptor_.width = w_;
2366 texture_descriptor_.height = h_;
2367 texture_descriptor_.depth = 1;
2368 texture_descriptor_.arrayLength = (type_ == GPU_TEXTURE_CUBE_ARRAY) ? d_ / 6 : 1;
2369 texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
2370 texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
2371 texture_descriptor_.storageMode = MTLStorageModePrivate;
2372 texture_descriptor_.sampleCount = 1;
2373 texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
2374 texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault;
2375 } break;
2376
2377 /* GPU_TEXTURE_BUFFER */
2378 case GPU_TEXTURE_BUFFER: {
2379 texture_descriptor_ = [[MTLTextureDescriptor alloc] init];
2380 texture_descriptor_.pixelFormat = mtl_format;
2381 texture_descriptor_.textureType = MTLTextureTypeTextureBuffer;
2382 texture_descriptor_.width = w_;
2383 texture_descriptor_.height = 1;
2384 texture_descriptor_.depth = 1;
2385 texture_descriptor_.arrayLength = 1;
2386 texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
2387 texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
2388 texture_descriptor_.storageMode = MTLStorageModePrivate;
2389 texture_descriptor_.sampleCount = 1;
2390 texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
2391 texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault;
2392 } break;
2393
2394 default: {
2395 MTL_LOG_ERROR("[METAL] Error: Cannot create texture with unknown type: %d\n", type_);
2396 return;
2397 } break;
2398 }
2399
2400 /* Determine Resource Mode. */
2401 resource_mode_ = MTL_TEXTURE_MODE_DEFAULT;
2402
2403 /* Override storage mode if memoryless attachments are being used.
2404 * NOTE: Memoryless textures can only be supported on TBDR GPUs. */
2405 if (internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_MEMORYLESS) {
2406 const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
2407 if (is_tile_based_arch) {
2408 texture_descriptor_.storageMode = MTLStorageModeMemoryless;
2409 }
2410 }
2411
 2412 /* Texture atomic fallback:
 2413 * when native texture atomics are unsupported, the texture is allocated with a
 2414 * buffer backing and linearized into a 2D texture, so that atomic operations can
 2415 * be performed on the underlying buffer instead. */
 2416 bool native_texture_atomics = MTLBackend::get_capabilities().supports_texture_atomics;
2417 if ((internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATOMIC) && !native_texture_atomics) {
2418
2419 /* Validate format support. */
2421 "Texture atomic fallback support is only available for GPU_TEXTURE_2D, "
2422 "GPU_TEXTURE_2D_ARRAY and GPU_TEXTURE_3D.");
2423
2424 /* Re-assign 2D resolution to encompass all texture layers.
 2425 * Texture access is handled by remapping the original coordinates to a linear pixel ID
 2426 * within the destination texture: LinearPxID = x + y*layer_w + z*(layer_h*layer_w);
2427 * tx_2d.y = LinearPxID/2D_tex_width;
2428 * tx_2d.x = LinearPxID - (tx_2d.y*2D_tex_width); */
2430 /* Maximum 2D texture dimensions will be (16384, 16384) on all target platforms. */
2431 const uint max_width = 16384;
2432 const uint max_height = 16384;
2433 const uint pixels_res = w_ * h_ * d_;
2434
2435 uint new_w = 0, new_h = 0;
2436 if (pixels_res <= max_width) {
2437 new_w = pixels_res;
2438 new_h = 1;
2439 }
2440 else {
2441 new_w = max_width;
2442 new_h = ((pixels_res % new_w) == 0) ? (pixels_res / new_w) : ((pixels_res / new_w) + 1);
2443 }
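 /* Worked example (illustrative): a 100x100x4 atomic texture gives
  * pixels_res = 40000, which exceeds max_width, so new_w = 16384 and
  * new_h = (40000 / 16384) + 1 = 3, since 40000 is not a multiple of 16384. */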
2444
2445 texture_descriptor_.width = new_w;
2446 texture_descriptor_.height = new_h;
2447
2448 UNUSED_VARS_NDEBUG(max_height);
2449 BLI_assert_msg(texture_descriptor_.width <= max_width &&
2450 texture_descriptor_.height <= max_height,
2451 "Atomic fallback support texture is too large.");
2452 }
2453
2454 /* Allocate buffer for texture data. */
2455 size_t bytes_per_pixel = get_mtl_format_bytesize(mtl_format);
2456 size_t bytes_per_row = bytes_per_pixel * texture_descriptor_.width;
2457 size_t total_bytes = bytes_per_row * texture_descriptor_.height;
2458
 2459 backing_buffer_ = MTLContext::get_global_memory_manager()->allocate(
 2460 total_bytes, (internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_HOST_READ));
2461 BLI_assert(backing_buffer_ != nullptr);
2462
2463 /* NOTE: Fallback buffer-backed texture always set to Texture2D. */
2464 texture_descriptor_.textureType = MTLTextureType2D;
2465 texture_descriptor_.depth = 1;
2466 texture_descriptor_.arrayLength = 1;
2467
2468 /* Write texture dimensions to metadata. This is required to remap 2D Array/3D sample
2469 * coordinates into 2D texture space. */
2470 tex_buffer_metadata_[0] = w_;
2471 tex_buffer_metadata_[1] = h_;
2472 tex_buffer_metadata_[2] = d_;
2473
2474 /* Texture allocation with buffer as backing storage. Bytes per row must satisfy alignment
2475 * rules for device. */
2476 uint32_t align_requirement = uint32_t(
2477 [ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]);
2478 size_t aligned_bytes_per_row = ceil_to_multiple_ul(bytes_per_row, align_requirement);
2479 texture_ = [backing_buffer_->get_metal_buffer()
2480 newTextureWithDescriptor:texture_descriptor_
2481 offset:0
2482 bytesPerRow:aligned_bytes_per_row];
2483 /* Aligned width. */
2484 tex_buffer_metadata_[3] = bytes_per_row / bytes_per_pixel;
2485
2486#ifndef NDEBUG
2487 texture_.label = [NSString
2488 stringWithFormat:@"AtomicBufferBackedTexture_%s", this->get_name()];
2489#endif
2490 }
2491 else {
2492
2493 /* Standard texture allocation. */
2494 texture_ = [ctx->device newTextureWithDescriptor:texture_descriptor_];
2495
2496#ifndef NDEBUG
2497 if (internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_MEMORYLESS) {
2498 texture_.label = [NSString stringWithFormat:@"MemorylessTexture_%s", this->get_name()];
2499 }
2500 else {
2501 texture_.label = [NSString stringWithFormat:@"Texture_%s", this->get_name()];
2502 }
2503#endif
2504 }
2505
2506 BLI_assert(texture_);
2507 is_baked_ = true;
2508 is_dirty_ = false;
2509 }
2510
2511 /* Re-apply previous contents. */
2512 if (copy_previous_contents) {
2513 /* TODO(Metal): May need to copy previous contents of texture into new texture. */
2514 [previous_texture release];
2515 }
2516}
2517
2518void gpu::MTLTexture::reset()
2519{
2520 MTL_LOG_DEBUG("Texture %s reset. Size %d, %d, %d", this->get_name(), w_, h_, d_);
2521 /* Delete associated METAL resources. */
2522 if (texture_ != nil) {
2523 [texture_ release];
2524 texture_ = nil;
2525 is_baked_ = false;
2526 is_dirty_ = true;
2527 }
2528
2529 /* Release backing Metal buffer, if used. */
2530 if (backing_buffer_ != nullptr) {
2531 backing_buffer_->free();
2532 backing_buffer_ = nullptr;
2533 }
2534
2535 /* Release backing storage buffer, if used. */
2536 if (storage_buffer_ != nullptr) {
2537 delete storage_buffer_;
2538 storage_buffer_ = nullptr;
2539 }
2540
2541 if (texture_no_srgb_ != nil) {
2542 [texture_no_srgb_ release];
2543 texture_no_srgb_ = nil;
2544 }
2545
2546 if (mip_swizzle_view_ != nil) {
2547 [mip_swizzle_view_ release];
2548 mip_swizzle_view_ = nil;
2549 }
2550
2551 /* Blit framebuffer. */
2552 if (blit_fb_) {
2553 GPU_framebuffer_free(blit_fb_);
2554 blit_fb_ = nullptr;
2555 }
2556
2557 /* Descriptor. */
2558 if (texture_descriptor_ != nullptr) {
2559 [texture_descriptor_ release];
2560 texture_descriptor_ = nullptr;
2561 }
2562
2563 /* Reset mipmap state. */
2564 has_generated_mips_ = false;
2565
2566 BLI_assert(texture_ == nil);
2567 BLI_assert(mip_swizzle_view_ == nil);
2568}
2569
2571
2572/* -------------------------------------------------------------------- */
2575MTLStorageBuf *gpu::MTLTexture::get_storagebuf()
2576{
 2577 BLI_assert_msg(
2578 backing_buffer_ != nullptr,
2579 "Resource must have been created as a buffer backed resource to support SSBO wrapping.");
2580 /* Ensure texture resource is up to date. */
2581 this->ensure_baked();
2582 if (storage_buffer_ == nil) {
2583 BLI_assert(texture_ != nullptr);
2584 id<MTLBuffer> backing_buffer = [texture_ buffer];
2585 BLI_assert(backing_buffer != nil);
2586 storage_buffer_ = new MTLStorageBuf(this, [backing_buffer length]);
2587 }
2588 return storage_buffer_;
2589}
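/* The returned MTLStorageBuf aliases the texture's backing MTLBuffer, so buffer-backed
 * textures (e.g. the atomic fallback allocated in ensure_baked()) can be bound as SSBOs
 * without an intermediate copy. */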
2590
2591
2592/* -------------------------------------------------------------------- */
2595bool MTLTexture::is_format_srgb()
2596{
2597 return (format_ == TextureFormat::SRGBA_8_8_8_8);
2598}
2599
2600id<MTLTexture> MTLTexture::get_non_srgb_handle()
2601{
2602 id<MTLTexture> base_tex = get_metal_handle_base();
2603 BLI_assert(base_tex != nil);
2604 if (texture_no_srgb_ == nil) {
2605 texture_no_srgb_ = [base_tex newTextureViewWithPixelFormat:MTLPixelFormatRGBA8Unorm];
2606 }
2607 return texture_no_srgb_;
2608}
2609
2611/* -------------------------------------------------------------------- */
2614
2615MTLPixelBuffer::MTLPixelBuffer(size_t size) : PixelBuffer(size)
2616{
2617 /* Ensure buffer satisfies the alignment of 256 bytes for copying
2618 * data between buffers and textures. As specified in:
2619 * https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf */
2620 BLI_assert(size >= 256);
2621 buffer_ = nil;
2622}
2623
2624MTLPixelBuffer::~MTLPixelBuffer()
2625{
2626 if (buffer_) {
2627 [buffer_ release];
2628 buffer_ = nil;
2629 }
2630}
2631
2632void *MTLPixelBuffer::map()
2633{
2634 /* Duplicate the existing buffer and release original to ensure we do not directly modify data
2635 * in-flight on the GPU. */
2636 MTLContext *ctx = MTLContext::get();
2637 BLI_assert(ctx);
2638 MTLResourceOptions resource_options = ([ctx->device hasUnifiedMemory]) ?
2639 MTLResourceStorageModeShared :
2640 MTLResourceStorageModeManaged;
2641
2642 if (buffer_ != nil) {
2643 id<MTLBuffer> new_buffer = [ctx->device newBufferWithBytes:[buffer_ contents]
 2644 length:size_
 2645 options:resource_options];
2646 [buffer_ release];
2647 buffer_ = new_buffer;
2648 }
2649 else {
2650 buffer_ = [ctx->device newBufferWithLength:size_ options:resource_options];
2651 }
2652
2653 return [buffer_ contents];
2654}
2655
2656void MTLPixelBuffer::unmap()
2657{
2658 if (buffer_ == nil) {
2659 return;
2660 }
2661
2662 /* Ensure changes are synchronized. */
2663 if (buffer_.resourceOptions & MTLResourceStorageModeManaged) {
2664 [buffer_ didModifyRange:NSMakeRange(0, size_)];
2665 }
2666}
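/* Illustrative host-upload flow (a sketch, assuming the GPU_pixel_buffer_* entry points
 * from GPU_texture.hh; verify signatures against the header):
 *
 *   GPUPixelBuffer *pixbuf = GPU_pixel_buffer_create(data_size);
 *   void *ptr = GPU_pixel_buffer_map(pixbuf);
 *   memcpy(ptr, src_data, data_size);
 *   GPU_pixel_buffer_unmap(pixbuf);
 *   // The texture update can then source its data from the pixel buffer.
 *   GPU_pixel_buffer_free(pixbuf);
 */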
2667
2668GPUPixelBufferNativeHandle MTLPixelBuffer::get_native_handle()
2669{
2670 GPUPixelBufferNativeHandle native_handle;
2671
2672 /* Only supported with unified memory currently. */
2673 MTLContext *ctx = MTLContext::get();
2674 BLI_assert(ctx);
2675 if (![ctx->device hasUnifiedMemory]) {
2676 return native_handle;
2677 }
2678
2679 /* Just get pointer to unified memory. No need to unmap. */
2680 map();
2681 native_handle.handle = reinterpret_cast<int64_t>(buffer_);
2682 native_handle.size = size_;
2683
2684 return native_handle;
2685}
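/* The native handle exposes the raw MTLBuffer pointer and size for interop with
 * external APIs. As guarded above, it is only returned on unified-memory devices,
 * where the mapped pointer remains directly usable by the CPU. */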
2686
2687size_t MTLPixelBuffer::get_size()
2688{
2689 return size_;
2690}
2691
2692id<MTLBuffer> MTLPixelBuffer::get_metal_buffer()
2693{
2694 return buffer_;
2695}
2696
2698
2699} // namespace blender::gpu