mtl_texture.mm (Blender 4.5)
/* SPDX-FileCopyrightText: 2022-2023 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

/** \file
 * \ingroup gpu
 */

#include "BKE_global.hh"

#include "DNA_userdef_types.h"

#include "GPU_batch.hh"
#include "GPU_batch_presets.hh"
#include "GPU_capabilities.hh"
#include "GPU_framebuffer.hh"
#include "GPU_immediate.hh"
#include "GPU_platform.hh"
#include "GPU_state.hh"

#include "mtl_backend.hh"
#include "mtl_common.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_texture.hh"
#include "mtl_vertex_buffer.hh"

#include "GHOST_C-api.h"

namespace blender::gpu {

/* -------------------------------------------------------------------- */
/** \name Creation & Deletion
 * \{ */

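/* Reset the texture to its default, un-baked state. The underlying MTLTexture
 * resource is created lazily by ensure_baked(), so a newly initialized texture
 * owns no GPU memory yet. */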
void gpu::MTLTexture::mtl_texture_init()
{
  BLI_assert(MTLContext::get() != nullptr);

  /* Status. */
  is_baked_ = false;
  is_dirty_ = false;
  resource_mode_ = MTL_TEXTURE_MODE_DEFAULT;
  mtl_max_mips_ = 1;

  /* Metal properties. */
  texture_ = nil;
  mip_swizzle_view_ = nil;

  /* Binding information. */
  is_bound_ = false;

  /* VBO. */
  vert_buffer_ = nullptr;
  vert_buffer_mtl_ = nil;

  /* Default Swizzle. */
  tex_swizzle_mask_[0] = 'r';
  tex_swizzle_mask_[1] = 'g';
  tex_swizzle_mask_[2] = 'b';
  tex_swizzle_mask_[3] = 'a';
  mtl_swizzle_mask_ = MTLTextureSwizzleChannelsMake(
      MTLTextureSwizzleRed, MTLTextureSwizzleGreen, MTLTextureSwizzleBlue, MTLTextureSwizzleAlpha);
}

gpu::MTLTexture::MTLTexture(const char *name) : Texture(name)
{
  /* Common Initialization. */
  mtl_texture_init();
}

gpu::MTLTexture::MTLTexture(const char *name,
                            eGPUTextureFormat format,
                            eGPUTextureType type,
                            id<MTLTexture> metal_texture)
    : Texture(name)
{
  /* Common Initialization. */
  mtl_texture_init();

  /* Prep texture from METAL handle. */
  BLI_assert(metal_texture != nil);
  format_ = format;
  type_ = type;
  init_2D((int)metal_texture.width, (int)metal_texture.height, 0, 1, format);

  /* Assign MTLTexture. */
  texture_ = metal_texture;
  [texture_ retain];
  internal_gpu_image_usage_flags_ = gpu_usage_from_mtl(metal_texture.usage);
  gpu_image_usage_flags_ = internal_gpu_image_usage_flags_;

  /* Flag as Baked. */
  is_baked_ = true;
  is_dirty_ = false;
  resource_mode_ = MTL_TEXTURE_MODE_EXTERNAL;
}

gpu::MTLTexture::~MTLTexture()
{
  /* Unbind if bound. */
  if (is_bound_) {
    MTLContext *ctx = MTLContext::get();
    if (ctx != nullptr) {
      ctx->state_manager->texture_unbind(this);
    }
  }

  /* Free memory. */
  this->reset();
}

/** \} */

/* -------------------------------------------------------------------- */
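/* Metal exposes swizzle masks and mip-range clamping through a separate
 * MTLTexture view object rather than through sampler state. The view is
 * (re)created here lazily whenever the swizzle or mip-range state is flagged
 * dirty, since holding a texture view disables lossless compression on the
 * source texture. */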
void gpu::MTLTexture::bake_mip_swizzle_view()
{
  if (texture_view_dirty_flags_) {

    /* Optimization: only generate a texture view for mipmapped textures if base level > 0
     * and max level does not match the existing number of mips.
     * Only apply this if mipmap is the only change, and we have not previously generated
     * a texture view. For textures which are created as views, this should also be skipped. */
    if (resource_mode_ != MTL_TEXTURE_MODE_TEXTURE_VIEW &&
        texture_view_dirty_flags_ == TEXTURE_VIEW_MIP_DIRTY && mip_swizzle_view_ == nil)
    {

      if (mip_texture_base_level_ == 0 && mip_texture_max_level_ == mtl_max_mips_) {
        texture_view_dirty_flags_ = TEXTURE_VIEW_NOT_DIRTY;
        return;
      }
    }

    /* If a texture view was previously created, release it. */
    if (mip_swizzle_view_ != nil) {
      [mip_swizzle_view_ release];
      mip_swizzle_view_ = nil;
    }

    /* Use the source texture to determine range limits. If we are using a GPU texture view,
     * the range check should only validate the source texture's range. */
    const gpu::Texture *tex_view_src = this;
    if (resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) {
      tex_view_src = unwrap(source_texture_);
    }

    /* Determine number of slices. */
    int max_slices = 1;
    int num_slices = 1;
    switch (type_) {
      case GPU_TEXTURE_1D_ARRAY:
        max_slices = tex_view_src->height_get();
        num_slices = h_;
        break;
      case GPU_TEXTURE_2D_ARRAY:
        max_slices = tex_view_src->depth_get();
        num_slices = d_;
        break;
      case GPU_TEXTURE_CUBE:
        max_slices = 6;
        num_slices = 6;
        break;
      case GPU_TEXTURE_CUBE_ARRAY:
        /* d_ is equal to array levels * 6, including face count. */
        max_slices = tex_view_src->depth_get();
        num_slices = d_;
        break;
      default:
        num_slices = 1;
        break;
    }

    /* Determine texture view format. If the texture view is used as a stencil view, we instead
     * provide the equivalent format for performing stencil reads/samples. */
    MTLPixelFormat texture_view_pixel_format = gpu_texture_format_to_metal(format_);
    if (texture_view_stencil_) {
      switch (texture_view_pixel_format) {
        case MTLPixelFormatDepth24Unorm_Stencil8:
          texture_view_pixel_format = MTLPixelFormatX24_Stencil8;
          break;
        case MTLPixelFormatDepth32Float_Stencil8:
          texture_view_pixel_format = MTLPixelFormatX32_Stencil8;
          break;
        default:
          BLI_assert_msg(false, "Texture format does not support stencil views.");
          break;
      }
    }

    /* NOTE: Texture type for cube maps can be overridden as a 2D array. This is done
     * via modifying this texture's type flags. */
    MTLTextureType texture_view_texture_type = to_metal_type(type_);

    /* Ensure we have texture view usage flagged.
     * NOTE: This check exists in the high-level GPU API, however it does not cover internal
     * Metal backend uses of texture views, such as when required to support the SRGB enablement
     * toggle during rendering. */
    BLI_assert_msg(
        (texture_view_pixel_format == texture_.pixelFormat) ||
            (internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW),
        "Usage Flag GPU_TEXTURE_USAGE_FORMAT_VIEW must be specified if a texture view is "
        "created with a different format to its source texture.");

    int range_len = min_ii((mip_texture_max_level_ - mip_texture_base_level_) + 1,
                           (int)texture_.mipmapLevelCount - mip_texture_base_level_);
    BLI_assert(range_len > 0);
    BLI_assert(mip_texture_base_level_ < texture_.mipmapLevelCount);
    BLI_assert(mip_texture_base_layer_ < max_slices);
    UNUSED_VARS_NDEBUG(max_slices);
    mip_swizzle_view_ = [texture_
        newTextureViewWithPixelFormat:texture_view_pixel_format
                          textureType:texture_view_texture_type
                               levels:NSMakeRange(mip_texture_base_level_, range_len)
                               slices:NSMakeRange(mip_texture_base_layer_, num_slices)
                              swizzle:mtl_swizzle_mask_];
218 "Updating texture view - MIP TEXTURE BASE LEVEL: %d, MAX LEVEL: %d (Range len: %d)",
219 mip_texture_base_level_,
220 min_ii(mip_texture_max_level_, (int)texture_.mipmapLevelCount),
221 range_len);
#ifndef NDEBUG
    mip_swizzle_view_.label = [NSString
        stringWithFormat:
            @"MipSwizzleView_%s__format=%u_type=%u_baselevel=%u_numlevels=%u_swizzle='%c%c%c%c'",
            [[texture_ label] UTF8String],
            (uint)texture_view_pixel_format,
            (uint)texture_view_texture_type,
            (uint)mip_texture_base_level_,
            (uint)range_len,
            tex_swizzle_mask_[0],
            tex_swizzle_mask_[1],
            tex_swizzle_mask_[2],
            tex_swizzle_mask_[3]];
#else
    mip_swizzle_view_.label = [texture_ label];
#endif
    texture_view_dirty_flags_ = TEXTURE_VIEW_NOT_DIRTY;
  }
}


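/* Return the texture handle to use for sampling, preferring the baked
 * mip/swizzle view over the base texture whenever one exists or is pending. */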
id<MTLTexture> gpu::MTLTexture::get_metal_handle()
{

  /* Verify VBO texture shares the same buffer. */
  if (resource_mode_ == MTL_TEXTURE_MODE_VBO) {
    id<MTLBuffer> buf = vert_buffer_->get_metal_buffer();

    /* Source vertex buffer has been re-generated, requiring re-initialization. */
    if (buf != vert_buffer_mtl_) {
      MTL_LOG_DEBUG(
          "MTLTexture '%p' using MTL_TEXTURE_MODE_VBO requires re-generation due to updated "
          "Vertex-Buffer.",
          this);
      /* Clear state. */
      this->reset();

      /* Re-initialize. */
      this->init_internal(vert_buffer_);

      /* Update for assertion check below. */
      buf = vert_buffer_->get_metal_buffer();
    }

    /* Ensure buffer is valid.
     * Fetch the vertex buffer handle directly in case it changed above. */
    BLI_assert(vert_buffer_mtl_ != nil);
    BLI_assert(vert_buffer_->get_metal_buffer() == vert_buffer_mtl_);
  }

  /* Ensure up to date and baked. */
  this->ensure_baked();

  if (is_baked_) {
    /* For explicit texture views, ensure we always return the texture view. */
    if (resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) {
      BLI_assert_msg(mip_swizzle_view_, "Texture view should always have a valid handle.");
    }

    if (mip_swizzle_view_ != nil || texture_view_dirty_flags_) {
      bake_mip_swizzle_view();

      /* Optimization: If the texture view does not change mip parameters, no texture view will
       * be baked. This is because texture views remove the ability to perform lossless
       * compression. */
      if (mip_swizzle_view_ != nil) {
        return mip_swizzle_view_;
      }
    }
    return texture_;
  }
  return nil;
}

id<MTLTexture> gpu::MTLTexture::get_metal_handle_base()
{

  /* Ensure up to date and baked. */
  this->ensure_baked();

  /* For explicit texture views, always return the texture view. */
  if (resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) {
    BLI_assert_msg(mip_swizzle_view_, "Texture view should always have a valid handle.");
    if (mip_swizzle_view_ != nil || texture_view_dirty_flags_) {
      bake_mip_swizzle_view();
    }
    BLI_assert(mip_swizzle_view_ != nil);
    return mip_swizzle_view_;
  }

  /* Return base handle. */
  if (is_baked_) {
    return texture_;
  }
  return nil;
}

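/* Direct GPU-side copy between two baked textures using a blit command
 * encoder. Requires matching pixel formats; mismatched formats are rejected
 * with a warning (the render-based blit() overload below handles such copies
 * via a fullscreen draw instead). */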
void gpu::MTLTexture::blit(id<MTLBlitCommandEncoder> blit_encoder,
                           uint src_x_offset,
                           uint src_y_offset,
                           uint src_z_offset,
                           uint src_slice,
                           uint src_mip,
                           gpu::MTLTexture *dst,
                           uint dst_x_offset,
                           uint dst_y_offset,
                           uint dst_z_offset,
                           uint dst_slice,
                           uint dst_mip,
                           uint width,
                           uint height,
                           uint depth)
{

  BLI_assert(dst);
  BLI_assert(width > 0 && height > 0 && depth > 0);
  MTLSize src_size = MTLSizeMake(width, height, depth);
  MTLOrigin src_origin = MTLOriginMake(src_x_offset, src_y_offset, src_z_offset);
  MTLOrigin dst_origin = MTLOriginMake(dst_x_offset, dst_y_offset, dst_z_offset);

  if (this->format_get() != dst->format_get()) {
    MTL_LOG_WARNING(
        "gpu::MTLTexture: Cannot copy between two textures of different formats using a "
        "blit encoder. TODO: Support this operation.");
    return;
  }

  /* TODO(Metal): Verify whether we want to use the handle with modified base-level/texture view
   * or not. */
  [blit_encoder copyFromTexture:this->get_metal_handle_base()
                    sourceSlice:src_slice
                    sourceLevel:src_mip
                   sourceOrigin:src_origin
                     sourceSize:src_size
                      toTexture:dst->get_metal_handle_base()
               destinationSlice:dst_slice
               destinationLevel:dst_mip
              destinationOrigin:dst_origin];
}

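/* Render-based blit: binds a framebuffer wrapping the destination slice/mip
 * and draws a fullscreen quad sampling this texture, saving and restoring all
 * affected pipeline state around the draw. */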
void gpu::MTLTexture::blit(gpu::MTLTexture *dst,
                           uint src_x_offset,
                           uint src_y_offset,
                           uint dst_x_offset,
                           uint dst_y_offset,
                           uint src_mip,
                           uint dst_mip,
                           uint dst_slice,
                           int width,
                           int height)
{
  BLI_assert(this->type_get() == dst->type_get());

  GPUShader *shader = fullscreen_blit_sh_get();
  BLI_assert(shader != nullptr);

  /* Fetch restore framebuffer and blit target framebuffer from destination texture. */
  GPUFrameBuffer *restore_fb = GPU_framebuffer_active_get();
  GPUFrameBuffer *blit_target_fb = dst->get_blit_framebuffer(dst_slice, dst_mip);
  BLI_assert(blit_target_fb);
  GPU_framebuffer_bind(blit_target_fb);

  /* Execute graphics draw call to perform the blit. */
  Batch *quad = GPU_batch_preset_quad();
  GPU_batch_set_shader(quad, shader);

  float w = dst->width_get();
  float h = dst->height_get();

  GPU_shader_uniform_2f(shader, "fullscreen", w, h);
  GPU_shader_uniform_2f(shader, "src_offset", src_x_offset, src_y_offset);
  GPU_shader_uniform_2f(shader, "dst_offset", dst_x_offset, dst_y_offset);
  GPU_shader_uniform_2f(shader, "size", width, height);

  GPU_shader_uniform_1i(shader, "mip", src_mip);
  GPU_batch_texture_bind(quad, "imageTexture", wrap(this));

  /* Cache previous pipeline state. */
  bool depth_write_prev = GPU_depth_mask_get();
  uint stencil_mask_prev = GPU_stencil_mask_get();
  eGPUStencilTest stencil_test_prev = GPU_stencil_test_get();
  eGPUFaceCullTest culling_test_prev = GPU_face_culling_get();
  eGPUBlend blend_prev = GPU_blend_get();
  eGPUDepthTest depth_test_prev = GPU_depth_test_get();
  GPU_scissor_test(false);

  /* Apply state for blit draw call. */
  GPU_stencil_write_mask_set(0xFF);
  GPU_stencil_test(GPU_STENCIL_ALWAYS);
  GPU_face_culling(GPU_CULL_NONE);
  GPU_blend(GPU_BLEND_NONE);
  GPU_depth_mask(false);
  GPU_depth_test(GPU_DEPTH_ALWAYS);

  GPU_batch_draw(quad);

  /* Restore previous pipeline state. */
  GPU_depth_mask(depth_write_prev);
  GPU_stencil_write_mask_set(stencil_mask_prev);
  GPU_stencil_test(stencil_test_prev);
  GPU_face_culling(culling_test_prev);
  GPU_blend(blend_prev);
  GPU_depth_test(depth_test_prev);

  if (restore_fb != nullptr) {
    GPU_framebuffer_bind(restore_fb);
  }
  else {
    GPU_framebuffer_restore();
  }
}

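/* Return a lazily-created framebuffer wrapping a single slice and mip of this
 * texture, used as the render target for blits and render-pass clears. The
 * attachment configuration is cached and only rebuilt when the requested
 * slice/mip changes. */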
GPUFrameBuffer *gpu::MTLTexture::get_blit_framebuffer(int dst_slice, uint dst_mip)
{

  /* Check if layer has changed. */
  bool update_attachments = false;
  if (!blit_fb_) {
    blit_fb_ = GPU_framebuffer_create("gpu_blit");
    update_attachments = true;
  }

  /* Check if the current blit FB has the correct attachment properties. */
  if (blit_fb_) {
    if (blit_fb_slice_ != dst_slice || blit_fb_mip_ != dst_mip) {
      update_attachments = true;
    }
  }

  if (update_attachments) {
    if (format_flag_ & GPU_FORMAT_DEPTH || format_flag_ & GPU_FORMAT_STENCIL) {
      /* DEPTH TEX */
      GPU_framebuffer_ensure_config(
          &blit_fb_,
          {GPU_ATTACHMENT_TEXTURE_LAYER_MIP(
               wrap(static_cast<Texture *>(this)), int(dst_slice), int(dst_mip)),
           GPU_ATTACHMENT_NONE});
    }
    else {
      /* COLOR TEX */
      GPU_framebuffer_ensure_config(
          &blit_fb_,
          {GPU_ATTACHMENT_NONE,
           GPU_ATTACHMENT_TEXTURE_LAYER_MIP(
               wrap(static_cast<Texture *>(this)), int(dst_slice), int(dst_mip))});
    }
    blit_fb_slice_ = dst_slice;
    blit_fb_mip_ = dst_mip;
  }

  BLI_assert(blit_fb_);
  return blit_fb_;
}

MTLSamplerState gpu::MTLTexture::get_sampler_state()
{
  MTLSamplerState sampler_state;
  sampler_state.state = this->sampler_state;
  /* Add more parameters as needed. */
  return sampler_state;
}

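/* Upload a region of host data to a given mip of this texture. The data is
 * staged in a temporary MTLBuffer, then either copied with a blit encoder
 * (when the input layout maps 1:1 onto the destination format) or converted
 * by a specialized compute kernel (when component counts or per-pixel sizes
 * differ). A staging texture is used when the destination texture lacks the
 * usage flags the chosen path requires. */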
void gpu::MTLTexture::update_sub(
    int mip, int offset[3], int extent[3], eGPUDataFormat type, const void *data)
{
  /* Fetch active context. */
  MTLContext *ctx = MTLContext::get();
  BLI_assert(ctx);

  /* Do not update texture views. */
  BLI_assert(resource_mode_ != MTL_TEXTURE_MODE_TEXTURE_VIEW);

  /* Ensure mipmaps. */
  this->ensure_mipmaps(mip);

  /* Ensure texture is baked. */
  this->ensure_baked();

  /* Safety checks. */
  BLI_assert(mip >= mip_min_ && mip <= mip_max_);
  BLI_assert(mip < texture_.mipmapLevelCount);
  BLI_assert(texture_.mipmapLevelCount >= mip_max_);

  /* DEPTH FLAG - Depth formats cannot use a direct blit - pass off to their own routine, which
   * will do a depth-only render. */
  bool is_depth_format = (format_flag_ & GPU_FORMAT_DEPTH);
  if (is_depth_format) {
    switch (type_) {

      case GPU_TEXTURE_2D:
        update_sub_depth_2d(mip, offset, extent, type, data);
        return;
      default:
        MTL_LOG_ERROR(
            "gpu::MTLTexture::update_sub not yet supported for other depth "
            "configurations");
        return;
    }
  }

  const bool is_compressed = (format_flag_ & GPU_FORMAT_COMPRESSED);

  @autoreleasepool {
    /* Determine total size of INPUT data. */
    int num_channels = to_component_len(format_);
    size_t input_bytes_per_pixel = to_bytesize(format_, type);
    size_t totalsize = 0;

    /* If unpack row length is used, the size of the input data uses the unpack row length,
     * rather than the image length. */
    size_t expected_update_w = ((ctx->pipeline_state.unpack_row_length == 0) ?
                                    extent[0] :
                                    ctx->pipeline_state.unpack_row_length);

    /* Ensure the calculated total size isn't larger than the remaining image data size. */
    if (is_compressed) {
      /* Calculate size requirement for incoming compressed texture data. */
      totalsize = ((expected_update_w + 3) / 4) * ((extent[1] + 3) / 4) * to_block_size(format_);
    }
    else {
      switch (this->dimensions_count()) {
        case 1:
          totalsize = input_bytes_per_pixel * max_ulul(expected_update_w, 1);
          break;
        case 2:
          totalsize = input_bytes_per_pixel * max_ulul(expected_update_w, 1) * (size_t)extent[1];
          break;
        case 3:
          totalsize = input_bytes_per_pixel * max_ulul(expected_update_w, 1) * (size_t)extent[1] *
                      (size_t)extent[2];
          break;
        default:
          BLI_assert(false);
          break;
      }
    }
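    /* Example: updating a 16x16 region of a BC1/DXT1-compressed texture
     * (8-byte 4x4 blocks) stages (16/4) * (16/4) * 8 = 128 bytes, whereas the
     * same region of an RGBA8 texture stages 16 * 16 * 4 = 1024 bytes. */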

    /* Early exit if the update size is zero. update_sub sometimes has a zero-sized
     * extent when called from texture painting. */
    if (totalsize <= 0 || extent[0] <= 0) {
      MTL_LOG_WARNING(
          "MTLTexture::update_sub called with extent size of zero for one or more dimensions. "
          "(%d, %d, %d) - DimCount: %u",
          extent[0],
          extent[1],
          extent[2],
          this->dimensions_count());
      return;
    }

    /* When unpack row length is used, the provided data does not necessarily contain padding
     * for the last row, so we only include up to the end of the updated data. */
    if (ctx->pipeline_state.unpack_row_length > 0) {
      BLI_assert(ctx->pipeline_state.unpack_row_length >= extent[0]);
      totalsize -= (ctx->pipeline_state.unpack_row_length - extent[0]) * input_bytes_per_pixel;
    }

    /* Check. */
    BLI_assert(totalsize > 0);

    /* Determine expected destination data size. */
    MTLPixelFormat destination_format = gpu_texture_format_to_metal(format_);
    size_t expected_dst_bytes_per_pixel = get_mtl_format_bytesize(destination_format);
    int destination_num_channels = get_mtl_format_num_components(destination_format);

    /* Prepare specialization struct (for texture update routine). */
    TextureUpdateRoutineSpecialisation compute_specialization_kernel = {
        tex_data_format_to_msl_type_str(type),              /* INPUT DATA FORMAT */
        tex_data_format_to_msl_texture_template_type(type), /* TEXTURE DATA FORMAT */
        num_channels,
        destination_num_channels,
        false /* Not a clear. */
    };

    /* Determine whether we can do a direct blit or not. For compressed textures, always assume
     * a direct blit (the input data pretends to be float, but it is not). */
    bool can_use_direct_blit = true;
    if (!is_compressed && (expected_dst_bytes_per_pixel != input_bytes_per_pixel ||
                           num_channels != destination_num_channels))
    {
      can_use_direct_blit = false;
    }

    if (is_depth_format) {
      if (GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_MAC, GPU_DRIVER_OFFICIAL)) {
        /* Workaround for crash in validation layer when blitting to a sampler2DDepth target
         * with dimensions (1, 1, 1). */
        if (extent[0] == 1 && extent[1] == 1 && extent[2] == 1 && totalsize == 4) {
          can_use_direct_blit = false;
        }
      }
    }

    if (format_ == GPU_SRGB8_A8 && !can_use_direct_blit) {
      MTL_LOG_WARNING(
          "SRGB data upload does not work correctly using compute upload. "
          "texname '%s'",
          name_);
    }

    /* Safety checks. */
    if (type == GPU_DATA_UINT_24_8 || type == GPU_DATA_10_11_11_REV ||
        type == GPU_DATA_2_10_10_10_REV || is_compressed)
    {
      BLI_assert(can_use_direct_blit &&
                 "Special input data type must be a 1-1 mapping with destination texture as it "
                 "cannot easily be split");
    }

    /* Debug and verification. */
    if (!can_use_direct_blit) {
      /* Check mip compatibility. */
      if (mip != 0) {
        MTL_LOG_ERROR(
            "Updating texture layers other than mip=0 when data is mismatched is not "
            "possible in METAL on macOS using texture->write\n");
        return;
      }

      /* Check format write-ability. */
      if (mtl_format_get_writeable_view_format(destination_format) == MTLPixelFormatInvalid) {
        MTL_LOG_ERROR(
            "Updating texture -- destination MTLPixelFormat '%d' does not support write "
            "operations, and no suitable TextureView format exists.\n",
            *(int *)(&destination_format));
        return;
      }
    }

    /* Common properties. */
    MTLPixelFormat compatible_write_format = mtl_format_get_writeable_view_format(
        destination_format);

    /* Some texture formats are not writeable, so we need to use a texture view. */
    if (compatible_write_format == MTLPixelFormatInvalid) {
      MTL_LOG_ERROR("Cannot use compute update blit with texture-view format: %d\n",
                    *((int *)&compatible_write_format));
      return;
    }

    /* Fetch allocation from memory pool. */
    gpu::MTLBuffer *temp_allocation = MTLContext::get_global_memory_manager()->allocate_with_data(
        totalsize, true, data);
    id<MTLBuffer> staging_buffer = temp_allocation->get_metal_buffer();
    BLI_assert(staging_buffer != nil);

    /* Prepare command encoders. */
    id<MTLBlitCommandEncoder> blit_encoder = nil;
    id<MTLComputeCommandEncoder> compute_encoder = nil;
    id<MTLTexture> staging_texture = nil;
    id<MTLTexture> texture_handle = nil;

    /* Use staging texture. */
    bool use_staging_texture = false;

    if (can_use_direct_blit) {
      blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
      BLI_assert(blit_encoder != nil);

      /* If we need to use a texture view to write texture data (as the source
       * format is unwritable), and our texture has not been initialized with
       * texture view support, use a staging texture. */
      if ((compatible_write_format != destination_format) &&
          !(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW))
      {
        use_staging_texture = true;
      }
    }
    else {
      compute_encoder = ctx->main_command_buffer.ensure_begin_compute_encoder();
      BLI_assert(compute_encoder != nil);

      /* For compute, we should use a staging texture to avoid texture write usage,
       * if it has not been specified for the texture. Using shader-write disables
       * lossless texture compression, so this is best avoided where possible. */
      if (!(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_SHADER_WRITE)) {
        use_staging_texture = true;
      }
      if (compatible_write_format != destination_format) {
        if (!(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW)) {
          use_staging_texture = true;
        }
      }
    }

    /* Allocate staging texture if needed. */
    if (use_staging_texture) {
      /* Create staging texture to avoid the shader-write limiting optimization. */
      BLI_assert(texture_descriptor_ != nullptr);
      MTLTextureUsage original_usage = texture_descriptor_.usage;
      texture_descriptor_.usage = original_usage | MTLTextureUsageShaderWrite |
                                  MTLTextureUsagePixelFormatView;
      staging_texture = [ctx->device newTextureWithDescriptor:texture_descriptor_];
      staging_texture.label = @"Staging texture";
      texture_descriptor_.usage = original_usage;

      /* Create texture view if needed. */
      texture_handle = ((compatible_write_format == destination_format)) ?
                           [staging_texture retain] :
                           [staging_texture newTextureViewWithPixelFormat:compatible_write_format];
    }
    else {
      /* Use texture view. */
      if (compatible_write_format != destination_format) {
        BLI_assert(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW);
        texture_handle = [texture_ newTextureViewWithPixelFormat:compatible_write_format];
      }
      else {
        texture_handle = texture_;
        [texture_handle retain];
      }
    }

    switch (type_) {

      /* 1D */
      case GPU_TEXTURE_1D:
      case GPU_TEXTURE_1D_ARRAY: {
        if (can_use_direct_blit) {
          /* Use blit-based update. */
          size_t bytes_per_row = expected_dst_bytes_per_pixel *
                                 ((ctx->pipeline_state.unpack_row_length == 0) ?
                                      extent[0] :
                                      ctx->pipeline_state.unpack_row_length);
          size_t bytes_per_image = bytes_per_row;
          if (is_compressed) {
            size_t block_size = to_block_size(format_);
            size_t blocks_x = divide_ceil_u(extent[0], 4);
            bytes_per_row = blocks_x * block_size;
            bytes_per_image = bytes_per_row;
          }
          int max_array_index = ((type_ == GPU_TEXTURE_1D_ARRAY) ? extent[1] : 1);
          for (int array_index = 0; array_index < max_array_index; array_index++) {

            size_t buffer_array_offset = (bytes_per_image * (size_t)array_index);
            [blit_encoder
                     copyFromBuffer:staging_buffer
                       sourceOffset:buffer_array_offset
                  sourceBytesPerRow:bytes_per_row
                sourceBytesPerImage:bytes_per_image
                         sourceSize:MTLSizeMake(extent[0], 1, 1)
                          toTexture:texture_handle
                   destinationSlice:((type_ == GPU_TEXTURE_1D_ARRAY) ? (array_index + offset[1]) :
                                                                       0)
                   destinationLevel:mip
                  destinationOrigin:MTLOriginMake(offset[0], 0, 0)];
          }
        }
        else {
          /* Use compute-based update. */
          if (type_ == GPU_TEXTURE_1D) {
            id<MTLComputePipelineState> pso = texture_update_1d_get_kernel(
                compute_specialization_kernel);
            TextureUpdateParams params = {mip,
                                          {extent[0], 1, 1},
                                          {offset[0], 0, 0},
                                          ((ctx->pipeline_state.unpack_row_length == 0) ?
                                               extent[0] :
                                               ctx->pipeline_state.unpack_row_length)};

            /* Bind resources via compute state for optimal state caching performance. */
            MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
            cs.bind_pso(pso);
            cs.bind_compute_bytes(&params, sizeof(params), 0);
            cs.bind_compute_buffer(staging_buffer, 0, 1);
            cs.bind_compute_texture(texture_handle, 0);
            [compute_encoder
                      dispatchThreads:MTLSizeMake(extent[0], 1, 1) /* Width, Height, Layer */
                threadsPerThreadgroup:MTLSizeMake(64, 1, 1)];
          }
          else if (type_ == GPU_TEXTURE_1D_ARRAY) {
            id<MTLComputePipelineState> pso = texture_update_1d_array_get_kernel(
                compute_specialization_kernel);
            TextureUpdateParams params = {mip,
                                          {extent[0], extent[1], 1},
                                          {offset[0], offset[1], 0},
                                          ((ctx->pipeline_state.unpack_row_length == 0) ?
                                               extent[0] :
                                               ctx->pipeline_state.unpack_row_length)};

            /* Bind resources via compute state for optimal state caching performance. */
            MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
            cs.bind_pso(pso);
            cs.bind_compute_bytes(&params, sizeof(params), 0);
            cs.bind_compute_buffer(staging_buffer, 0, 1);
            cs.bind_compute_texture(texture_handle, 0);
            [compute_encoder
                      dispatchThreads:MTLSizeMake(extent[0], extent[1], 1) /* Width, layers. */
                threadsPerThreadgroup:MTLSizeMake(8, 8, 1)];
          }
        }
      } break;

      /* 2D */
      case GPU_TEXTURE_2D:
      case GPU_TEXTURE_2D_ARRAY: {
        if (can_use_direct_blit) {
          /* Use blit encoder update. */
          size_t bytes_per_row = expected_dst_bytes_per_pixel *
                                 ((ctx->pipeline_state.unpack_row_length == 0) ?
                                      extent[0] :
                                      ctx->pipeline_state.unpack_row_length);
          size_t bytes_per_image = bytes_per_row * extent[1];
          if (is_compressed) {
            size_t block_size = to_block_size(format_);
            size_t blocks_x = divide_ceil_u(extent[0], 4);
            size_t blocks_y = divide_ceil_u(extent[1], 4);
            bytes_per_row = blocks_x * block_size;
            bytes_per_image = bytes_per_row * blocks_y;
          }

          size_t texture_array_relative_offset = 0;
          int base_slice = (type_ == GPU_TEXTURE_2D_ARRAY) ? offset[2] : 0;
          int final_slice = base_slice + ((type_ == GPU_TEXTURE_2D_ARRAY) ? extent[2] : 1);

          for (int array_slice = base_slice; array_slice < final_slice; array_slice++) {

            if (array_slice > 0) {
              BLI_assert(type_ == GPU_TEXTURE_2D_ARRAY);
              BLI_assert(array_slice < d_);
            }

            [blit_encoder copyFromBuffer:staging_buffer
                            sourceOffset:texture_array_relative_offset
                       sourceBytesPerRow:bytes_per_row
                     sourceBytesPerImage:bytes_per_image
                              sourceSize:MTLSizeMake(extent[0], extent[1], 1)
                               toTexture:texture_handle
                        destinationSlice:array_slice
                        destinationLevel:mip
                       destinationOrigin:MTLOriginMake(offset[0], offset[1], 0)];

            texture_array_relative_offset += bytes_per_image;
          }
        }
        else {
          /* Use compute texture update. */
          if (type_ == GPU_TEXTURE_2D) {
            id<MTLComputePipelineState> pso = texture_update_2d_get_kernel(
                compute_specialization_kernel);
            TextureUpdateParams params = {mip,
                                          {extent[0], extent[1], 1},
                                          {offset[0], offset[1], 0},
                                          ((ctx->pipeline_state.unpack_row_length == 0) ?
                                               extent[0] :
                                               ctx->pipeline_state.unpack_row_length)};

            /* Bind resources via compute state for optimal state caching performance. */
            MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
            cs.bind_pso(pso);
            cs.bind_compute_bytes(&params, sizeof(params), 0);
            cs.bind_compute_buffer(staging_buffer, 0, 1);
            cs.bind_compute_texture(texture_handle, 0);
            [compute_encoder
                      dispatchThreads:MTLSizeMake(
                                          extent[0], extent[1], 1) /* Width, Height, Layer */
                threadsPerThreadgroup:MTLSizeMake(8, 8, 1)];
          }
          else if (type_ == GPU_TEXTURE_2D_ARRAY) {
            id<MTLComputePipelineState> pso = texture_update_2d_array_get_kernel(
                compute_specialization_kernel);
            TextureUpdateParams params = {mip,
                                          {extent[0], extent[1], extent[2]},
                                          {offset[0], offset[1], offset[2]},
                                          ((ctx->pipeline_state.unpack_row_length == 0) ?
                                               extent[0] :
                                               ctx->pipeline_state.unpack_row_length)};

            /* Bind resources via compute state for optimal state caching performance. */
            MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
            cs.bind_pso(pso);
            cs.bind_compute_bytes(&params, sizeof(params), 0);
            cs.bind_compute_buffer(staging_buffer, 0, 1);
            cs.bind_compute_texture(texture_handle, 0);
            [compute_encoder dispatchThreads:MTLSizeMake(extent[0],
                                                         extent[1],
                                                         extent[2]) /* Width, Height, Layer */
                       threadsPerThreadgroup:MTLSizeMake(4, 4, 4)];
          }
        }

      } break;

      /* 3D */
      case GPU_TEXTURE_3D: {
        if (can_use_direct_blit) {
          size_t bytes_per_row = expected_dst_bytes_per_pixel *
                                 ((ctx->pipeline_state.unpack_row_length == 0) ?
                                      extent[0] :
                                      ctx->pipeline_state.unpack_row_length);
          size_t bytes_per_image = bytes_per_row * extent[1];
          [blit_encoder copyFromBuffer:staging_buffer
                          sourceOffset:0
                     sourceBytesPerRow:bytes_per_row
                   sourceBytesPerImage:bytes_per_image
                            sourceSize:MTLSizeMake(extent[0], extent[1], extent[2])
                             toTexture:texture_handle
                      destinationSlice:0
                      destinationLevel:mip
                     destinationOrigin:MTLOriginMake(offset[0], offset[1], offset[2])];
        }
        else {
          id<MTLComputePipelineState> pso = texture_update_3d_get_kernel(
              compute_specialization_kernel);
          TextureUpdateParams params = {mip,
                                        {extent[0], extent[1], extent[2]},
                                        {offset[0], offset[1], offset[2]},
                                        ((ctx->pipeline_state.unpack_row_length == 0) ?
                                             extent[0] :
                                             ctx->pipeline_state.unpack_row_length)};

          /* Bind resources via compute state for optimal state caching performance. */
          MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
          cs.bind_pso(pso);
          cs.bind_compute_bytes(&params, sizeof(params), 0);
          cs.bind_compute_buffer(staging_buffer, 0, 1);
          cs.bind_compute_texture(texture_handle, 0);
          [compute_encoder
                    dispatchThreads:MTLSizeMake(
                                        extent[0], extent[1], extent[2]) /* Width, Height, Depth */
              threadsPerThreadgroup:MTLSizeMake(4, 4, 4)];
        }
      } break;

      /* CUBE */
      case GPU_TEXTURE_CUBE: {
        if (can_use_direct_blit) {
          size_t bytes_per_row = expected_dst_bytes_per_pixel *
                                 ((ctx->pipeline_state.unpack_row_length == 0) ?
                                      extent[0] :
                                      ctx->pipeline_state.unpack_row_length);
          size_t bytes_per_image = bytes_per_row * extent[1];
          size_t texture_array_relative_offset = 0;

          /* Iterate over all cube faces in range (offset[2], offset[2] + extent[2]). */
          for (int i = 0; i < extent[2]; i++) {
            int face_index = offset[2] + i;

            [blit_encoder copyFromBuffer:staging_buffer
                            sourceOffset:texture_array_relative_offset
                       sourceBytesPerRow:bytes_per_row
                     sourceBytesPerImage:bytes_per_image
                              sourceSize:MTLSizeMake(extent[0], extent[1], 1)
                               toTexture:texture_handle
                        destinationSlice:face_index /* = cubeFace + arrayIndex * 6 */
                        destinationLevel:mip
                       destinationOrigin:MTLOriginMake(offset[0], offset[1], 0)];
            texture_array_relative_offset += bytes_per_image;
          }
        }
        else {
          MTL_LOG_ERROR(
              "TODO(Metal): Support compute texture update for GPU_TEXTURE_CUBE %d, %d, %d\n",
              w_,
              h_,
              d_);
        }
      } break;

      case GPU_TEXTURE_CUBE_ARRAY: {
        if (can_use_direct_blit) {

          size_t bytes_per_row = expected_dst_bytes_per_pixel *
                                 ((ctx->pipeline_state.unpack_row_length == 0) ?
                                      extent[0] :
                                      ctx->pipeline_state.unpack_row_length);
          size_t bytes_per_image = bytes_per_row * extent[1];

          /* Upload to all faces between offset[2] (which is zero in most cases) and extent[2]. */
          size_t texture_array_relative_offset = 0;
          for (int i = 0; i < extent[2]; i++) {
            int face_index = offset[2] + i;
            [blit_encoder copyFromBuffer:staging_buffer
                            sourceOffset:texture_array_relative_offset
                       sourceBytesPerRow:bytes_per_row
                     sourceBytesPerImage:bytes_per_image
                              sourceSize:MTLSizeMake(extent[0], extent[1], 1)
                               toTexture:texture_handle
                        destinationSlice:face_index /* = cubeFace + arrayIndex * 6. */
                        destinationLevel:mip
                       destinationOrigin:MTLOriginMake(offset[0], offset[1], 0)];
            texture_array_relative_offset += bytes_per_image;
          }
        }
        else {
          MTL_LOG_ERROR(
              "TODO(Metal): Support compute texture update for GPU_TEXTURE_CUBE_ARRAY %d, %d, "
              "%d\n",
              w_,
              h_,
              d_);
        }
      } break;

      case GPU_TEXTURE_BUFFER: {
        /* TODO(Metal): Support data upload to TEXTURE BUFFER.
         * Data uploads generally happen via VertBuf instead. */
        BLI_assert(false);
      } break;

      case GPU_TEXTURE_ARRAY:
        /* Not an actual format - modifier flag for others. */
        return;
    }

    /* If a staging texture was used, copy its contents to the original texture. */
    if (use_staging_texture) {
      /* When using a staging texture, copy results into the existing texture. */
      BLI_assert(staging_texture != nil);
      blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();

      /* Copy the modified staging texture region back to the original texture.
       * Blit dimensions differ based on type. */
      switch (type_) {
        case GPU_TEXTURE_1D:
        case GPU_TEXTURE_1D_ARRAY: {
          int base_slice = (type_ == GPU_TEXTURE_1D_ARRAY) ? offset[1] : 0;
          int final_slice = base_slice + ((type_ == GPU_TEXTURE_1D_ARRAY) ? extent[1] : 1);
          for (int array_index = base_slice; array_index < final_slice; array_index++) {
            [blit_encoder copyFromTexture:staging_texture
                              sourceSlice:array_index
                              sourceLevel:mip
                             sourceOrigin:MTLOriginMake(offset[0], 0, 0)
                               sourceSize:MTLSizeMake(extent[0], 1, 1)
                                toTexture:texture_
                         destinationSlice:array_index
                         destinationLevel:mip
                        destinationOrigin:MTLOriginMake(offset[0], 0, 0)];
          }
        } break;
        case GPU_TEXTURE_2D:
        case GPU_TEXTURE_2D_ARRAY: {
          int base_slice = (type_ == GPU_TEXTURE_2D_ARRAY) ? offset[2] : 0;
          int final_slice = base_slice + ((type_ == GPU_TEXTURE_2D_ARRAY) ? extent[2] : 1);
          for (int array_index = base_slice; array_index < final_slice; array_index++) {
            [blit_encoder copyFromTexture:staging_texture
                              sourceSlice:array_index
                              sourceLevel:mip
                             sourceOrigin:MTLOriginMake(offset[0], offset[1], 0)
                               sourceSize:MTLSizeMake(extent[0], extent[1], 1)
                                toTexture:texture_
                         destinationSlice:array_index
                         destinationLevel:mip
                        destinationOrigin:MTLOriginMake(offset[0], offset[1], 0)];
          }
        } break;
        case GPU_TEXTURE_3D: {
          [blit_encoder copyFromTexture:staging_texture
                            sourceSlice:0
                            sourceLevel:mip
                           sourceOrigin:MTLOriginMake(offset[0], offset[1], offset[2])
                             sourceSize:MTLSizeMake(extent[0], extent[1], extent[2])
                              toTexture:texture_
                       destinationSlice:0
                       destinationLevel:mip
                      destinationOrigin:MTLOriginMake(offset[0], offset[1], offset[2])];
        } break;
        case GPU_TEXTURE_CUBE:
        case GPU_TEXTURE_CUBE_ARRAY: {
          /* Iterate over all cube faces in range (offset[2], offset[2] + extent[2]). */
          for (int i = 0; i < extent[2]; i++) {
            int face_index = offset[2] + i;
            [blit_encoder copyFromTexture:staging_texture
                              sourceSlice:face_index
                              sourceLevel:mip
                             sourceOrigin:MTLOriginMake(offset[0], offset[1], 0)
                               sourceSize:MTLSizeMake(extent[0], extent[1], 1)
                                toTexture:texture_
                         destinationSlice:face_index
                         destinationLevel:mip
                        destinationOrigin:MTLOriginMake(offset[0], offset[1], 0)];
          }
        } break;
        case GPU_TEXTURE_ARRAY:
        case GPU_TEXTURE_BUFFER:
          break;
      }

      [staging_texture release];
    }

    /* Finalize Blit Encoder. */
    if (can_use_direct_blit) {
      /* Textures which use MTLStorageModeManaged need to have updated contents
       * synced back to CPU to avoid an automatic flush overwriting contents. */
      if (texture_.storageMode == MTLStorageModeManaged) {
        [blit_encoder synchronizeResource:texture_];
      }
      [blit_encoder optimizeContentsForGPUAccess:texture_];
    }
    else {
      /* Textures which use MTLStorageModeManaged need to have updated contents
       * synced back to CPU to avoid an automatic flush overwriting contents. */
      blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
      if (texture_.storageMode == MTLStorageModeManaged) {
        [blit_encoder synchronizeResource:texture_];
      }
      [blit_encoder optimizeContentsForGPUAccess:texture_];
    }

    /* Decrement texture reference counts. This ensures temporary texture views are released. */
    [texture_handle release];

    ctx->main_command_buffer.submit(false);

    /* Release temporary staging buffer allocation.
     * NOTE: Allocation will be tracked with command submission and released once no longer in
     * use. */
    temp_allocation->free();
  }
}

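/* Update texture contents directly from a GPUPixelBuffer's backing MTLBuffer,
 * avoiding any CPU-side staging copy. Currently only implemented for 2D
 * textures. */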
void MTLTexture::update_sub(int offset[3],
                            int extent[3],
                            eGPUDataFormat format,
                            GPUPixelBuffer *pixbuf)
{
  /* Update texture from pixel buffer. */
  BLI_assert(validate_data_format(format_, format));
  BLI_assert(pixbuf != nullptr);

  /* Fetch pixel buffer metal buffer. */
  MTLPixelBuffer *mtl_pix_buf = static_cast<MTLPixelBuffer *>(unwrap(pixbuf));
  id<MTLBuffer> buffer = mtl_pix_buf->get_metal_buffer();
  BLI_assert(buffer != nil);
  if (buffer == nil) {
    return;
  }

  /* Ensure texture is ready. */
  this->ensure_baked();
  BLI_assert(texture_ != nil);

  /* Calculate dimensions. */
  int num_image_channels = to_component_len(format_);

  size_t bytes_per_pixel = num_image_channels * to_bytesize(format);
  size_t bytes_per_row = bytes_per_pixel * extent[0];
  size_t bytes_per_image = bytes_per_row * extent[1];

  /* Currently only required for 2D textures. */
  if (type_ == GPU_TEXTURE_2D) {

    /* Create blit command encoder to copy data. */
    MTLContext *ctx = MTLContext::get();
    BLI_assert(ctx);

    id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
    [blit_encoder copyFromBuffer:buffer
                    sourceOffset:0
               sourceBytesPerRow:bytes_per_row
             sourceBytesPerImage:bytes_per_image
                      sourceSize:MTLSizeMake(extent[0], extent[1], 1)
                       toTexture:texture_
                destinationSlice:0
                destinationLevel:0
               destinationOrigin:MTLOriginMake(offset[0], offset[1], 0)];

    if (texture_.storageMode == MTLStorageModeManaged) {
      [blit_encoder synchronizeResource:texture_];
    }
    [blit_encoder optimizeContentsForGPUAccess:texture_];
  }
  else {
    BLI_assert(false);
  }
}

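/* Ensure the texture's mip count covers `miplvl`, clamped to the maximum
 * level count implied by the texture dimensions. If the texture was already
 * baked with fewer mips, it is flagged dirty so it can be re-created. */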
void gpu::MTLTexture::ensure_mipmaps(int miplvl)
{

  /* Do not update texture views. */
  BLI_assert(resource_mode_ != MTL_TEXTURE_MODE_TEXTURE_VIEW);

  /* Clamp level to maximum. */
  int effective_h = (type_ == GPU_TEXTURE_1D_ARRAY) ? 0 : h_;
  int effective_d = (type_ != GPU_TEXTURE_3D) ? 0 : d_;
  int max_dimension = max_iii(w_, effective_h, effective_d);
  int max_miplvl = floor(log2(max_dimension));
  miplvl = min_ii(max_miplvl, miplvl);
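  /* Example: a 1024x512 2D texture has max_dimension = 1024, giving
   * floor(log2(1024)) = 10 mip levels above the base level (11 in total). */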

  /* Increase mipmap level. */
  if (mipmaps_ < miplvl) {
    mipmaps_ = miplvl;

    /* Check if baked. */
    if (is_baked_ && mipmaps_ > mtl_max_mips_) {
      BLI_assert_msg(false,
                     "Texture requires a higher mipmap level count. Please specify the required "
                     "amount upfront.");
      is_dirty_ = true;
      MTL_LOG_WARNING("Texture requires regenerating due to an increase in mip-count");
    }
  }
  this->mip_range_set(0, mipmaps_);
}

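/* Generate the full mip chain on the GPU using a blit command encoder.
 * Compressed formats are skipped (their mips are author-provided and cannot
 * be generated at runtime), as are depth formats, which cannot be mip-mapped
 * this way. */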
void gpu::MTLTexture::generate_mipmap()
{
  /* Compressed textures allow users to provide their own custom mipmaps. And
   * we can't generate them at runtime anyway. */
  if (format_flag_ & GPU_FORMAT_COMPRESSED) {
    return;
  }

  /* Fetch active context. */
  MTLContext *ctx = MTLContext::get();
  BLI_assert(ctx);

  if (!ctx->device) {
    MTL_LOG_ERROR("Cannot generate mip-maps -- metal device invalid\n");
    BLI_assert(false);
    return;
  }

  /* Ensure mipmaps. */
  this->ensure_mipmaps(mtl_max_mips_);

  /* Ensure texture is baked. */
  this->ensure_baked();
  BLI_assert_msg(is_baked_ && texture_, "MTLTexture is not valid");

  if (mipmaps_ == 1 || mtl_max_mips_ == 1) {
    /* Nothing to do. */
    return;
  }

  /* Verify that we can perform mipmap generation. */
  if (format_ == GPU_DEPTH_COMPONENT32F || format_ == GPU_DEPTH_COMPONENT16 ||
      format_ == GPU_DEPTH32F_STENCIL8)
  {
    MTL_LOG_WARNING("Cannot generate mipmaps for textures using DEPTH formats");
    return;
  }

  @autoreleasepool {
    /* Fetch active BlitCommandEncoder. */
    id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
    if (G.debug & G_DEBUG_GPU) {
      [enc insertDebugSignpost:@"Generate MipMaps"];
    }
    [enc generateMipmapsForTexture:texture_];
    has_generated_mips_ = true;
  }
}

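/* Full-texture copy to another texture of identical dimensions, format and
 * type. Layered/3D types use a whole-resource copy; all other types go
 * through the per-slice blit() helper. */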
void gpu::MTLTexture::copy_to(Texture *dst)
{
  /* Safety checks. */
  gpu::MTLTexture *mt_src = this;
  gpu::MTLTexture *mt_dst = static_cast<gpu::MTLTexture *>(dst);
  BLI_assert((mt_dst->w_ == mt_src->w_) && (mt_dst->h_ == mt_src->h_) &&
             (mt_dst->d_ == mt_src->d_));
  BLI_assert(mt_dst->format_ == mt_src->format_);
  BLI_assert(mt_dst->type_ == mt_src->type_);

  UNUSED_VARS_NDEBUG(mt_src);

  /* Fetch active context. */
  MTLContext *ctx = MTLContext::get();
  BLI_assert(ctx);

  /* Ensure texture is baked. */
  this->ensure_baked();

  @autoreleasepool {
    /* Setup blit encoder. */
    id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
    BLI_assert(blit_encoder != nil);

    /* TODO(Metal): Consider supporting multiple mip levels IF the GL implementation
     * follows; currently it does not. */
    int mip = 0;

    /* NOTE: mip_size_get() won't override any dimension that is equal to 0. */
    int extent[3] = {1, 1, 1};
    this->mip_size_get(mip, extent);

    switch (mt_dst->type_) {
      case GPU_TEXTURE_2D_ARRAY:
      case GPU_TEXTURE_CUBE_ARRAY:
      case GPU_TEXTURE_3D: {
        /* Do full texture copy for 3D textures. */
        BLI_assert(mt_dst->d_ == d_);
        [blit_encoder copyFromTexture:this->get_metal_handle_base()
                            toTexture:mt_dst->get_metal_handle_base()];
        [blit_encoder optimizeContentsForGPUAccess:mt_dst->get_metal_handle_base()];
      } break;
      default: {
        int slice = 0;
        this->blit(blit_encoder,
                   0,
                   0,
                   0,
                   slice,
                   mip,
                   mt_dst,
                   0,
                   0,
                   0,
                   slice,
                   mip,
                   extent[0],
                   extent[1],
                   extent[2]);
      } break;
    }
  }
}

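/* Clear the texture to a single value using one of three strategies:
 * a fill-buffer blit for buffer-backed textures with a repeating byte
 * pattern, a render-pass attachment clear where a render target can be
 * bound, or a compute-based clear (currently 1D/1D_ARRAY only) otherwise. */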
void gpu::MTLTexture::clear(eGPUDataFormat data_format, const void *data)
{
  /* Ensure texture is baked. */
  this->ensure_baked();

  /* If render-pass clear is not supported, use compute-based clear. */
  bool do_render_pass_clear = true;
  if (ELEM(type_, GPU_TEXTURE_1D, GPU_TEXTURE_1D_ARRAY)) {
    do_render_pass_clear = false;
  }
  /* If the texture is buffer-backed, clear directly on the buffer.
   * NOTE: This is currently only true for fallback atomic textures. */
  if (backing_buffer_ != nullptr) {
    uint channel_len = to_component_len(format_);
    uint channel_size = to_bytesize(data_format);
    bool fast_buf_clear = true;
    const uchar *val = reinterpret_cast<const uchar *>(data);
    for (int i = 1; i < channel_size * channel_len; i++) {
      fast_buf_clear = fast_buf_clear && (val[i] == val[0]);
    }
    if (fast_buf_clear) {
      /* Fetch active context. */
      MTLContext *ctx = MTLContext::get();
      BLI_assert(ctx);

      /* Begin blit encoder. */
      id<MTLBlitCommandEncoder> blit_encoder =
          ctx->main_command_buffer.ensure_begin_blit_encoder();
      [blit_encoder fillBuffer:backing_buffer_->get_metal_buffer()
                         range:NSMakeRange(0, backing_buffer_->get_size())
                         value:val[0]];
    }
    else {
      BLI_assert_msg(false,
                     "Non-repeating-byte-pattern clear for buffer-backed textures not supported!");
    }
    return;
  }

  if (do_render_pass_clear) {
    /* Create clear frame-buffer for fast clear. */
    GPUFrameBuffer *prev_fb = GPU_framebuffer_active_get();
    FrameBuffer *fb = unwrap(this->get_blit_framebuffer(-1, 0));
    fb->bind(true);
    fb->clear_attachment(this->attachment_type(0), data_format, data);
    GPU_framebuffer_bind(prev_fb);
  }
  else {

    /* Prepare specialization struct (for texture clear routine). */
    int num_channels = to_component_len(format_);
    TextureUpdateRoutineSpecialisation compute_specialization_kernel = {
        tex_data_format_to_msl_type_str(data_format),              /* INPUT DATA FORMAT */
        tex_data_format_to_msl_texture_template_type(data_format), /* TEXTURE DATA FORMAT */
        num_channels,
        num_channels,
        true /* Operation is a clear. */
    };

    /* Determine size of source data clear. */
    uint clear_data_size = to_bytesize(format_, data_format);

    /* Fetch active context. */
    MTLContext *ctx = MTLContext::get();
    BLI_assert(ctx);

    /* Determine writeable texture handle. */
    id<MTLTexture> texture_handle = texture_;

    /* Begin compute encoder. */
    id<MTLComputeCommandEncoder> compute_encoder =
        ctx->main_command_buffer.ensure_begin_compute_encoder();

    /* Perform clear operation based on texture type. */
    switch (type_) {
      case GPU_TEXTURE_1D: {
        id<MTLComputePipelineState> pso = texture_update_1d_get_kernel(
            compute_specialization_kernel);
        TextureUpdateParams params = {0,
                                      {w_, 1, 1},
                                      {0, 0, 0},
                                      ((ctx->pipeline_state.unpack_row_length == 0) ?
                                           w_ :
                                           ctx->pipeline_state.unpack_row_length)};

        /* Bind resources via compute state for optimal state caching performance. */
        MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
        cs.bind_pso(pso);
        cs.bind_compute_bytes(&params, sizeof(params), 0);
        cs.bind_compute_bytes(data, clear_data_size, 1);
        cs.bind_compute_texture(texture_handle, 0);
        [compute_encoder dispatchThreads:MTLSizeMake(w_, 1, 1) /* Width, Height, Layer */
                   threadsPerThreadgroup:MTLSizeMake(64, 1, 1)];
      } break;
      case GPU_TEXTURE_1D_ARRAY: {
        id<MTLComputePipelineState> pso = texture_update_1d_array_get_kernel(
            compute_specialization_kernel);
        TextureUpdateParams params = {0,
                                      {w_, h_, 1},
                                      {0, 0, 0},
                                      ((ctx->pipeline_state.unpack_row_length == 0) ?
                                           w_ :
                                           ctx->pipeline_state.unpack_row_length)};

        /* Bind resources via compute state for optimal state caching performance. */
        MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
        cs.bind_pso(pso);
        cs.bind_compute_bytes(&params, sizeof(params), 0);
        cs.bind_compute_bytes(data, clear_data_size, 1);
        cs.bind_compute_texture(texture_handle, 0);
        [compute_encoder dispatchThreads:MTLSizeMake(w_, h_, 1) /* Width, layers. */
                   threadsPerThreadgroup:MTLSizeMake(8, 8, 1)];
      } break;
      default: {
        MTL_LOG_ERROR(
            "gpu::MTLTexture::clear requires a compute pass for texture "
            "type: %d, but this is not yet supported",
            (int)type_);
      } break;
    }

    /* Textures which use MTLStorageModeManaged need to have updated contents
     * synced back to CPU to avoid an automatic flush overwriting contents. */
    id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
    if (texture_.storageMode == MTLStorageModeManaged) {
      [blit_encoder synchronizeResource:texture_];
    }
    [blit_encoder optimizeContentsForGPUAccess:texture_];
  }
}

static MTLTextureSwizzle swizzle_to_mtl(const char swizzle)
{
  switch (swizzle) {
    default:
    case 'x':
    case 'r':
      return MTLTextureSwizzleRed;
    case 'y':
    case 'g':
      return MTLTextureSwizzleGreen;
    case 'z':
    case 'b':
      return MTLTextureSwizzleBlue;
    case 'w':
    case 'a':
      return MTLTextureSwizzleAlpha;
    case '0':
      return MTLTextureSwizzleZero;
    case '1':
      return MTLTextureSwizzleOne;
  }
}

void gpu::MTLTexture::swizzle_set(const char swizzle_mask[4])
{
  if (memcmp(tex_swizzle_mask_, swizzle_mask, 4) != 0) {
    memcpy(tex_swizzle_mask_, swizzle_mask, 4);

    /* Create the new swizzle mask and flag the texture view as dirty. */
    MTLTextureSwizzleChannels new_swizzle_mask = MTLTextureSwizzleChannelsMake(
        swizzle_to_mtl(swizzle_mask[0]),
        swizzle_to_mtl(swizzle_mask[1]),
        swizzle_to_mtl(swizzle_mask[2]),
        swizzle_to_mtl(swizzle_mask[3]));

    mtl_swizzle_mask_ = new_swizzle_mask;
    texture_view_dirty_flags_ |= TEXTURE_VIEW_SWIZZLE_DIRTY;
  }
}
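
/* Example: a single-channel red texture can be exposed as grayscale with
 * opaque alpha by remapping all color channels to 'r', assuming a high-level
 * texture handle `tex`:
 *   GPU_texture_swizzle_set(tex, "rrr1");
 */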

void gpu::MTLTexture::mip_range_set(int min, int max)
{
  BLI_assert(min <= max && min >= 0 && max <= mipmaps_);

  /* NOTE:
   * - mip_min_ and mip_max_ are used to clamp LODs during sampling.
   * - Given functions like Framebuffer::recursive_downsample modify the mip range
   *   between each layer, we do not want to be re-baking the texture.
   * - For the time being, we are going to just need to generate a FULL mipmap chain,
   *   as we do not know ahead of time whether mipmaps will be used.
   *
   * TODO(Metal): Add a texture initialization flag to determine whether mipmaps are used
   * or not. Will be important for saving memory for big textures. */
  mip_min_ = min;
  mip_max_ = max;

  if ((type_ == GPU_TEXTURE_1D || type_ == GPU_TEXTURE_1D_ARRAY || type_ == GPU_TEXTURE_BUFFER) &&
      max > 1)
  {

    MTL_LOG_ERROR(
        " MTLTexture of type TEXTURE_1D_ARRAY or TEXTURE_BUFFER cannot have a mipcount "
        "greater than 1\n");
    mip_min_ = 0;
    mip_max_ = 0;
    mipmaps_ = 0;
    BLI_assert(false);
  }

  /* Mip range for texture view. */
  mip_texture_base_level_ = mip_min_;
  mip_texture_max_level_ = mip_max_;
  texture_view_dirty_flags_ |= TEXTURE_VIEW_MIP_DIRTY;
}

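/* Read back the full extent of a mip level into a newly allocated host
 * buffer; the caller takes ownership of the returned memory. The allocation
 * includes 8 bytes of slack past the expected size, and read_internal() is
 * given the padded size for its debug bounds check. */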
void *gpu::MTLTexture::read(int mip, eGPUDataFormat type)
{
  /* Prepare Array for return data. */
  BLI_assert(!(format_flag_ & GPU_FORMAT_COMPRESSED));
  BLI_assert(mip <= mipmaps_);
  BLI_assert(validate_data_format(format_, type));

  /* NOTE: mip_size_get() won't override any dimension that is equal to 0. */
  int extent[3] = {1, 1, 1};
  this->mip_size_get(mip, extent);

  size_t sample_len = extent[0] * max_ii(extent[1], 1) * max_ii(extent[2], 1);
  size_t sample_size = to_bytesize(format_, type);
  size_t texture_size = sample_len * sample_size;
  int num_channels = to_component_len(format_);

  void *data = MEM_mallocN(texture_size + 8, "GPU_texture_read");

  /* Ensure texture is baked. */
  if (is_baked_) {
    this->read_internal(
        mip, 0, 0, 0, extent[0], extent[1], extent[2], type, num_channels, texture_size + 8, data);
  }
  else {
    /* Clear return values? */
    MTL_LOG_WARNING("MTLTexture::read - reading from texture with no image data");
  }

  return data;
}

/* Fetch the raw buffer data from a texture and copy to CPU host ptr. */
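/* A blit-encoder copy into a readback buffer is used when the requested
 * output layout matches the texture's native layout exactly; otherwise a
 * specialized compute kernel performs the conversion. Depth formats are
 * always routed through the compute path. */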
void gpu::MTLTexture::read_internal(int mip,
                                    int x_off,
                                    int y_off,
                                    int z_off,
                                    int width,
                                    int height,
                                    int depth,
                                    eGPUDataFormat desired_output_format,
                                    int num_output_components,
                                    size_t debug_data_size,
                                    void *r_data)
{
  /* Verify textures are baked. */
  if (!is_baked_) {
    MTL_LOG_WARNING("gpu::MTLTexture::read_internal - Trying to read from a non-baked texture!");
    return;
  }
  /* Fetch active context. */
  MTLContext *ctx = MTLContext::get();
  BLI_assert(ctx);

  /* Calculate desired output size. */
  int num_channels = to_component_len(format_);
  BLI_assert(num_output_components <= num_channels);
  size_t desired_output_bpp = num_output_components * to_bytesize(desired_output_format);

  /* Calculate Metal data output for trivial copy. */
  size_t image_bpp = get_mtl_format_bytesize(texture_.pixelFormat);
  uint image_components = get_mtl_format_num_components(texture_.pixelFormat);
  bool is_depth_format = (format_flag_ & GPU_FORMAT_DEPTH);

  /* Verify whether we need to use a compute read. */
  eGPUDataFormat data_format = to_data_format(this->format_get());
  bool format_conversion_needed = (data_format != desired_output_format);
  bool can_use_simple_read = (desired_output_bpp == image_bpp) && (!format_conversion_needed) &&
                             (num_output_components == image_components);

  /* Depth must be read using the compute shader -- some safety checks to verify that params are
   * correct. */
  if (is_depth_format) {
    can_use_simple_read = false;
    /* TODO(Metal): Stencil data write not yet supported, so force components to one. */
    image_components = 1;
    BLI_assert(num_output_components == 1);
    BLI_assert(image_components == 1);
    BLI_assert(data_format == GPU_DATA_FLOAT || data_format == GPU_DATA_UINT_24_8);
    BLI_assert(validate_data_format(format_, data_format));
  }

  /* SPECIAL workaround for R11G11B10, GPU_RGB10_A2, GPU_RGB10_A2UI textures requesting a read
   * using: GPU_DATA_10_11_11_REV. */
  if (desired_output_format == GPU_DATA_10_11_11_REV ||
      desired_output_format == GPU_DATA_2_10_10_10_REV)
  {
    BLI_assert(format_ == GPU_R11F_G11F_B10F || format_ == GPU_RGB10_A2 ||
               format_ == GPU_RGB10_A2UI);

    /* Override parameters - we'll be able to use a simple copy, as bpp will match at 4 bytes. */
    image_bpp = sizeof(int);
    image_components = 1;
    desired_output_bpp = sizeof(int);
    num_output_components = 1;

    data_format = GPU_DATA_INT;
    format_conversion_needed = false;
    can_use_simple_read = true;
  }

  /* Determine size of output data. */
  size_t bytes_per_row = desired_output_bpp * width;
  size_t bytes_per_image = bytes_per_row * height;
  size_t total_bytes = bytes_per_image * max_ii(depth, 1);

  if (can_use_simple_read) {
    /* DEBUG check that if a direct copy is being used, the expected output size matches
     * the METAL texture size. */
    BLI_assert(
        ((num_output_components * to_bytesize(desired_output_format)) == desired_output_bpp) &&
        (desired_output_bpp == image_bpp));
  }
  /* DEBUG check that the allocated data size matches the bytes we expect. */
  BLI_assert(total_bytes <= debug_data_size);
  UNUSED_VARS_NDEBUG(debug_data_size);

  /* Fetch allocation from scratch buffer. */
  gpu::MTLBuffer *dest_buf = MTLContext::get_global_memory_manager()->allocate_aligned(
      total_bytes, 256, true);
  BLI_assert(dest_buf != nullptr);

  id<MTLBuffer> destination_buffer = dest_buf->get_metal_buffer();
  BLI_assert(destination_buffer != nil);
  void *destination_buffer_host_ptr = dest_buf->get_host_ptr();
  BLI_assert(destination_buffer_host_ptr != nullptr);

  /* Prepare specialization struct (for non-trivial texture read routine). */
  int depth_format_mode = 0;
  if (is_depth_format) {
    depth_format_mode = 1;
    switch (desired_output_format) {
      case GPU_DATA_FLOAT:
        depth_format_mode = 1;
        break;
      case GPU_DATA_UINT_24_8:
        depth_format_mode = 2;
        break;
      case GPU_DATA_UINT:
        depth_format_mode = 4;
        break;
      default:
        BLI_assert_msg(false, "Unhandled depth read format case");
        break;
    }
  }

  TextureReadRoutineSpecialisation compute_specialization_kernel = {
      tex_data_format_to_msl_texture_template_type(data_format), /* TEXTURE DATA TYPE */
      tex_data_format_to_msl_type_str(desired_output_format),    /* OUTPUT DATA TYPE */
      num_channels,                                              /* TEXTURE COMPONENT COUNT */
      num_output_components,                                     /* OUTPUT DATA COMPONENT COUNT */
      depth_format_mode};

  bool copy_successful = false;
  @autoreleasepool {

    /* TODO(Metal): Verify whether we need some form of barrier here to ensure reads
     * happen after work with the associated texture is finished. */
    GPU_finish();

    id<MTLTexture> read_texture = texture_;
    /* Use texture-view handle if reading from a GPU texture view. */
    if (resource_mode_ == MTL_TEXTURE_MODE_TEXTURE_VIEW) {
      read_texture = this->get_metal_handle();
    }
    /* Create a texture view for the SRGB special case to bypass internal type conversion. */
    if (format_ == GPU_SRGB8_A8) {
      BLI_assert(internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_FORMAT_VIEW);
      read_texture = [read_texture newTextureViewWithPixelFormat:MTLPixelFormatRGBA8Unorm];
    }

    /* Perform per-texture type read. */
    switch (type_) {
      case GPU_TEXTURE_1D: {
        if (can_use_simple_read) {
          /* Use blit encoder READ. */
          id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
          if (G.debug & G_DEBUG_GPU) {
            [enc insertDebugSignpost:@"GPUTextureRead1D"];
          }
          [enc copyFromTexture:read_texture
                       sourceSlice:0
                       sourceLevel:mip
                      sourceOrigin:MTLOriginMake(x_off, 0, 0)
                        sourceSize:MTLSizeMake(width, 1, 1)
                          toBuffer:destination_buffer
                 destinationOffset:0
            destinationBytesPerRow:bytes_per_row
          destinationBytesPerImage:bytes_per_image];
          copy_successful = true;
        }
        else {

          /* Use compute READ. */
          id<MTLComputeCommandEncoder> compute_encoder =
              ctx->main_command_buffer.ensure_begin_compute_encoder();
          id<MTLComputePipelineState> pso = texture_read_1d_get_kernel(
              compute_specialization_kernel);
          TextureReadParams params = {
              mip,
              {width, 1, 1},
              {x_off, 0, 0},
          };

          /* Bind resources via compute state for optimal state caching performance. */
          MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
          cs.bind_pso(pso);
          cs.bind_compute_bytes(&params, sizeof(params), 0);
          cs.bind_compute_buffer(destination_buffer, 0, 1);
          cs.bind_compute_texture(read_texture, 0);
          [compute_encoder dispatchThreads:MTLSizeMake(width, 1, 1) /* Width, Height, Layer */
                     threadsPerThreadgroup:MTLSizeMake(8, 8, 1)];
          copy_successful = true;
        }
      } break;
1762
1763 case GPU_TEXTURE_1D_ARRAY: {
1764 if (can_use_simple_read) {
1765 /* Use Blit Encoder READ. */
1766 id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
1767 if (G.debug & G_DEBUG_GPU) {
1768 [enc insertDebugSignpost:@"GPUTextureRead1DArray"];
1769 }
1770
1771 int base_slice = y_off;
1772 int final_slice = base_slice + height;
1773 size_t texture_array_relative_offset = 0;
1774
1775 for (int array_slice = base_slice; array_slice < final_slice; array_slice++) {
1776 [enc copyFromTexture:read_texture
1777 sourceSlice:array_slice
1778 sourceLevel:mip
1779 sourceOrigin:MTLOriginMake(x_off, 0, 0)
1780 sourceSize:MTLSizeMake(width, 1, 1)
1781 toBuffer:destination_buffer
1782 destinationOffset:texture_array_relative_offset
1783 destinationBytesPerRow:bytes_per_row
1784 destinationBytesPerImage:bytes_per_row];
1785 texture_array_relative_offset += bytes_per_row;
1786 }
1787 copy_successful = true;
1788 }
1789 else {
1790 /* Use Compute READ. */
1791 id<MTLComputeCommandEncoder> compute_encoder =
1792 ctx->main_command_buffer.ensure_begin_compute_encoder();
1793 id<MTLComputePipelineState> pso = texture_read_1d_array_get_kernel(
1794 compute_specialization_kernel);
1795 TextureReadParams params = {
1796 mip,
1797 {width, height, 1},
1798 {x_off, y_off, 0},
1799 };
1800
1801 /* Bind resources via compute state for optimal state caching performance. */
1802 MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
1803 cs.bind_pso(pso);
1804 cs.bind_compute_bytes(&params, sizeof(params), 0);
1805 cs.bind_compute_buffer(destination_buffer, 0, 1);
1806 cs.bind_compute_texture(read_texture, 0);
1807 [compute_encoder dispatchThreads:MTLSizeMake(width, height, 1) /* Width, Height, Layer */
1808 threadsPerThreadgroup:MTLSizeMake(8, 8, 1)];
1809 copy_successful = true;
1810 }
1811 } break;
1812
1813 case GPU_TEXTURE_2D: {
1814 if (can_use_simple_read) {
1815 /* Use Blit Encoder READ. */
1816 id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
1817 if (G.debug & G_DEBUG_GPU) {
1818 [enc insertDebugSignpost:@"GPUTextureRead2D"];
1819 }
1820 [enc copyFromTexture:read_texture
1821 sourceSlice:0
1822 sourceLevel:mip
1823 sourceOrigin:MTLOriginMake(x_off, y_off, 0)
1824 sourceSize:MTLSizeMake(width, height, 1)
1825 toBuffer:destination_buffer
1826 destinationOffset:0
1827 destinationBytesPerRow:bytes_per_row
1828 destinationBytesPerImage:bytes_per_image];
1829 copy_successful = true;
1830 }
1831 else {
1832
1833 /* Use Compute READ. */
1834 id<MTLComputeCommandEncoder> compute_encoder =
1835 ctx->main_command_buffer.ensure_begin_compute_encoder();
1836 id<MTLComputePipelineState> pso = texture_read_2d_get_kernel(
1837 compute_specialization_kernel);
1838 TextureReadParams params = {
1839 mip,
1840 {width, height, 1},
1841 {x_off, y_off, 0},
1842 };
1843
1844 /* Bind resources via compute state for optimal state caching performance. */
1845 MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
1846 cs.bind_pso(pso);
1847 cs.bind_compute_bytes(&params, sizeof(params), 0);
1848 cs.bind_compute_buffer(destination_buffer, 0, 1);
1849 cs.bind_compute_texture(read_texture, 0);
1850 [compute_encoder dispatchThreads:MTLSizeMake(width, height, 1) /* Width, Height, Layer */
1851 threadsPerThreadgroup:MTLSizeMake(8, 8, 1)];
1852 copy_successful = true;
1853 }
1854 } break;
1855
1856 case GPU_TEXTURE_2D_ARRAY: {
1857 if (can_use_simple_read) {
1858 /* Use Blit Encoder READ. */
1859 id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
1860 if (G.debug & G_DEBUG_GPU) {
1861 [enc insertDebugSignpost:@"GPUTextureRead2DArray"];
1862 }
1863 int base_slice = z_off;
1864 int final_slice = base_slice + depth;
1865 size_t texture_array_relative_offset = 0;
1866
1867 for (int array_slice = base_slice; array_slice < final_slice; array_slice++) {
1868 [enc copyFromTexture:read_texture
1869 sourceSlice:array_slice
1870 sourceLevel:mip
1871 sourceOrigin:MTLOriginMake(x_off, y_off, 0)
1872 sourceSize:MTLSizeMake(width, height, 1)
1873 toBuffer:destination_buffer
1874 destinationOffset:texture_array_relative_offset
1875 destinationBytesPerRow:bytes_per_row
1876 destinationBytesPerImage:bytes_per_image];
1877 texture_array_relative_offset += bytes_per_image;
1878 }
1879 copy_successful = true;
1880 }
1881 else {
1882
1883 /* Use Compute READ */
1884 id<MTLComputeCommandEncoder> compute_encoder =
1885 ctx->main_command_buffer.ensure_begin_compute_encoder();
1886 id<MTLComputePipelineState> pso = texture_read_2d_array_get_kernel(
1887 compute_specialization_kernel);
1888 TextureReadParams params = {
1889 mip,
1890 {width, height, depth},
1891 {x_off, y_off, z_off},
1892 };
1893
1894 /* Bind resources via compute state for optimal state caching performance. */
1895 MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
1896 cs.bind_pso(pso);
1897 cs.bind_compute_bytes(&params, sizeof(params), 0);
1898 cs.bind_compute_buffer(destination_buffer, 0, 1);
1899 cs.bind_compute_texture(read_texture, 0);
1900 [compute_encoder
1901 dispatchThreads:MTLSizeMake(width, height, depth) /* Width, Height, Layer */
1902 threadsPerThreadgroup:MTLSizeMake(8, 8, 1)];
1903 copy_successful = true;
1904 }
1905 } break;
1906
1907 case GPU_TEXTURE_3D: {
1908 if (can_use_simple_read) {
1909 /* Use Blit Encoder READ. */
1910 id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
1911 if (G.debug & G_DEBUG_GPU) {
1912 [enc insertDebugSignpost:@"GPUTextureRead3D"];
1913 }
1914 [enc copyFromTexture:read_texture
1915 sourceSlice:0
1916 sourceLevel:mip
1917 sourceOrigin:MTLOriginMake(x_off, y_off, z_off)
1918 sourceSize:MTLSizeMake(width, height, depth)
1919 toBuffer:destination_buffer
1920 destinationOffset:0
1921 destinationBytesPerRow:bytes_per_row
1922 destinationBytesPerImage:bytes_per_image];
1923 copy_successful = true;
1924 }
1925 else {
1926
1927 /* Use Compute READ. */
1928 id<MTLComputeCommandEncoder> compute_encoder =
1929 ctx->main_command_buffer.ensure_begin_compute_encoder();
1930 id<MTLComputePipelineState> pso = texture_read_3d_get_kernel(
1931 compute_specialization_kernel);
1932 TextureReadParams params = {
1933 mip,
1934 {width, height, depth},
1935 {x_off, y_off, z_off},
1936 };
1937
1938 /* Bind resources via compute state for optimal state caching performance. */
1939 MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
1940 cs.bind_pso(pso);
1941 cs.bind_compute_bytes(&params, sizeof(params), 0);
1942 cs.bind_compute_buffer(destination_buffer, 0, 1);
1943 cs.bind_compute_texture(read_texture, 0);
1944 [compute_encoder
1945 dispatchThreads:MTLSizeMake(width, height, depth) /* Width, Height, Layer */
1946 threadsPerThreadgroup:MTLSizeMake(4, 4, 4)];
1947 copy_successful = true;
1948 }
1949 } break;
1950
1951 case GPU_TEXTURE_CUBE:
1952 case GPU_TEXTURE_CUBE_ARRAY: {
1953 BLI_assert_msg(z_off == 0 || type_ == GPU_TEXTURE_CUBE_ARRAY,
1954 "z_off > 0 is only supported by TEXTURE CUBE ARRAY reads.");
1955 BLI_assert_msg(depth <= 6 || type_ == GPU_TEXTURE_CUBE_ARRAY,
1956 "depth > 6 is only supported by TEXTURE CUBE ARRAY reads. ");
1957 if (can_use_simple_read) {
1958 id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
1959 if (G.debug & G_DEBUG_GPU) {
1960 [enc insertDebugSignpost:@"GPUTextureReadCubeArray"];
1961 }
1962
1963 /* NOTE: Depth should have a minimum value of 1 as we read at least one slice. */
1964 int base_slice = z_off;
1965 int final_slice = base_slice + depth;
1966 size_t texture_array_relative_offset = 0;
1967
1968 for (int array_slice = base_slice; array_slice < final_slice; array_slice++) {
1969 [enc copyFromTexture:read_texture
1970 sourceSlice:array_slice
1971 sourceLevel:mip
1972 sourceOrigin:MTLOriginMake(x_off, y_off, 0)
1973 sourceSize:MTLSizeMake(width, height, 1)
1974 toBuffer:destination_buffer
1975 destinationOffset:texture_array_relative_offset
1976 destinationBytesPerRow:bytes_per_row
1977 destinationBytesPerImage:bytes_per_image];
1978
1979 texture_array_relative_offset += bytes_per_image;
1980 }
1981 MTL_LOG_INFO("Copying texture data to buffer GPU_TEXTURE_CUBE_ARRAY");
1982 copy_successful = true;
1983 }
1984 else {
1985 MTL_LOG_ERROR("TODO(Metal): unsupported compute copy of texture cube array");
1986 }
1987 } break;
1988
1989 default:
1990 MTL_LOG_WARNING(
1991 "gpu::MTLTexture::read_internal simple-copy not yet supported for texture "
1992 "type: %d",
1993 (int)type_);
1994 break;
1995 }
1996
1997 if (copy_successful) {
1998
1999 /* Use Blit encoder to synchronize results back to CPU. */
2000 if (dest_buf->get_resource_options() == MTLResourceStorageModeManaged) {
2001 id<MTLBlitCommandEncoder> enc = ctx->main_command_buffer.ensure_begin_blit_encoder();
2002 if (G.debug & G_DEBUG_GPU) {
2003 [enc insertDebugSignpost:@"GPUTextureRead-syncResource"];
2004 }
2005 [enc synchronizeResource:destination_buffer];
2006 }
2007
2008 /* Ensure GPU copy commands have completed. */
2009 GPU_finish();
2010
2011 /* Copy data from Shared Memory into ptr. */
2012 memcpy(r_data, destination_buffer_host_ptr, total_bytes);
2013 MTL_LOG_INFO("gpu::MTLTexture::read_internal success! %lu bytes read", total_bytes);
2014 }
2015 else {
2016 MTL_LOG_WARNING(
2017 "gpu::MTLTexture::read_internal not yet supported for this config -- data "
2018 "format different (src %lu bytes, dst %lu bytes) (src format: %d, dst format: %d), or "
2019 "varying component counts (src %d, dst %d)",
2020 image_bpp,
2021 desired_output_bpp,
2022 (int)data_format,
2023 (int)desired_output_format,
2024 image_components,
2025 num_output_components);
2026 }
2027
2028 /* Release destination buffer. */
2029 dest_buf->free();
2030 }
2031}
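/* Usage sketch (illustrative; the exact public entry-point signature is assumed here):
 * read_internal() is normally reached via the GPU module's generic read-back call, e.g.
 *
 *   float *pixels = static_cast<float *>(GPU_texture_read(tex, GPU_DATA_FLOAT, 0));
 *   // ... consume pixels ...
 *   MEM_freeN(pixels);
 *
 * The blit/compute paths above fill a staging buffer whose contents are then copied into
 * the returned CPU allocation. */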
2032
2033/* Remove once no longer required -- will just return 0 for now in MTL path. */
2034uint gpu::MTLTexture::gl_bindcode_get() const
2035{
2036 return 0;
2037}
2038
2039bool gpu::MTLTexture::init_internal()
2040{
2041 this->prepare_internal();
2042 /* TODO(jbakker): Other limit checks should be added as well. When a texture violates a limit,
2043 * it is not backed by a Metal texture and will crash when used. */
2044 const int limit = GPU_max_texture_3d_size();
2045 if ((type_ == GPU_TEXTURE_3D) && (w_ > limit || h_ > limit || d_ > limit)) {
2046 return false;
2047 }
2048 return true;
2049}
2050
2051bool gpu::MTLTexture::init_internal(VertBuf *vbo)
2052{
2053 MTLPixelFormat mtl_format = gpu_texture_format_to_metal(this->format_);
2054 mtl_max_mips_ = 1;
2055 mipmaps_ = 0;
2056 this->mip_range_set(0, 0);
2057
2058 /* Create texture from VertBuf's buffer. */
2059 MTLVertBuf *mtl_vbo = static_cast<MTLVertBuf *>(vbo);
2060 mtl_vbo->bind();
2061 mtl_vbo->flag_used();
2062
2063 /* Get Metal Buffer. */
2064 id<MTLBuffer> source_buffer = mtl_vbo->get_metal_buffer();
2065 BLI_assert(source_buffer);
2066
2067 /* Verify size. */
2068 if (w_ <= 0) {
2069 MTL_LOG_WARNING("Allocating texture buffer of width 0!");
2070 w_ = 1;
2071 }
2072
2073 /* Verify Texture and vertex buffer alignment. */
2074 const GPUVertFormat *format = GPU_vertbuf_get_format(vbo);
2075 size_t bytes_per_pixel = get_mtl_format_bytesize(mtl_format);
2076 size_t bytes_per_row = bytes_per_pixel * w_;
2077
2078 MTLContext *mtl_ctx = MTLContext::get();
2079 uint32_t align_requirement = uint32_t(
2080 [mtl_ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]);
2081
2082 /* If stride is larger than bytes per pixel, but format has multiple attributes,
2083 * split attributes across several pixels. */
2084 if (format->stride > bytes_per_pixel && format->attr_len > 1) {
2085
2086 /* We need to increase the number of pixels available to store additional attributes.
2087 * First ensure that the total stride of the vertex format fits uniformly into
2088 * multiple pixels. If these sizes are different, then attributes are of differing
2089 * sizes and this operation is unsupported. */
2090 if (bytes_per_pixel * format->attr_len != format->stride) {
2091 BLI_assert_msg(false,
2092 "Cannot split attributes across multiple pixels as attribute format sizes do "
2093 "not match.");
2094 return false;
2095 }
2096
2097 /* Provide a single pixel per attribute. */
2098 /* Increase bytes per row to ensure there are enough bytes for all vertex attribute data. */
2099 bytes_per_row *= format->attr_len;
2100 BLI_assert(bytes_per_row == format->stride * w_);
2101
2102 /* Multiply width of image to provide one attribute per pixel. */
2103 w_ *= format->attr_len;
2104 BLI_assert(bytes_per_row == bytes_per_pixel * w_);
2105 BLI_assert_msg(w_ == mtl_vbo->vertex_len * format->attr_len,
2106 "Image should contain one pixel for each attribute in every vertex.");
2107 }
2108 else {
2109 /* Verify per-vertex size aligns with texture size. */
2110 BLI_assert(bytes_per_pixel == format->stride &&
2111 "Pixel format stride MUST match the texture format stride -- these being different "
2112 "is likely caused by Metal's VBO padding to a minimum of 4 bytes per vertex."
2113 " If multiple attributes are used, each attribute is packed into its own "
2114 "individual pixel once the stride length is exceeded.");
2115 }
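/* Worked example (illustrative): a vertex format with two float4 attributes has
 * format->stride == 32 while an RGBA32F pixel holds 16 bytes. Since 16 * 2 == 32 the
 * split above is valid: bytes_per_row and w_ are both doubled and each vertex occupies
 * two consecutive pixels, one per attribute. */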
2116
2117 /* Create texture descriptor. */
2118 BLI_assert(type_ == GPU_TEXTURE_BUFFER);
2119 texture_descriptor_ = [[MTLTextureDescriptor alloc] init];
2120 texture_descriptor_.pixelFormat = mtl_format;
2121 texture_descriptor_.textureType = MTLTextureTypeTextureBuffer;
2122 texture_descriptor_.width = w_;
2123 texture_descriptor_.height = 1;
2124 texture_descriptor_.depth = 1;
2125 texture_descriptor_.arrayLength = 1;
2126 texture_descriptor_.mipmapLevelCount = mtl_max_mips_;
2127 texture_descriptor_.usage =
2128 MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite |
2129 MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimize usage flags. */
2130 texture_descriptor_.storageMode = [source_buffer storageMode];
2131 texture_descriptor_.sampleCount = 1;
2132 texture_descriptor_.cpuCacheMode = [source_buffer cpuCacheMode];
2133 texture_descriptor_.hazardTrackingMode = [source_buffer hazardTrackingMode];
2134
2135 texture_ = [source_buffer
2136 newTextureWithDescriptor:texture_descriptor_
2137 offset:0
2138 bytesPerRow:ceil_to_multiple_ul(bytes_per_row, align_requirement)];
2139 aligned_w_ = bytes_per_row / bytes_per_pixel;
2140
2141 BLI_assert(texture_);
2142 texture_.label = [NSString stringWithUTF8String:this->get_name()];
2143 is_baked_ = true;
2144 is_dirty_ = false;
2145 resource_mode_ = MTL_TEXTURE_MODE_VBO;
2146
2147 /* Track Status. */
2148 vert_buffer_ = mtl_vbo;
2149 vert_buffer_mtl_ = source_buffer;
2150
2151 return true;
2152}
2153
2154bool gpu::MTLTexture::init_internal(GPUTexture *src,
2155 int mip_offset,
2156 int layer_offset,
2157 bool use_stencil)
2158{
2159 BLI_assert(src);
2160
2161 /* Zero initialize. */
2162 this->prepare_internal();
2163
2164 /* Flag as using texture view. */
2165 resource_mode_ = MTL_TEXTURE_MODE_TEXTURE_VIEW;
2166 source_texture_ = src;
2167 mip_texture_base_level_ = mip_offset;
2168 mip_texture_base_layer_ = layer_offset;
2169 texture_view_dirty_flags_ |= TEXTURE_VIEW_MIP_DIRTY;
2170
2171 /* Assign usage. */
2172 internal_gpu_image_usage_flags_ = GPU_texture_usage(src);
2173
2174 /* Assign texture as view. */
2175 gpu::MTLTexture *mtltex = static_cast<gpu::MTLTexture *>(unwrap(src));
2176 mtltex->ensure_baked();
2177 texture_ = mtltex->texture_;
2178 BLI_assert(texture_);
2179 [texture_ retain];
2180
2181 /* Flag texture as baked -- we do not need explicit initialization. */
2182 is_baked_ = true;
2183 is_dirty_ = false;
2184
2185 /* Stencil view support. */
2186 texture_view_stencil_ = false;
2187 if (use_stencil) {
2189 texture_view_stencil_ = true;
2190 }
2191
2192 /* Bake mip swizzle view. */
2193 bake_mip_swizzle_view();
2194 return true;
2195}
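/* Usage sketch (illustrative; the creation API and its parameters are assumptions): this
 * path services GPU texture views, e.g.
 *
 *   GPUTexture *view = GPU_texture_create_view(
 *       "src_view", src, GPU_R32UI, 0, 1, 0, 1, false, false);
 *
 * The view shares and retains the source's MTLTexture; the actual Metal view object is
 * only created on demand by bake_mip_swizzle_view(). */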
2196
2198
2199/* -------------------------------------------------------------------- */
2202
2203bool gpu::MTLTexture::is_baked() const
2204{
2205 return is_baked_;
2206}
2207
2208/* Prepare texture parameters after initialization, but before baking. */
2209void gpu::MTLTexture::prepare_internal()
2210{
2211 /* Take a copy of the flags so that any modifications we make won't affect the texture
2212 * cache/pool match finding test. */
2213 internal_gpu_image_usage_flags_ = gpu_image_usage_flags_;
2214
2215 /* Metal: Texture clearing is done using frame-buffer clear. This has no performance impact or
2216 * bandwidth implications for lossless compression and is considered best-practice.
2217 *
2218 * Attachment usage is also required for depth-stencil attachment targets, for depth-update support.
2219 * NOTE: Emulated atomic textures cannot support render-target usage. For clearing, the backing
2220 * buffer is cleared instead.
2221 */
2222 if (!((internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATOMIC) &&
2223 !MTLBackend::get_capabilities().supports_texture_atomics))
2224 {
2225 /* Force attachment usage - see comment above. */
2226 internal_gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_ATTACHMENT;
2227 }
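/* Equivalent formulation of the condition above (illustrative):
 *
 *   const bool emulated_atomic = (internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATOMIC) &&
 *                                !MTLBackend::get_capabilities().supports_texture_atomics;
 *   if (!emulated_atomic) {
 *     internal_gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_ATTACHMENT;
 *   }
 *
 * Attachment usage is forced for every texture except emulated-atomic ones, whose
 * buffer backing cannot serve as a render target. */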
2228
2229 /* Derive maximum number of mip levels by default.
2230 * TODO(Metal): This can be removed if max mip counts are specified upfront. */
2231 if (type_ == GPU_TEXTURE_1D || type_ == GPU_TEXTURE_1D_ARRAY || type_ == GPU_TEXTURE_BUFFER) {
2232 mtl_max_mips_ = 1;
2233 }
2234 else {
2235 /* Require correct explicit mipmap level counts. */
2236 mtl_max_mips_ = mipmaps_;
2237 }
2238}
2239
2240void gpu::MTLTexture::ensure_baked()
2241{
2242
2243 /* If properties have changed, re-bake. */
2244 id<MTLTexture> previous_texture = nil;
2245 bool copy_previous_contents = false;
2246
2247 if (is_baked_ && is_dirty_) {
2248 copy_previous_contents = true;
2249 previous_texture = texture_;
2250 [previous_texture retain];
2251 this->reset();
2252 }
2253
2254 if (!is_baked_) {
2255 MTLContext *ctx = MTLContext::get();
2256 BLI_assert(ctx);
2257
2258 /* Ensure texture mode is valid. */
2259 BLI_assert(resource_mode_ != MTL_TEXTURE_MODE_EXTERNAL);
2260 BLI_assert(resource_mode_ != MTL_TEXTURE_MODE_TEXTURE_VIEW);
2261 BLI_assert(resource_mode_ != MTL_TEXTURE_MODE_VBO);
2262
2263 /* Format and mip levels (TODO(Metal): Optimize mipmaps counts, specify up-front). */
2264 MTLPixelFormat mtl_format = gpu_texture_format_to_metal(format_);
2265
2266 /* SRGB textures require a texture view for reading data and when rendering with SRGB
2267 * disabled. Enabling the texture_view or texture_read usage flags disables lossless
2268 * compression, so the situations in which it is used should be limited. */
2269 if (format_ == GPU_SRGB8_A8) {
2270 internal_gpu_image_usage_flags_ |= GPU_TEXTURE_USAGE_FORMAT_VIEW;
2271 }
2272
2273 /* Create texture descriptor. */
2274 switch (type_) {
2275
2276 /* 1D */
2277 case GPU_TEXTURE_1D:
2278 case GPU_TEXTURE_1D_ARRAY: {
2279 BLI_assert(w_ > 0);
2280 texture_descriptor_ = [[MTLTextureDescriptor alloc] init];
2281 texture_descriptor_.pixelFormat = mtl_format;
2282 texture_descriptor_.textureType = (type_ == GPU_TEXTURE_1D_ARRAY) ? MTLTextureType1DArray :
2283 MTLTextureType1D;
2284 texture_descriptor_.width = w_;
2285 texture_descriptor_.height = 1;
2286 texture_descriptor_.depth = 1;
2287 texture_descriptor_.arrayLength = (type_ == GPU_TEXTURE_1D_ARRAY) ? h_ : 1;
2288 texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
2289 texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
2290 texture_descriptor_.storageMode = MTLStorageModePrivate;
2291 texture_descriptor_.sampleCount = 1;
2292 texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
2293 texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault;
2294 } break;
2295
2296 /* 2D */
2297 case GPU_TEXTURE_2D:
2298 case GPU_TEXTURE_2D_ARRAY: {
2299 BLI_assert(w_ > 0 && h_ > 0);
2300 texture_descriptor_ = [[MTLTextureDescriptor alloc] init];
2301 texture_descriptor_.pixelFormat = mtl_format;
2302 texture_descriptor_.textureType = (type_ == GPU_TEXTURE_2D_ARRAY) ? MTLTextureType2DArray :
2303 MTLTextureType2D;
2304 texture_descriptor_.width = w_;
2305 texture_descriptor_.height = h_;
2306 texture_descriptor_.depth = 1;
2307 texture_descriptor_.arrayLength = (type_ == GPU_TEXTURE_2D_ARRAY) ? d_ : 1;
2308 texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
2309 texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
2310 texture_descriptor_.storageMode = MTLStorageModePrivate;
2311 texture_descriptor_.sampleCount = 1;
2312 texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
2313 texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault;
2314 } break;
2315
2316 /* 3D */
2317 case GPU_TEXTURE_3D: {
2318 BLI_assert(w_ > 0 && h_ > 0 && d_ > 0);
2319 texture_descriptor_ = [[MTLTextureDescriptor alloc] init];
2320 texture_descriptor_.pixelFormat = mtl_format;
2321 texture_descriptor_.textureType = MTLTextureType3D;
2322 texture_descriptor_.width = w_;
2323 texture_descriptor_.height = h_;
2324 texture_descriptor_.depth = d_;
2325 texture_descriptor_.arrayLength = 1;
2326 texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
2327 texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
2328 texture_descriptor_.storageMode = MTLStorageModePrivate;
2329 texture_descriptor_.sampleCount = 1;
2330 texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
2331 texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault;
2332 } break;
2333
2334 /* CUBE TEXTURES */
2335 case GPU_TEXTURE_CUBE:
2336 case GPU_TEXTURE_CUBE_ARRAY: {
2337 /* NOTE: For a cube-map 'Texture::d_' refers to total number of faces,
2338 * not just array slices. */
2339 BLI_assert(w_ > 0 && h_ > 0);
2340 texture_descriptor_ = [[MTLTextureDescriptor alloc] init];
2341 texture_descriptor_.pixelFormat = mtl_format;
2342 texture_descriptor_.textureType = (type_ == GPU_TEXTURE_CUBE_ARRAY) ?
2343 MTLTextureTypeCubeArray :
2344 MTLTextureTypeCube;
2345 texture_descriptor_.width = w_;
2346 texture_descriptor_.height = h_;
2347 texture_descriptor_.depth = 1;
2348 texture_descriptor_.arrayLength = (type_ == GPU_TEXTURE_CUBE_ARRAY) ? d_ / 6 : 1;
2349 texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
2350 texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
2351 texture_descriptor_.storageMode = MTLStorageModePrivate;
2352 texture_descriptor_.sampleCount = 1;
2353 texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
2354 texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault;
2355 } break;
2356
2357 /* GPU_TEXTURE_BUFFER */
2358 case GPU_TEXTURE_BUFFER: {
2359 texture_descriptor_ = [[MTLTextureDescriptor alloc] init];
2360 texture_descriptor_.pixelFormat = mtl_format;
2361 texture_descriptor_.textureType = MTLTextureTypeTextureBuffer;
2362 texture_descriptor_.width = w_;
2363 texture_descriptor_.height = 1;
2364 texture_descriptor_.depth = 1;
2365 texture_descriptor_.arrayLength = 1;
2366 texture_descriptor_.mipmapLevelCount = (mtl_max_mips_ > 0) ? mtl_max_mips_ : 1;
2367 texture_descriptor_.usage = mtl_usage_from_gpu(internal_gpu_image_usage_flags_);
2368 texture_descriptor_.storageMode = MTLStorageModePrivate;
2369 texture_descriptor_.sampleCount = 1;
2370 texture_descriptor_.cpuCacheMode = MTLCPUCacheModeDefaultCache;
2371 texture_descriptor_.hazardTrackingMode = MTLHazardTrackingModeDefault;
2372 } break;
2373
2374 default: {
2375 MTL_LOG_ERROR("[METAL] Error: Cannot create texture with unknown type: %d\n", type_);
2376 return;
2377 } break;
2378 }
2379
2380 /* Determine Resource Mode. */
2381 resource_mode_ = MTL_TEXTURE_MODE_DEFAULT;
2382
2383 /* Override storage mode if memoryless attachments are being used.
2384 * NOTE: Memoryless textures can only be supported on TBDR GPUs. */
2385 if (internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_MEMORYLESS) {
2386 const bool is_tile_based_arch = (GPU_platform_architecture() == GPU_ARCHITECTURE_TBDR);
2387 if (is_tile_based_arch) {
2388 texture_descriptor_.storageMode = MTLStorageModeMemoryless;
2389 }
2390 }
2391
2396 bool native_texture_atomics = MTLBackend::get_capabilities().supports_texture_atomics;
2397 if ((internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_ATOMIC) && !native_texture_atomics) {
2398
2399 /* Validate format support. */
2400 BLI_assert_msg(ELEM(type_, GPU_TEXTURE_2D, GPU_TEXTURE_2D_ARRAY, GPU_TEXTURE_3D),
2401 "Texture atomic fallback support is only available for GPU_TEXTURE_2D, "
2402 "GPU_TEXTURE_2D_ARRAY and GPU_TEXTURE_3D.");
2403
2404 /* Re-assign 2D resolution to encompass all texture layers.
2405 * Texture access is handled by remapping the original coordinates to a linear pixel ID,
2406 * which is then mapped into the destination texture: LinearPxID = x + y*layer_w + z*(layer_h*layer_w);
2407 * tx_2d.y = LinearPxID/2D_tex_width;
2408 * tx_2d.x = LinearPxID - (tx_2d.y*2D_tex_width); */
2409 {
2410 /* Maximum 2D texture dimensions will be (16384, 16384) on all target platforms. */
2411 const uint max_width = 16384;
2412 const uint max_height = 16384;
2413 const uint pixels_res = w_ * h_ * d_;
2414
2415 uint new_w = 0, new_h = 0;
2416 if (pixels_res <= max_width) {
2417 new_w = pixels_res;
2418 new_h = 1;
2419 }
2420 else {
2421 new_w = max_width;
2422 new_h = ((pixels_res % new_w) == 0) ? (pixels_res / new_w) : ((pixels_res / new_w) + 1);
2423 }
2424
2425 texture_descriptor_.width = new_w;
2426 texture_descriptor_.height = new_h;
2427
2428 UNUSED_VARS_NDEBUG(max_height);
2429 BLI_assert_msg(texture_descriptor_.width <= max_width &&
2430 texture_descriptor_.height <= max_height,
2431 "Atomic fallback support texture is too large.");
2432 }
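/* Worked example (illustrative): a 64x64x64 emulated-atomic 3D texture yields
 * pixels_res = 64 * 64 * 64 = 262144 > max_width, so new_w = 16384 and
 * new_h = 262144 / 16384 = 16: the fallback allocation is a 16384x16 2D texture. */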
2433
2434 /* Allocate buffer for texture data. */
2435 size_t bytes_per_pixel = get_mtl_format_bytesize(mtl_format);
2436 size_t bytes_per_row = bytes_per_pixel * texture_descriptor_.width;
2437 size_t total_bytes = bytes_per_row * texture_descriptor_.height;
2438
2439 backing_buffer_ = MTLContext::get_global_memory_manager()->allocate(
2440 total_bytes, (internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_HOST_READ));
2441 BLI_assert(backing_buffer_ != nullptr);
2442
2443 /* NOTE: Fallback buffer-backed texture always set to Texture2D. */
2444 texture_descriptor_.textureType = MTLTextureType2D;
2445 texture_descriptor_.depth = 1;
2446 texture_descriptor_.arrayLength = 1;
2447
2448 /* Write texture dimensions to metadata. This is required to remap 2D Array/3D sample
2449 * coordinates into 2D texture space. */
2450 tex_buffer_metadata_[0] = w_;
2451 tex_buffer_metadata_[1] = h_;
2452 tex_buffer_metadata_[2] = d_;
2453
2454 /* Texture allocation with buffer as backing storage. Bytes per row must satisfy alignment
2455 * rules for device. */
2456 uint32_t align_requirement = uint32_t(
2457 [ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]);
2458 size_t aligned_bytes_per_row = ceil_to_multiple_ul(bytes_per_row, align_requirement);
2459 texture_ = [backing_buffer_->get_metal_buffer()
2460 newTextureWithDescriptor:texture_descriptor_
2461 offset:0
2462 bytesPerRow:aligned_bytes_per_row];
2463 /* Aligned width. */
2464 tex_buffer_metadata_[3] = bytes_per_row / bytes_per_pixel;
2465
2466#ifndef NDEBUG
2467 texture_.label = [NSString
2468 stringWithFormat:@"AtomicBufferBackedTexture_%s", this->get_name()];
2469#endif
2470 }
2471 else {
2472
2473 /* Standard texture allocation. */
2474 texture_ = [ctx->device newTextureWithDescriptor:texture_descriptor_];
2475
2476#ifndef NDEBUG
2477 if (internal_gpu_image_usage_flags_ & GPU_TEXTURE_USAGE_MEMORYLESS) {
2478 texture_.label = [NSString stringWithFormat:@"MemorylessTexture_%s", this->get_name()];
2479 }
2480 else {
2481 texture_.label = [NSString stringWithFormat:@"Texture_%s", this->get_name()];
2482 }
2483#endif
2484 }
2485
2486 BLI_assert(texture_);
2487 is_baked_ = true;
2488 is_dirty_ = false;
2489 }
2490
2491 /* Re-apply previous contents. */
2492 if (copy_previous_contents) {
2493 /* TODO(Metal): May need to copy previous contents of texture into new texture. */
2494 [previous_texture release];
2495 }
2496}
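/* Shader-side counterpart (illustrative sketch; the MSL actually generated by the shader
 * backend may differ): with tex_buffer_metadata_ = {w, h, d, aligned_w}, a 3D texel
 * coordinate is remapped into the flattened 2D fallback texture as
 *
 *   uint linear_px = x + y * metadata.x + z * (metadata.x * metadata.y);
 *   uint2 px_2d = uint2(linear_px % tex_2d_width, linear_px / tex_2d_width);
 *
 * which matches the LinearPxID mapping documented in ensure_baked() above. */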
2497
2498void gpu::MTLTexture::reset()
2499{
2500 MTL_LOG_INFO("Texture %s reset. Size %d, %d, %d", this->get_name(), w_, h_, d_);
2501 /* Delete associated METAL resources. */
2502 if (texture_ != nil) {
2503 [texture_ release];
2504 texture_ = nil;
2505 is_baked_ = false;
2506 is_dirty_ = true;
2507 }
2508
2509 /* Release backing Metal buffer, if used. */
2510 if (backing_buffer_ != nullptr) {
2511 backing_buffer_->free();
2512 backing_buffer_ = nullptr;
2513 }
2514
2515 /* Release backing storage buffer, if used. */
2516 if (storage_buffer_ != nullptr) {
2517 delete storage_buffer_;
2518 storage_buffer_ = nullptr;
2519 }
2520
2521 if (texture_no_srgb_ != nil) {
2522 [texture_no_srgb_ release];
2523 texture_no_srgb_ = nil;
2524 }
2525
2526 if (mip_swizzle_view_ != nil) {
2527 [mip_swizzle_view_ release];
2528 mip_swizzle_view_ = nil;
2529 }
2530
2531 /* Blit framebuffer. */
2532 if (blit_fb_) {
2533 GPU_framebuffer_free(blit_fb_);
2534 blit_fb_ = nullptr;
2535 }
2536
2537 /* Descriptor. */
2538 if (texture_descriptor_ != nullptr) {
2539 [texture_descriptor_ release];
2540 texture_descriptor_ = nullptr;
2541 }
2542
2543 /* Reset mipmap state. */
2544 has_generated_mips_ = false;
2545
2546 BLI_assert(texture_ == nil);
2547 BLI_assert(mip_swizzle_view_ == nil);
2548}
2549
2551
2552/* -------------------------------------------------------------------- */
2555MTLStorageBuf *gpu::MTLTexture::get_storagebuf()
2556{
2557 BLI_assert_msg(
2558 backing_buffer_ != nullptr,
2559 "Resource must have been created as a buffer backed resource to support SSBO wrapping.");
2560 /* Ensure texture resource is up to date. */
2561 this->ensure_baked();
2562 if (storage_buffer_ == nullptr) {
2563 BLI_assert(texture_ != nil);
2564 id<MTLBuffer> backing_buffer = [texture_ buffer];
2565 BLI_assert(backing_buffer != nil);
2566 storage_buffer_ = new MTLStorageBuf(this, [backing_buffer length]);
2567 }
2568 return storage_buffer_;
2569}
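/* Usage sketch (illustrative): the wrapper lets an emulated-atomic, buffer-backed texture
 * be bound through the storage-buffer path, e.g.
 *
 *   MTLStorageBuf *ssbo = mtl_texture->get_storagebuf();  // wraps [texture_ buffer]
 *
 * It is created lazily on first use and destroyed together with the texture in reset(). */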
2570
2571
2572/* -------------------------------------------------------------------- */
2575bool MTLTexture::is_format_srgb()
2576{
2577 return (format_ == GPU_SRGB8_A8);
2578}
2579
2580id<MTLTexture> MTLTexture::get_non_srgb_handle()
2581{
2582 id<MTLTexture> base_tex = get_metal_handle_base();
2583 BLI_assert(base_tex != nil);
2584 if (texture_no_srgb_ == nil) {
2585 texture_no_srgb_ = [base_tex newTextureViewWithPixelFormat:MTLPixelFormatRGBA8Unorm];
2586 }
2587 return texture_no_srgb_;
2588}
2589
2591/* -------------------------------------------------------------------- */
2594
2595MTLPixelBuffer::MTLPixelBuffer(size_t size) : PixelBuffer(size)
2596{
2597 /* Ensure buffer satisfies the alignment of 256 bytes for copying
2598 * data between buffers and textures. As specified in:
2599 * https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf */
2600 BLI_assert(size >= 256);
2601 buffer_ = nil;
2602}
2603
2604MTLPixelBuffer::~MTLPixelBuffer()
2605{
2606 if (buffer_) {
2607 [buffer_ release];
2608 buffer_ = nil;
2609 }
2610}
2611
2612void *MTLPixelBuffer::map()
2613{
2614 /* Duplicate the existing buffer and release original to ensure we do not directly modify data
2615 * in-flight on the GPU. */
2616 MTLContext *ctx = MTLContext::get();
2617 BLI_assert(ctx);
2618 MTLResourceOptions resource_options = ([ctx->device hasUnifiedMemory]) ?
2619 MTLResourceStorageModeShared :
2620 MTLResourceStorageModeManaged;
2621
2622 if (buffer_ != nil) {
2623 id<MTLBuffer> new_buffer = [ctx->device newBufferWithBytes:[buffer_ contents]
2624 length:size_
2625 options:resource_options];
2626 [buffer_ release];
2627 buffer_ = new_buffer;
2628 }
2629 else {
2630 buffer_ = [ctx->device newBufferWithLength:size_ options:resource_options];
2631 }
2632
2633 return [buffer_ contents];
2634}
2635
2636void MTLPixelBuffer::unmap()
2637{
2638 if (buffer_ == nil) {
2639 return;
2640 }
2641
2642 /* Ensure changes are synchronized. */
2643 if (buffer_.resourceOptions & MTLResourceStorageModeManaged) {
2644 [buffer_ didModifyRange:NSMakeRange(0, size_)];
2645 }
2646}
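/* Usage sketch (illustrative, hypothetical local buffer): the expected CPU update cycle is
 *
 *   void *ptr = pixel_buffer->map();        // duplicates any in-flight GPU copy of the data
 *   memcpy(ptr, src_pixels, pixel_buffer->get_size());
 *   pixel_buffer->unmap();                  // flushes the range on managed-memory devices
 *
 * On unified-memory devices unmap() is effectively a no-op, as get_native_handle() notes. */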
2647
2648GPUPixelBufferNativeHandle MTLPixelBuffer::get_native_handle()
2649{
2650 GPUPixelBufferNativeHandle native_handle;
2651
2652 /* Only supported with unified memory currently. */
2653 MTLContext *ctx = MTLContext::get();
2654 BLI_assert(ctx);
2655 if (![ctx->device hasUnifiedMemory]) {
2656 return native_handle;
2657 }
2658
2659 /* Just get pointer to unified memory. No need to unmap. */
2660 map();
2661 native_handle.handle = reinterpret_cast<int64_t>(buffer_);
2662 native_handle.size = size_;
2663
2664 return native_handle;
2665}
2666
2667size_t MTLPixelBuffer::get_size()
2668{
2669 return size_;
2670}
2671
2672id<MTLBuffer> MTLPixelBuffer::get_metal_buffer()
2673{
2674 return buffer_;
2675}
2676
2678
2679} // namespace blender::gpu