Blender V4.3
vk_data_conversion.cc
Go to the documentation of this file.
1/* SPDX-FileCopyrightText: 2023 Blender Authors
2 *
3 * SPDX-License-Identifier: GPL-2.0-or-later */
4
10#include "vk_device.hh"
11
13
14#include "BLI_color.hh"
15#include "BLI_math_half.hh"
16
17namespace blender::gpu {
18
19/* -------------------------------------------------------------------- */
80
82 const eGPUTextureFormat device_format)
83{
84 if (host_format != device_format) {
85 if (host_format == GPU_RGB16F && device_format == GPU_RGBA16F) {
87 }
88 if (host_format == GPU_RGB32F && device_format == GPU_RGBA32F) {
90 }
91 if (host_format == GPU_DEPTH_COMPONENT24 && device_format == GPU_DEPTH_COMPONENT32F) {
93 }
94
96 }
97
98 switch (device_format) {
99 case GPU_RGBA32F:
100 case GPU_RG32F:
101 case GPU_R32F:
104
105 case GPU_RGBA16F:
106 case GPU_RG16F:
107 case GPU_R16F:
108 case GPU_RGB16F:
110
111 case GPU_RGBA8:
112 case GPU_RG8:
113 case GPU_R8:
115
116 case GPU_RGBA8_SNORM:
117 case GPU_RGB8_SNORM:
118 case GPU_RG8_SNORM:
119 case GPU_R8_SNORM:
121
122 case GPU_RGBA16:
123 case GPU_RG16:
124 case GPU_R16:
126
127 case GPU_RGBA16_SNORM:
128 case GPU_RGB16_SNORM:
129 case GPU_RG16_SNORM:
130 case GPU_R16_SNORM:
132
133 case GPU_SRGB8_A8:
135
138
141
145 case GPU_RGBA8_DXT1:
146 case GPU_RGBA8_DXT3:
147 case GPU_RGBA8_DXT5:
148 /* Not an actual "conversion", but compressed texture upload code
149 * pretends that host data is a float. It is actually raw BCn bits. */
151
152 case GPU_RGB32F: /* GPU_RGB32F Not supported by vendors. */
153 case GPU_RGBA8UI:
154 case GPU_RGBA8I:
155 case GPU_RGBA16UI:
156 case GPU_RGBA16I:
157 case GPU_RGBA32UI:
158 case GPU_RGBA32I:
159 case GPU_RG8UI:
160 case GPU_RG8I:
161 case GPU_RG16UI:
162 case GPU_RG16I:
163 case GPU_RG32UI:
164 case GPU_RG32I:
165 case GPU_R8UI:
166 case GPU_R8I:
167 case GPU_R16UI:
168 case GPU_R16I:
169 case GPU_R32UI:
170 case GPU_R32I:
171 case GPU_RGB10_A2:
172 case GPU_RGB10_A2UI:
175 case GPU_RGB8UI:
176 case GPU_RGB8I:
177 case GPU_RGB8:
178 case GPU_RGB16UI:
179 case GPU_RGB16I:
180 case GPU_RGB16:
181 case GPU_RGB32UI:
182 case GPU_RGB32I:
183 case GPU_SRGB8:
184 case GPU_RGB9_E5:
187 }
189}
190
192{
193 switch (device_format) {
194 case GPU_RGBA32I:
195 case GPU_RG32I:
196 case GPU_R32I:
198
199 case GPU_RGBA16I:
200 case GPU_RG16I:
201 case GPU_R16I:
203
204 case GPU_RGBA8I:
205 case GPU_RG8I:
206 case GPU_R8I:
208
209 case GPU_RGBA8UI:
210 case GPU_RGBA8:
211 case GPU_RGBA16UI:
212 case GPU_RGBA16F:
213 case GPU_RGBA16:
214 case GPU_RGBA32UI:
215 case GPU_RGBA32F:
216 case GPU_RG8UI:
217 case GPU_RG8:
218 case GPU_RG16UI:
219 case GPU_RG16F:
220 case GPU_RG32UI:
221 case GPU_RG32F:
222 case GPU_RG16:
223 case GPU_R8UI:
224 case GPU_R8:
225 case GPU_R16UI:
226 case GPU_R16F:
227 case GPU_R16:
228 case GPU_R32UI:
229 case GPU_R32F:
230 case GPU_RGB10_A2:
231 case GPU_RGB10_A2UI:
235 case GPU_SRGB8_A8:
236 case GPU_RGBA8_SNORM:
237 case GPU_RGBA16_SNORM:
238 case GPU_RGB8UI:
239 case GPU_RGB8I:
240 case GPU_RGB8:
241 case GPU_RGB8_SNORM:
242 case GPU_RGB16UI:
243 case GPU_RGB16I:
244 case GPU_RGB16F:
245 case GPU_RGB16:
246 case GPU_RGB16_SNORM:
247 case GPU_RGB32UI:
248 case GPU_RGB32I:
249 case GPU_RGB32F:
250 case GPU_RG8_SNORM:
251 case GPU_RG16_SNORM:
252 case GPU_R8_SNORM:
253 case GPU_R16_SNORM:
257 case GPU_RGBA8_DXT1:
258 case GPU_RGBA8_DXT3:
259 case GPU_RGBA8_DXT5:
260 case GPU_SRGB8:
261 case GPU_RGB9_E5:
266 }
268}
269
271{
272 switch (device_format) {
273 case GPU_RGBA32UI:
274 case GPU_RG32UI:
275 case GPU_R32UI:
278
279 case GPU_RGBA16UI:
280 case GPU_RG16UI:
281 case GPU_R16UI:
282 case GPU_RGB16UI:
284
285 case GPU_RGBA8UI:
286 case GPU_RG8UI:
287 case GPU_R8UI:
289
295 case GPU_RGBA8I:
296 case GPU_RGBA8:
297 case GPU_RGBA16I:
298 case GPU_RGBA16F:
299 case GPU_RGBA16:
300 case GPU_RGBA32I:
301 case GPU_RGBA32F:
302 case GPU_RG8I:
303 case GPU_RG8:
304 case GPU_RG16I:
305 case GPU_RG16F:
306 case GPU_RG16:
307 case GPU_RG32I:
308 case GPU_RG32F:
309 case GPU_R8I:
310 case GPU_R8:
311 case GPU_R16I:
312 case GPU_R16F:
313 case GPU_R16:
314 case GPU_R32I:
315 case GPU_R32F:
316 case GPU_RGB10_A2:
317 case GPU_RGB10_A2UI:
319 case GPU_SRGB8_A8:
320 case GPU_RGBA8_SNORM:
321 case GPU_RGBA16_SNORM:
322 case GPU_RGB8UI:
323 case GPU_RGB8I:
324 case GPU_RGB8:
325 case GPU_RGB8_SNORM:
326 case GPU_RGB16I:
327 case GPU_RGB16F:
328 case GPU_RGB16:
329 case GPU_RGB16_SNORM:
330 case GPU_RGB32UI:
331 case GPU_RGB32I:
332 case GPU_RGB32F:
333 case GPU_RG8_SNORM:
334 case GPU_RG16_SNORM:
335 case GPU_R8_SNORM:
336 case GPU_R16_SNORM:
340 case GPU_RGBA8_DXT1:
341 case GPU_RGBA8_DXT3:
342 case GPU_RGBA8_DXT5:
343 case GPU_SRGB8:
344 case GPU_RGB9_E5:
347 }
349}
350
352{
353 switch (device_format) {
354 case GPU_RGBA16F:
355 case GPU_RG16F:
356 case GPU_R16F:
358
359 case GPU_RGBA8UI:
360 case GPU_RGBA8I:
361 case GPU_RGBA8:
362 case GPU_RGBA16UI:
363 case GPU_RGBA16I:
364 case GPU_RGBA16:
365 case GPU_RGBA32UI:
366 case GPU_RGBA32I:
367 case GPU_RGBA32F:
368 case GPU_RG8UI:
369 case GPU_RG8I:
370 case GPU_RG8:
371 case GPU_RG16UI:
372 case GPU_RG16I:
373 case GPU_RG16:
374 case GPU_RG32UI:
375 case GPU_RG32I:
376 case GPU_RG32F:
377 case GPU_R8UI:
378 case GPU_R8I:
379 case GPU_R8:
380 case GPU_R16UI:
381 case GPU_R16I:
382 case GPU_R16:
383 case GPU_R32UI:
384 case GPU_R32I:
385 case GPU_R32F:
386 case GPU_RGB10_A2:
387 case GPU_RGB10_A2UI:
391 case GPU_SRGB8_A8:
392 case GPU_RGBA8_SNORM:
393 case GPU_RGBA16_SNORM:
394 case GPU_RGB8UI:
395 case GPU_RGB8I:
396 case GPU_RGB8:
397 case GPU_RGB8_SNORM:
398 case GPU_RGB16UI:
399 case GPU_RGB16I:
400 case GPU_RGB16F:
401 case GPU_RGB16:
402 case GPU_RGB16_SNORM:
403 case GPU_RGB32UI:
404 case GPU_RGB32I:
405 case GPU_RGB32F:
406 case GPU_RG8_SNORM:
407 case GPU_RG16_SNORM:
408 case GPU_R8_SNORM:
409 case GPU_R16_SNORM:
413 case GPU_RGBA8_DXT1:
414 case GPU_RGBA8_DXT3:
415 case GPU_RGBA8_DXT5:
416 case GPU_SRGB8:
417 case GPU_RGB9_E5:
422 }
424}
425
427{
428 switch (device_format) {
429 case GPU_RGBA8UI:
430 case GPU_RGBA8:
431 case GPU_RG8UI:
432 case GPU_RG8:
433 case GPU_R8UI:
434 case GPU_R8:
435 case GPU_SRGB8_A8:
437
438 case GPU_RGBA8I:
439 case GPU_RGBA16UI:
440 case GPU_RGBA16I:
441 case GPU_RGBA16F:
442 case GPU_RGBA16:
443 case GPU_RGBA32UI:
444 case GPU_RGBA32I:
445 case GPU_RGBA32F:
446 case GPU_RG8I:
447 case GPU_RG16UI:
448 case GPU_RG16I:
449 case GPU_RG16F:
450 case GPU_RG16:
451 case GPU_RG32UI:
452 case GPU_RG32I:
453 case GPU_RG32F:
454 case GPU_R8I:
455 case GPU_R16UI:
456 case GPU_R16I:
457 case GPU_R16F:
458 case GPU_R16:
459 case GPU_R32UI:
460 case GPU_R32I:
461 case GPU_R32F:
462 case GPU_RGB10_A2:
463 case GPU_RGB10_A2UI:
467 case GPU_RGBA8_SNORM:
468 case GPU_RGBA16_SNORM:
469 case GPU_RGB8UI:
470 case GPU_RGB8I:
471 case GPU_RGB8:
472 case GPU_RGB8_SNORM:
473 case GPU_RGB16UI:
474 case GPU_RGB16I:
475 case GPU_RGB16F:
476 case GPU_RGB16:
477 case GPU_RGB16_SNORM:
478 case GPU_RGB32UI:
479 case GPU_RGB32I:
480 case GPU_RGB32F:
481 case GPU_RG8_SNORM:
482 case GPU_RG16_SNORM:
483 case GPU_R8_SNORM:
484 case GPU_R16_SNORM:
488 case GPU_RGBA8_DXT1:
489 case GPU_RGBA8_DXT3:
490 case GPU_RGBA8_DXT5:
491 case GPU_SRGB8:
492 case GPU_RGB9_E5:
497 }
499}
500
502{
503 if (device_format == GPU_R11F_G11F_B10F) {
505 }
507}
508
516
518 const eGPUTextureFormat host_texture_format,
519 const eGPUTextureFormat device_format)
520{
521 BLI_assert(validate_data_format(device_format, host_format));
522
523 switch (host_format) {
524 case GPU_DATA_FLOAT:
525 return type_of_conversion_float(host_texture_format, device_format);
526 case GPU_DATA_UINT:
527 return type_of_conversion_uint(device_format);
528 case GPU_DATA_INT:
529 return type_of_conversion_int(device_format);
531 return type_of_conversion_half(device_format);
532 case GPU_DATA_UBYTE:
533 return type_of_conversion_ubyte(device_format);
535 return type_of_conversion_r11g11b10(device_format);
537 return type_of_conversion_r10g10b10a2(device_format);
538
541 }
542
544}
545
547{
548#define CASE_SINGLE(a, b) \
549 case ConversionType::a##_TO_##b: \
550 return ConversionType::b##_TO_##a;
551
552#define CASE_PAIR(a, b) \
553 CASE_SINGLE(a, b) \
554 CASE_SINGLE(b, a)
555
556 switch (type) {
559
560 CASE_PAIR(FLOAT, UNORM8)
561 CASE_PAIR(FLOAT, SNORM8)
562 CASE_PAIR(FLOAT, UNORM16)
563 CASE_PAIR(FLOAT, SNORM16)
564 CASE_PAIR(FLOAT, UNORM32)
571 CASE_PAIR(FLOAT, DEPTH_COMPONENT24)
572 CASE_PAIR(UINT, DEPTH_COMPONENT24)
573 CASE_PAIR(FLOAT, B10F_G11F_R11F)
576
579 }
580
581#undef CASE_PAIR
582#undef CASE_SINGLE
583
585}
586
587/* \} */
588
589/* -------------------------------------------------------------------- */
/**
 * Reinterpret the bit pattern of a 32-bit float as an unsigned 32-bit integer.
 *
 * Uses `memcpy` instead of a union: reading a union member other than the one
 * last written is undefined behavior in C++ (unlike C). The copy compiles to a
 * single register move on all mainstream compilers.
 */
static uint32_t float_to_uint32_t(float value)
{
  static_assert(sizeof(uint32_t) == sizeof(float));
  uint32_t result;
  memcpy(&result, &value, sizeof(result));
  return result;
}
602
/**
 * Reinterpret the bit pattern of an unsigned 32-bit integer as a 32-bit float.
 *
 * Uses `memcpy` instead of a union: type punning through a union is undefined
 * behavior in C++. Inverse of `float_to_uint32_t`.
 */
static float uint32_t_to_float(uint32_t value)
{
  static_assert(sizeof(float) == sizeof(uint32_t));
  float result;
  memcpy(&result, &value, sizeof(result));
  return result;
}
612
/**
 * Strongly typed wrapper around a single component (one channel) value.
 * The distinct wrapper types exist to drive overload resolution of the
 * `convert()` overloads below.
 */
template<typename InnerType> struct ComponentValue {
  InnerType value;
};

/** Strongly typed wrapper around one whole (packed) pixel value. */
template<typename InnerType> struct PixelValue {
  InnerType value;
};

/* NOTE: Vulkan stores R11_G11_B10 in reverse component order. */
class B10F_G11G_R11F : public PixelValue<uint32_t> {};
632
633class HALF4 : public PixelValue<uint64_t> {
634 public:
636 {
637 return value & 0xffff;
638 }
639
640 void set_r(uint64_t new_value)
641 {
642 value = (value & 0xffffffffffff0000) | (new_value & 0xffff);
643 }
645 {
646 return (value >> 16) & 0xffff;
647 }
648
649 void set_g(uint64_t new_value)
650 {
651 value = (value & 0xffffffff0000ffff) | ((new_value & 0xffff) << 16);
652 }
654 {
655 return (value >> 32) & 0xffff;
656 }
657
658 void set_b(uint64_t new_value)
659 {
660 value = (value & 0xffff0000ffffffff) | ((new_value & 0xffff) << 32);
661 }
662
663 void set_a(uint64_t new_value)
664 {
665 value = (value & 0xffffffffffff) | ((new_value & 0xffff) << 48);
666 }
667};
668
669class DepthComponent24 : public ComponentValue<uint32_t> {
670 public:
671 operator uint32_t() const
672 {
673 return value;
674 }
675
677 {
678 value = new_value;
679 return *this;
680 }
681
682 /* Depth component24 are 4 bytes, but 1 isn't used. */
683 static constexpr size_t used_byte_size()
684 {
685 return 3;
686 }
687};
688
/**
 * Storage and scaling constants for a signed-normalized integer component
 * ([-1..1] floats mapped onto the full unsigned storage range, biased by
 * `delta`).
 */
template<typename InnerType> struct SignedNormalized {
  static_assert(std::is_same<InnerType, uint8_t>() || std::is_same<InnerType, uint16_t>());
  InnerType value;

  /* Multiplier mapping a [-1..1] float onto the integer range. */
  static constexpr int32_t scalar()
  {
    return (1 << (sizeof(InnerType) * 8 - 1));
  }

  /* Bias added so that -1.0 maps to 0 (half the range minus one). */
  static constexpr int32_t delta()
  {
    return (1 << (sizeof(InnerType) * 8 - 1)) - 1;
  }

  /* Largest storable raw value. */
  static constexpr int32_t max()
  {
    return ((1 << (sizeof(InnerType) * 8)) - 1);
  }
};
708
709template<typename InnerType> struct UnsignedNormalized {
710 static_assert(std::is_same<InnerType, uint8_t>() || std::is_same<InnerType, uint16_t>() ||
711 std::is_same<InnerType, uint32_t>() ||
712 std::is_same<InnerType, DepthComponent24>());
713 InnerType value;
714
715 static constexpr size_t used_byte_size()
716 {
717 if constexpr (std::is_same<InnerType, DepthComponent24>()) {
718 return InnerType::used_byte_size();
719 }
720 else {
721 return sizeof(InnerType);
722 }
723 }
724
725 static constexpr uint32_t scalar()
726 {
727 if constexpr (std::is_same<InnerType, DepthComponent24>()) {
728 return (1 << (used_byte_size() * 8)) - 1;
729 }
730 else {
731 return std::numeric_limits<InnerType>::max();
732 }
733 }
734
735 static constexpr uint32_t max()
736 {
737 if constexpr (std::is_same<InnerType, DepthComponent24>()) {
738 return (1 << (used_byte_size() * 8)) - 1;
739 }
740 else {
741 return std::numeric_limits<InnerType>::max();
742 }
743 }
744};
745
746template<typename StorageType> void convert(SignedNormalized<StorageType> &dst, const F32 &src)
747{
748 static constexpr int32_t scalar = SignedNormalized<StorageType>::scalar();
749 static constexpr int32_t delta = SignedNormalized<StorageType>::delta();
750 static constexpr int32_t max = SignedNormalized<StorageType>::max();
751 dst.value = clamp_i((src.value * scalar + delta), 0, max);
752}
753
754template<typename StorageType> void convert(F32 &dst, const SignedNormalized<StorageType> &src)
755{
756 static constexpr int32_t scalar = SignedNormalized<StorageType>::scalar();
757 static constexpr int32_t delta = SignedNormalized<StorageType>::delta();
758 dst.value = float(int32_t(src.value) - delta) / scalar;
759}
760
761template<typename StorageType> void convert(UnsignedNormalized<StorageType> &dst, const F32 &src)
762{
763 static constexpr uint32_t scalar = UnsignedNormalized<StorageType>::scalar();
764 static constexpr uint32_t max = scalar;
765 /* When converting a DEPTH32F to DEPTH24 the scalar gets to large where 1.0 will wrap around and
766 * become 0. Make sure that depth 1.0 will not wrap around. Without this gpu_select_pick will
767 * fail as all depth 1.0 will occlude previous depths. */
768 dst.value = src.value >= 1.0f ? max : max_ff(src.value * float(scalar), 0.0);
769}
770
771template<typename StorageType> void convert(F32 &dst, const UnsignedNormalized<StorageType> &src)
772{
773 static constexpr uint32_t scalar = UnsignedNormalized<StorageType>::scalar();
774 dst.value = float(uint32_t(src.value)) / float(scalar);
775}
776
777template<typename StorageType>
778void convert(UnsignedNormalized<StorageType> & /*dst*/, const UI32 & /*src*/)
779{
781}
782
783template<typename StorageType> void convert(UI32 &dst, const UnsignedNormalized<StorageType> &src)
784{
785 static constexpr uint32_t scalar = UnsignedNormalized<StorageType>::scalar();
786 dst.value = uint32_t(src.value) & scalar;
787}
788
789/* Copy the contents of src to dst with out performing any actual conversion. */
790template<typename DestinationType, typename SourceType>
791void convert(DestinationType &dst, const SourceType &src)
792{
793 static_assert(std::is_same<DestinationType, UI8>() || std::is_same<DestinationType, UI16>() ||
794 std::is_same<DestinationType, UI32>() || std::is_same<DestinationType, I8>() ||
795 std::is_same<DestinationType, I16>() || std::is_same<DestinationType, I32>());
796 static_assert(std::is_same<SourceType, UI8>() || std::is_same<SourceType, UI16>() ||
797 std::is_same<SourceType, UI32>() || std::is_same<SourceType, I8>() ||
798 std::is_same<SourceType, I16>() || std::is_same<SourceType, I32>());
799 static_assert(!std::is_same<DestinationType, SourceType>());
800 dst.value = src.value;
801}
802
803static void convert(SRGBA8 &dst, const FLOAT4 &src)
804{
805 dst.value = src.value.encode();
806}
807
808static void convert(FLOAT4 &dst, const SRGBA8 &src)
809{
810 dst.value = src.value.decode();
811}
812
813static void convert(FLOAT3 &dst, const HALF4 &src)
814{
815 dst.value.x = math::half_to_float(src.get_r());
816 dst.value.y = math::half_to_float(src.get_g());
817 dst.value.z = math::half_to_float(src.get_b());
818}
819
820static void convert(HALF4 &dst, const FLOAT3 &src)
821{
822 dst.set_r(math::float_to_half(src.value.x));
823 dst.set_g(math::float_to_half(src.value.y));
824 dst.set_b(math::float_to_half(src.value.z));
825 dst.set_a(0x3c00); /* FP16 1.0 */
826}
827
828static void convert(FLOAT3 &dst, const FLOAT4 &src)
829{
830 dst.value.x = src.value.r;
831 dst.value.y = src.value.g;
832 dst.value.z = src.value.b;
833}
834
835static void convert(FLOAT4 &dst, const FLOAT3 &src)
836{
837 dst.value.r = src.value.x;
838 dst.value.g = src.value.y;
839 dst.value.b = src.value.z;
840 dst.value.a = 1.0f;
841}
842
/* Bit layout of the packed R11F_G11F_B10F pixel: R in bits 0-10, G in bits
 * 11-21, B (10 bits) in bits 22-31. */
constexpr uint32_t MASK_10_BITS = 0b1111111111;
constexpr uint32_t MASK_11_BITS = 0b11111111111;
constexpr uint8_t SHIFT_B = 22;
constexpr uint8_t SHIFT_G = 11;
constexpr uint8_t SHIFT_R = 0;
848
858
866
867/* \} */
868
869template<typename DestinationType, typename SourceType>
871{
872 BLI_assert(src.size() == dst.size());
873 for (int64_t index : IndexRange(src.size())) {
874 convert(dst[index], src[index]);
875 }
876}
877
878template<typename DestinationType, typename SourceType>
879void convert_per_component(void *dst_memory,
880 const void *src_memory,
881 size_t buffer_size,
882 eGPUTextureFormat device_format)
883{
884 size_t total_components = to_component_len(device_format) * buffer_size;
885 Span<SourceType> src = Span<SourceType>(static_cast<const SourceType *>(src_memory),
886 total_components);
888 static_cast<DestinationType *>(dst_memory), total_components);
890}
891
892template<typename DestinationType, typename SourceType>
893void convert_per_pixel(void *dst_memory, const void *src_memory, size_t buffer_size)
894{
895 Span<SourceType> src = Span<SourceType>(static_cast<const SourceType *>(src_memory),
896 buffer_size);
898 static_cast<DestinationType *>(dst_memory), buffer_size);
900}
901
902static void convert_buffer(void *dst_memory,
903 const void *src_memory,
904 size_t buffer_size,
905 eGPUTextureFormat device_format,
906 ConversionType type)
907{
908 switch (type) {
910 return;
911
914 memcpy(dst_memory, src_memory, buffer_size * to_bytesize(device_format));
915 return;
916
918 convert_per_component<UI16, UI32>(dst_memory, src_memory, buffer_size, device_format);
919 break;
920
922 convert_per_component<UI32, UI16>(dst_memory, src_memory, buffer_size, device_format);
923 break;
924
926 convert_per_component<UI8, UI32>(dst_memory, src_memory, buffer_size, device_format);
927 break;
928
930 convert_per_component<UI32, UI8>(dst_memory, src_memory, buffer_size, device_format);
931 break;
932
934 convert_per_component<I16, I32>(dst_memory, src_memory, buffer_size, device_format);
935 break;
936
938 convert_per_component<I32, I16>(dst_memory, src_memory, buffer_size, device_format);
939 break;
940
942 convert_per_component<I8, I32>(dst_memory, src_memory, buffer_size, device_format);
943 break;
944
946 convert_per_component<I32, I8>(dst_memory, src_memory, buffer_size, device_format);
947 break;
948
951 dst_memory, src_memory, buffer_size, device_format);
952 break;
955 dst_memory, src_memory, buffer_size, device_format);
956 break;
957
960 dst_memory, src_memory, buffer_size, device_format);
961 break;
964 dst_memory, src_memory, buffer_size, device_format);
965 break;
966
969 dst_memory, src_memory, buffer_size, device_format);
970 break;
973 dst_memory, src_memory, buffer_size, device_format);
974 break;
975
978 dst_memory, src_memory, buffer_size, device_format);
979 break;
982 dst_memory, src_memory, buffer_size, device_format);
983 break;
984
987 dst_memory, src_memory, buffer_size, device_format);
988 break;
991 dst_memory, src_memory, buffer_size, device_format);
992 break;
993
995 blender::math::float_to_half_array(static_cast<const float *>(src_memory),
996 static_cast<uint16_t *>(dst_memory),
997 to_component_len(device_format) * buffer_size);
998 break;
1000 blender::math::half_to_float_array(static_cast<const uint16_t *>(src_memory),
1001 static_cast<float *>(dst_memory),
1002 to_component_len(device_format) * buffer_size);
1003 break;
1004
1006 convert_per_pixel<SRGBA8, FLOAT4>(dst_memory, src_memory, buffer_size);
1007 break;
1009 convert_per_pixel<FLOAT4, SRGBA8>(dst_memory, src_memory, buffer_size);
1010 break;
1011
1014 dst_memory, src_memory, buffer_size, device_format);
1015 break;
1018 dst_memory, src_memory, buffer_size, device_format);
1019 break;
1022 dst_memory, src_memory, buffer_size, device_format);
1023 break;
1025 convert_per_pixel<B10F_G11G_R11F, FLOAT3>(dst_memory, src_memory, buffer_size);
1026 break;
1027
1029 convert_per_pixel<FLOAT3, B10F_G11G_R11F>(dst_memory, src_memory, buffer_size);
1030 break;
1031
1033 convert_per_pixel<HALF4, FLOAT3>(dst_memory, src_memory, buffer_size);
1034 break;
1036 convert_per_pixel<FLOAT3, HALF4>(dst_memory, src_memory, buffer_size);
1037 break;
1038
1040 convert_per_pixel<FLOAT4, FLOAT3>(dst_memory, src_memory, buffer_size);
1041 break;
1043 convert_per_pixel<FLOAT3, FLOAT4>(dst_memory, src_memory, buffer_size);
1044 break;
1045 }
1046}
1047
1048/* -------------------------------------------------------------------- */
1052void convert_host_to_device(void *dst_buffer,
1053 const void *src_buffer,
1054 size_t buffer_size,
1055 eGPUDataFormat host_format,
1056 eGPUTextureFormat host_texture_format,
1057 eGPUTextureFormat device_format)
1058{
1059 ConversionType conversion_type = host_to_device(host_format, host_texture_format, device_format);
1060 BLI_assert(conversion_type != ConversionType::UNSUPPORTED);
1061 convert_buffer(dst_buffer, src_buffer, buffer_size, device_format, conversion_type);
1062}
1063
1064void convert_device_to_host(void *dst_buffer,
1065 const void *src_buffer,
1066 size_t buffer_size,
1067 eGPUDataFormat host_format,
1068 eGPUTextureFormat host_texture_format,
1069 eGPUTextureFormat device_format)
1070{
1071 ConversionType conversion_type = reversed(
1072 host_to_device(host_format, host_texture_format, device_format));
1074 "Data conversion between host_format and device_format isn't supported (yet).");
1075 convert_buffer(dst_buffer, src_buffer, buffer_size, device_format, conversion_type);
1076}
1077
1078/* \} */
1079
1080/* -------------------------------------------------------------------- */
1084static bool attribute_check(const GPUVertAttr attribute,
1085 GPUVertCompType comp_type,
1086 GPUVertFetchMode fetch_mode)
1087{
1088 return attribute.comp_type == comp_type && attribute.fetch_mode == fetch_mode;
1089}
1090
1091static bool attribute_check(const GPUVertAttr attribute, GPUVertCompType comp_type, uint comp_len)
1092{
1093 return attribute.comp_type == comp_type && attribute.comp_len == comp_len;
1094}
1095
1097{
1098 source_format_ = nullptr;
1099 device_format_ = nullptr;
1100 GPU_vertformat_clear(&converted_format_);
1101
1102 needs_conversion_ = false;
1103}
1104
1106{
1107 return device_format_ != nullptr;
1108}
1109
1111 const VKWorkarounds &workarounds)
1112{
1113 source_format_ = vertex_format;
1114 device_format_ = vertex_format;
1115
1116 update_conversion_flags(*source_format_, workarounds);
1117 if (needs_conversion_) {
1118 init_device_format(workarounds);
1119 }
1120}
1121
1123{
1125 return *device_format_;
1126}
1127
1129{
1131 return needs_conversion_;
1132}
1133
1134void VertexFormatConverter::update_conversion_flags(const GPUVertFormat &vertex_format,
1135 const VKWorkarounds &workarounds)
1136{
1137 needs_conversion_ = false;
1138
1139 for (int attr_index : IndexRange(vertex_format.attr_len)) {
1140 const GPUVertAttr &vert_attr = vertex_format.attrs[attr_index];
1141 update_conversion_flags(vert_attr, workarounds);
1142 }
1143}
1144
1145void VertexFormatConverter::update_conversion_flags(const GPUVertAttr &vertex_attribute,
1146 const VKWorkarounds &workarounds)
1147{
1148 /* I32/U32 to F32 conversion doesn't exist in vulkan. */
1149 if (vertex_attribute.fetch_mode == GPU_FETCH_INT_TO_FLOAT &&
1150 ELEM(vertex_attribute.comp_type, GPU_COMP_I32, GPU_COMP_U32))
1151 {
1152 needs_conversion_ = true;
1153 }
1154 /* r8g8b8 formats will be stored as r8g8b8a8. */
1155 else if (workarounds.vertex_formats.r8g8b8 && attribute_check(vertex_attribute, GPU_COMP_U8, 3))
1156 {
1157 needs_conversion_ = true;
1158 }
1159}
1160
1161void VertexFormatConverter::init_device_format(const VKWorkarounds &workarounds)
1162{
1163 BLI_assert(needs_conversion_);
1164 GPU_vertformat_copy(&converted_format_, *source_format_);
1165 bool needs_repack = false;
1166
1167 for (int attr_index : IndexRange(converted_format_.attr_len)) {
1168 GPUVertAttr &vert_attr = converted_format_.attrs[attr_index];
1169 make_device_compatible(vert_attr, workarounds, needs_repack);
1170 }
1171
1172 if (needs_repack) {
1173 VertexFormat_pack(&converted_format_);
1174 }
1175 device_format_ = &converted_format_;
1176}
1177
1178void VertexFormatConverter::make_device_compatible(GPUVertAttr &vertex_attribute,
1179 const VKWorkarounds &workarounds,
1180 bool &r_needs_repack) const
1181{
1182 if (vertex_attribute.fetch_mode == GPU_FETCH_INT_TO_FLOAT &&
1183 ELEM(vertex_attribute.comp_type, GPU_COMP_I32, GPU_COMP_U32))
1184 {
1185 vertex_attribute.fetch_mode = GPU_FETCH_FLOAT;
1186 vertex_attribute.comp_type = GPU_COMP_F32;
1187 }
1188 else if (workarounds.vertex_formats.r8g8b8 && attribute_check(vertex_attribute, GPU_COMP_U8, 3))
1189 {
1190 vertex_attribute.comp_len = 4;
1191 vertex_attribute.size = 4;
1192 r_needs_repack = true;
1193 }
1194}
1195
1196void VertexFormatConverter::convert(void *device_data,
1197 const void *source_data,
1198 const uint vertex_len) const
1199{
1200 BLI_assert(needs_conversion_);
1201 if (source_data != device_data) {
1202 memcpy(device_data, source_data, device_format_->stride * vertex_len);
1203 }
1204
1205 const void *source_row_data = static_cast<const uint8_t *>(source_data);
1206 void *device_row_data = static_cast<uint8_t *>(device_data);
1207 for (int vertex_index : IndexRange(vertex_len)) {
1208 UNUSED_VARS(vertex_index);
1209 convert_row(device_row_data, source_row_data);
1210 source_row_data = static_cast<const uint8_t *>(source_row_data) + source_format_->stride;
1211 device_row_data = static_cast<uint8_t *>(device_row_data) + device_format_->stride;
1212 }
1213}
1214
1215void VertexFormatConverter::convert_row(void *device_row_data, const void *source_row_data) const
1216{
1217 for (int attr_index : IndexRange(source_format_->attr_len)) {
1218 const GPUVertAttr &device_attribute = device_format_->attrs[attr_index];
1219 const GPUVertAttr &source_attribute = source_format_->attrs[attr_index];
1220 convert_attribute(device_row_data, source_row_data, device_attribute, source_attribute);
1221 }
1222}
1223
1224void VertexFormatConverter::convert_attribute(void *device_row_data,
1225 const void *source_row_data,
1226 const GPUVertAttr &device_attribute,
1227 const GPUVertAttr &source_attribute) const
1228{
1229 const void *source_attr_data = static_cast<const uint8_t *>(source_row_data) +
1230 source_attribute.offset;
1231 void *device_attr_data = static_cast<uint8_t *>(device_row_data) + device_attribute.offset;
1232 if (source_attribute.comp_len == device_attribute.comp_len &&
1233 source_attribute.comp_type == device_attribute.comp_type &&
1234 source_attribute.fetch_mode == device_attribute.fetch_mode)
1235 {
1236 /* This check is done first to improve possible branch prediction. */
1237 }
1238 else if (attribute_check(source_attribute, GPU_COMP_I32, GPU_FETCH_INT_TO_FLOAT) &&
1239 attribute_check(device_attribute, GPU_COMP_F32, GPU_FETCH_FLOAT))
1240 {
1241 for (int component : IndexRange(source_attribute.comp_len)) {
1242 const int32_t *component_in = static_cast<const int32_t *>(source_attr_data) + component;
1243 float *component_out = static_cast<float *>(device_attr_data) + component;
1244 *component_out = float(*component_in);
1245 }
1246 }
1247 else if (attribute_check(source_attribute, GPU_COMP_U32, GPU_FETCH_INT_TO_FLOAT) &&
1248 attribute_check(device_attribute, GPU_COMP_F32, GPU_FETCH_FLOAT))
1249 {
1250 for (int component : IndexRange(source_attribute.comp_len)) {
1251 const uint32_t *component_in = static_cast<const uint32_t *>(source_attr_data) + component;
1252 float *component_out = static_cast<float *>(device_attr_data) + component;
1253 *component_out = float(*component_in);
1254 }
1255 }
1256 else if (attribute_check(source_attribute, GPU_COMP_U8, 3) &&
1257 attribute_check(device_attribute, GPU_COMP_U8, 4))
1258 {
1259 const uchar3 *attr_in = static_cast<const uchar3 *>(source_attr_data);
1260 uchar4 *attr_out = static_cast<uchar4 *>(device_attr_data);
1261 *attr_out = uchar4(attr_in->x, attr_in->y, attr_in->z, 255);
1262 }
1263 else {
1265 }
1266}
1267
1268/* \} */
1269
1270} // namespace blender::gpu
#define BLI_assert_unreachable()
Definition BLI_assert.h:97
#define BLI_assert(a)
Definition BLI_assert.h:50
#define BLI_assert_msg(a, msg)
Definition BLI_assert.h:57
MINLINE float max_ff(float a, float b)
MINLINE int clamp_i(int value, int min, int max)
unsigned int uint
#define UNUSED_VARS(...)
#define ELEM(...)
eGPUDataFormat
@ GPU_DATA_HALF_FLOAT
@ GPU_DATA_UINT_24_8
@ GPU_DATA_INT
@ GPU_DATA_10_11_11_REV
@ GPU_DATA_UBYTE
@ GPU_DATA_UINT
@ GPU_DATA_2_10_10_10_REV
@ GPU_DATA_FLOAT
eGPUTextureFormat
@ GPU_RGB16
@ GPU_R16UI
@ GPU_RGB8
@ GPU_DEPTH32F_STENCIL8
@ GPU_SRGB8
@ GPU_R16I
@ GPU_SRGB8_A8
@ GPU_RG8_SNORM
@ GPU_DEPTH24_STENCIL8
@ GPU_RGB10_A2
@ GPU_RGB8I
@ GPU_R32I
@ GPU_RGBA8_SNORM
@ GPU_RGB10_A2UI
@ GPU_RG8UI
@ GPU_RGB16I
@ GPU_RGBA16_SNORM
@ GPU_RGB9_E5
@ GPU_SRGB8_A8_DXT5
@ GPU_RG8I
@ GPU_RG16I
@ GPU_RG32UI
@ GPU_RGB32I
@ GPU_RG8
@ GPU_RG32I
@ GPU_SRGB8_A8_DXT1
@ GPU_RGBA32UI
@ GPU_R8I
@ GPU_R16
@ GPU_RG16UI
@ GPU_RGBA8I
@ GPU_RGBA8_DXT1
@ GPU_RGBA8UI
@ GPU_RGB32F
@ GPU_RGBA16UI
@ GPU_RGBA16I
@ GPU_R8UI
@ GPU_RGBA16
@ GPU_SRGB8_A8_DXT3
@ GPU_RGB8_SNORM
@ GPU_RGBA8_DXT3
@ GPU_RGB32UI
@ GPU_R8_SNORM
@ GPU_RG32F
@ GPU_R8
@ GPU_RGB16_SNORM
@ GPU_DEPTH_COMPONENT24
@ GPU_RG16_SNORM
@ GPU_RGB8UI
@ GPU_RGB16F
@ GPU_RGB16UI
@ GPU_RGBA32I
@ GPU_RGBA8_DXT5
@ GPU_DEPTH_COMPONENT32F
@ GPU_R16_SNORM
@ GPU_DEPTH_COMPONENT16
GPUVertFetchMode
@ GPU_FETCH_FLOAT
@ GPU_FETCH_INT_TO_FLOAT
void GPU_vertformat_clear(GPUVertFormat *)
void GPU_vertformat_copy(GPUVertFormat *dest, const GPUVertFormat &src)
GPUVertCompType
@ GPU_COMP_F32
@ GPU_COMP_I32
@ GPU_COMP_U32
@ GPU_COMP_U8
constexpr int64_t size() const
Definition BLI_span.hh:494
constexpr int64_t size() const
Definition BLI_span.hh:253
DepthComponent24 & operator=(uint32_t new_value)
static constexpr size_t used_byte_size()
void set_g(uint64_t new_value)
void set_r(uint64_t new_value)
void set_a(uint64_t new_value)
void set_b(uint64_t new_value)
local_group_size(16, 16) .push_constant(Type b
additional_info("compositor_sum_float_shared") .push_constant(Type additional_info("compositor_sum_float_shared") .push_constant(Type GPU_RGBA32F
@ HALF
GPU_SHADER_INTERFACE_INFO(depth_2d_update_iface, "").smooth(Type fragColor push_constant(Type::VEC2, "extent") .push_constant(Type source_data
DOF_TILES_FLATTEN_GROUP_SIZE coc_tx GPU_R11F_G11F_B10F
draw_view in_light_buf[] float
out_radiance out_gbuf_normal out_gbuf_closure2 GPU_RG16
SHADOW_TILEMAP_RES tiles_buf[] statistics_buf render_view_buf[SHADOW_VIEW_MAX] GPU_R32UI
RAYTRACE_GROUP_SIZE additional_info("eevee_shared", "eevee_gbuffer_data", "eevee_global_ubo", "eevee_sampling_data", "eevee_utility_texture", "eevee_hiz_data", "draw_view") .specialization_constant(Type RAYTRACE_GROUP_SIZE in_sh_0_tx in_sh_2_tx screen_normal_tx GPU_RGBA8
void VertexFormat_pack(GPUVertFormat *format)
constexpr uint32_t MASK_10_BITS
PixelValue< ColorSceneLinearByteEncoded4b< eAlpha::Premultiplied > > SRGBA8
static ConversionType type_of_conversion_float(const eGPUTextureFormat host_format, const eGPUTextureFormat device_format)
constexpr uint8_t SHIFT_B
static ConversionType type_of_conversion_ubyte(eGPUTextureFormat device_format)
static ConversionType reversed(ConversionType type)
static ConversionType type_of_conversion_half(eGPUTextureFormat device_format)
void convert_host_to_device(void *dst_buffer, const void *src_buffer, size_t buffer_size, eGPUDataFormat host_format, eGPUTextureFormat host_texture_format, eGPUTextureFormat device_format)
void convert_per_component(void *dst_memory, const void *src_memory, size_t buffer_size, eGPUTextureFormat device_format)
static ConversionType host_to_device(const eGPUDataFormat host_format, const eGPUTextureFormat host_texture_format, const eGPUTextureFormat device_format)
static uint32_t float_to_uint32_t(float value)
void convert(SignedNormalized< StorageType > &dst, const F32 &src)
constexpr uint8_t SHIFT_R
constexpr uint8_t SHIFT_G
void convert_device_to_host(void *dst_buffer, const void *src_buffer, size_t buffer_size, eGPUDataFormat host_format, eGPUTextureFormat host_texture_format, eGPUTextureFormat device_format)
constexpr bool validate_data_format(eGPUTextureFormat tex_format, eGPUDataFormat data_format)
static ConversionType type_of_conversion_uint(eGPUTextureFormat device_format)
size_t to_bytesize(GPUIndexBufType type)
static bool attribute_check(const GPUVertAttr attribute, GPUVertCompType comp_type, GPUVertFetchMode fetch_mode)
PixelValue< ColorSceneLinear4f< eAlpha::Premultiplied > > FLOAT4
static ConversionType type_of_conversion_r10g10b10a2(eGPUTextureFormat device_format)
static void convert_buffer(void *dst_memory, const void *src_memory, size_t buffer_size, eGPUTextureFormat device_format, ConversionType type)
int to_component_len(eGPUTextureFormat format)
uint32_t convert_float_formats(uint32_t value)
static float uint32_t_to_float(uint32_t value)
void convert_per_pixel(void *dst_memory, const void *src_memory, size_t buffer_size)
static ConversionType type_of_conversion_r11g11b10(eGPUTextureFormat device_format)
static ConversionType type_of_conversion_int(eGPUTextureFormat device_format)
constexpr uint32_t MASK_11_BITS
void float_to_half_array(const float *src, uint16_t *dst, size_t length)
Definition math_half.cc:221
uint16_t float_to_half(float v)
Definition math_half.cc:27
void half_to_float_array(const uint16_t *src, float *dst, size_t length)
Definition math_half.cc:257
float half_to_float(uint16_t v)
Definition math_half.cc:91
blender::VecBase< uint8_t, 4 > uchar4
@ FLOAT
unsigned short uint16_t
Definition stdint.h:79
unsigned int uint32_t
Definition stdint.h:80
__int64 int64_t
Definition stdint.h:89
signed int int32_t
Definition stdint.h:77
unsigned char uint8_t
Definition stdint.h:78
unsigned __int64 uint64_t
Definition stdint.h:90
#define CASE_PAIR(value_src, value_dst)
GPUVertAttr attrs[GPU_VERT_ATTR_MAX_LEN]
static constexpr int32_t scalar()
static constexpr int32_t delta()
static constexpr int32_t max()
static constexpr uint32_t max()
static constexpr size_t used_byte_size()
static constexpr uint32_t scalar()
void init(const GPUVertFormat *vertex_format, const VKWorkarounds &workarounds)
void convert(void *device_data, const void *src_data, const uint vertex_len) const
const GPUVertFormat & device_format_get() const
uchar y
uchar z
uchar x
float max