Skip to content

Commit 5be66a2

Browse files
committed
Merge branch 'features_sync_with_vk' into erfan_device_features
2 parents bdc08be + e4a4388 commit 5be66a2

31 files changed

+4966
-678
lines changed

include/nbl/asset/ISampler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ class NBL_API ISampler : public virtual core::IReferenceCounted
2727
//! Texture is alternatingly mirrored (0..1..0..1..0..)
2828
ETC_MIRROR,
2929
//! Texture is mirrored once and then clamped to edge
30+
//! Requires `SPhysicalDeviceFeatures::samplerMirrorClampToEdge` to be enabled at LogicalDevice creation; the PhysicalDevice must also report support for it.
3031
ETC_MIRROR_CLAMP_TO_EDGE,
3132
//! Texture is mirrored once and then clamped to border
3233
ETC_MIRROR_CLAMP_TO_BORDER,

include/nbl/asset/format/EFormat.h

Lines changed: 114 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -524,8 +524,6 @@ constexpr const uint8_t getBitsPerChannel()
524524
}
525525
*/
526526

527-
528-
529527
/*
530528
It provides some useful functions for dealing
531529
with texel-block conversions, rounding up
@@ -688,27 +686,38 @@ inline bool isBGRALayoutFormat(asset::E_FORMAT _fmt)
688686
{
689687
switch (_fmt)
690688
{
691-
//case EF_B8G8R8_UNORM:
692-
//case EF_B8G8R8_SNORM:
693-
//case EF_B8G8R8_USCALED:
694-
//case EF_B8G8R8_SSCALED:
695-
//case EF_B8G8R8_UINT:
696-
//case EF_B8G8R8_SINT:
697-
//case EF_B8G8R8_SRGB:
698-
case EF_A1R5G5B5_UNORM_PACK16:
689+
case EF_B4G4R4A4_UNORM_PACK16:
690+
case EF_B5G6R5_UNORM_PACK16:
691+
case EF_B5G5R5A1_UNORM_PACK16:
692+
case EF_B8G8R8_UNORM:
693+
case EF_B8G8R8_SNORM:
694+
case EF_B8G8R8_USCALED:
695+
case EF_B8G8R8_SSCALED:
696+
case EF_B8G8R8_UINT:
697+
case EF_B8G8R8_SINT:
698+
case EF_B8G8R8_SRGB:
699699
case EF_B8G8R8A8_UNORM:
700700
case EF_B8G8R8A8_SNORM:
701701
case EF_B8G8R8A8_USCALED:
702702
case EF_B8G8R8A8_SSCALED:
703703
case EF_B8G8R8A8_UINT:
704704
case EF_B8G8R8A8_SINT:
705705
case EF_B8G8R8A8_SRGB:
706-
case EF_A2R10G10B10_UNORM_PACK32:
707-
case EF_A2R10G10B10_SNORM_PACK32:
708-
case EF_A2R10G10B10_USCALED_PACK32:
709-
case EF_A2R10G10B10_SSCALED_PACK32:
710-
case EF_A2R10G10B10_UINT_PACK32:
711-
case EF_A2R10G10B10_SINT_PACK32:
706+
case EF_A8B8G8R8_UNORM_PACK32:
707+
case EF_A8B8G8R8_SNORM_PACK32:
708+
case EF_A8B8G8R8_USCALED_PACK32:
709+
case EF_A8B8G8R8_SSCALED_PACK32:
710+
case EF_A8B8G8R8_UINT_PACK32:
711+
case EF_A8B8G8R8_SINT_PACK32:
712+
case EF_A8B8G8R8_SRGB_PACK32:
713+
case EF_A2B10G10R10_UNORM_PACK32:
714+
case EF_A2B10G10R10_SNORM_PACK32:
715+
case EF_A2B10G10R10_USCALED_PACK32:
716+
case EF_A2B10G10R10_SSCALED_PACK32:
717+
case EF_A2B10G10R10_UINT_PACK32:
718+
case EF_A2B10G10R10_SINT_PACK32:
719+
case EF_B10G11R11_UFLOAT_PACK32:
720+
case EF_E5B9G9R9_UFLOAT_PACK32:
712721
return true;
713722
default:
714723
return false;
@@ -720,27 +729,38 @@ constexpr bool isBGRALayoutFormat()
720729
{
721730
switch (_fmt)
722731
{
723-
//case EF_B8G8R8_UNORM:
724-
//case EF_B8G8R8_SNORM:
725-
//case EF_B8G8R8_USCALED:
726-
//case EF_B8G8R8_SSCALED:
727-
//case EF_B8G8R8_UINT:
728-
//case EF_B8G8R8_SINT:
729-
//case EF_B8G8R8_SRGB:
730-
case EF_A1R5G5B5_UNORM_PACK16:
732+
case EF_B4G4R4A4_UNORM_PACK16:
733+
case EF_B5G6R5_UNORM_PACK16:
734+
case EF_B5G5R5A1_UNORM_PACK16:
735+
case EF_B8G8R8_UNORM:
736+
case EF_B8G8R8_SNORM:
737+
case EF_B8G8R8_USCALED:
738+
case EF_B8G8R8_SSCALED:
739+
case EF_B8G8R8_UINT:
740+
case EF_B8G8R8_SINT:
741+
case EF_B8G8R8_SRGB:
731742
case EF_B8G8R8A8_UNORM:
732743
case EF_B8G8R8A8_SNORM:
733744
case EF_B8G8R8A8_USCALED:
734745
case EF_B8G8R8A8_SSCALED:
735746
case EF_B8G8R8A8_UINT:
736747
case EF_B8G8R8A8_SINT:
737748
case EF_B8G8R8A8_SRGB:
738-
case EF_A2R10G10B10_UNORM_PACK32:
739-
case EF_A2R10G10B10_SNORM_PACK32:
740-
case EF_A2R10G10B10_USCALED_PACK32:
741-
case EF_A2R10G10B10_SSCALED_PACK32:
742-
case EF_A2R10G10B10_UINT_PACK32:
743-
case EF_A2R10G10B10_SINT_PACK32:
749+
case EF_A8B8G8R8_UNORM_PACK32:
750+
case EF_A8B8G8R8_SNORM_PACK32:
751+
case EF_A8B8G8R8_USCALED_PACK32:
752+
case EF_A8B8G8R8_SSCALED_PACK32:
753+
case EF_A8B8G8R8_UINT_PACK32:
754+
case EF_A8B8G8R8_SINT_PACK32:
755+
case EF_A8B8G8R8_SRGB_PACK32:
756+
case EF_A2B10G10R10_UNORM_PACK32:
757+
case EF_A2B10G10R10_SNORM_PACK32:
758+
case EF_A2B10G10R10_USCALED_PACK32:
759+
case EF_A2B10G10R10_SSCALED_PACK32:
760+
case EF_A2B10G10R10_UINT_PACK32:
761+
case EF_A2B10G10R10_SINT_PACK32:
762+
case EF_B10G11R11_UFLOAT_PACK32:
763+
case EF_E5B9G9R9_UFLOAT_PACK32:
744764
return true;
745765
default:
746766
return false;
@@ -1180,28 +1200,34 @@ constexpr bool isBGRALayoutFormat()
11801200
case EF_R16_SNORM:
11811201
case EF_R16_SSCALED:
11821202
case EF_R16_SINT:
1203+
case EF_R16_SFLOAT:
11831204
case EF_R16G16_SNORM:
11841205
case EF_R16G16_SSCALED:
11851206
case EF_R16G16_SINT:
1207+
case EF_R16G16_SFLOAT:
11861208
case EF_R16G16B16_SNORM:
11871209
case EF_R16G16B16_SSCALED:
11881210
case EF_R16G16B16_SINT:
1211+
case EF_R16G16B16_SFLOAT:
11891212
case EF_R16G16B16A16_SNORM:
11901213
case EF_R16G16B16A16_SSCALED:
11911214
case EF_R16G16B16A16_SINT:
1215+
case EF_R16G16B16A16_SFLOAT:
11921216
case EF_R32_SINT:
1217+
case EF_R32_SFLOAT:
11931218
case EF_R32G32_SINT:
1219+
case EF_R32G32_SFLOAT:
11941220
case EF_R32G32B32_SINT:
1195-
case EF_R32G32B32A32_SINT:
1196-
case EF_R64_SINT:
1197-
case EF_R64G64_SINT:
1198-
case EF_R64G64B64_SINT:
1199-
case EF_R64G64B64A64_SINT:
1200-
case EF_R16G16B16_SFLOAT:
12011221
case EF_R32G32B32_SFLOAT:
1222+
case EF_R32G32B32A32_SINT:
1223+
case EF_R32G32B32A32_SFLOAT:
1224+
case EF_R64_SINT:
12021225
case EF_R64_SFLOAT:
1226+
case EF_R64G64_SINT:
12031227
case EF_R64G64_SFLOAT:
1228+
case EF_R64G64B64_SINT:
12041229
case EF_R64G64B64_SFLOAT:
1230+
case EF_R64G64B64A64_SINT:
12051231
case EF_R64G64B64A64_SFLOAT:
12061232
case EF_EAC_R11_SNORM_BLOCK:
12071233
case EF_EAC_R11G11_SNORM_BLOCK:
@@ -1216,6 +1242,7 @@ constexpr bool isBGRALayoutFormat()
12161242
{
12171243
switch (_fmt)
12181244
{
1245+
case EF_S8_UINT:
12191246
case EF_R8_SINT:
12201247
case EF_R8_UINT:
12211248
case EF_R8G8_SINT:
@@ -1230,7 +1257,6 @@ constexpr bool isBGRALayoutFormat()
12301257
case EF_B8G8R8A8_UINT:
12311258
case EF_A8B8G8R8_UINT_PACK32:
12321259
case EF_A8B8G8R8_SINT_PACK32:
1233-
case EF_A8B8G8R8_SRGB_PACK32:
12341260
case EF_A2R10G10B10_UINT_PACK32:
12351261
case EF_A2R10G10B10_SINT_PACK32:
12361262
case EF_A2B10G10R10_UINT_PACK32:
@@ -1268,6 +1294,7 @@ constexpr bool isBGRALayoutFormat()
12681294
{
12691295
switch (_fmt)
12701296
{
1297+
case EF_D32_SFLOAT:
12711298
case EF_R16_SFLOAT:
12721299
case EF_R16G16_SFLOAT:
12731300
case EF_R16G16B16_SFLOAT:
@@ -1293,6 +1320,10 @@ constexpr bool isBGRALayoutFormat()
12931320
{
12941321
switch (_fmt)
12951322
{
1323+
case EF_D16_UNORM:
1324+
case EF_X8_D24_UNORM_PACK32:
1325+
case EF_D16_UNORM_S8_UINT:
1326+
case EF_D24_UNORM_S8_UINT:
12961327
case EF_R4G4_UNORM_PACK8:
12971328
case EF_R4G4B4A4_UNORM_PACK16:
12981329
case EF_B4G4R4A4_UNORM_PACK16:
@@ -1656,6 +1687,17 @@ inline value_type getFormatMaxValue(E_FORMAT format, uint32_t channel)
16561687
const bool _signed = isSignedFormat(format);
16571688
if (isIntegerFormat(format) || isScaledFormat(format))
16581689
{
1690+
switch (format)
1691+
{
1692+
case EF_A2R10G10B10_UINT_PACK32:
1693+
case EF_A2B10G10R10_UINT_PACK32:
1694+
return (channel == 3u) ? 3 : 1023;
1695+
case EF_A2R10G10B10_SINT_PACK32:
1696+
case EF_A2B10G10R10_SINT_PACK32:
1697+
return (channel == 3u) ? 3 : 1023;
1698+
default: break;
1699+
}
1700+
16591701
auto bytesPerChannel = (getBytesPerPixel(format)*core::rational(1,getFormatChannelCount(format))).getIntegerApprox();
16601702
if (_signed)
16611703
{
@@ -1686,6 +1728,13 @@ inline value_type getFormatMaxValue(E_FORMAT format, uint32_t channel)
16861728
}
16871729
else if (isFloatingPointFormat(format))
16881730
{
1731+
switch (format)
1732+
{
1733+
case EF_BC6H_SFLOAT_BLOCK: return 32767;
1734+
case EF_BC6H_UFLOAT_BLOCK: return 65504;
1735+
default: break;
1736+
}
1737+
16891738
auto bytesPerChannel = (getBytesPerPixel(format)*core::rational(1,getFormatChannelCount(format))).getIntegerApprox();
16901739
switch (bytesPerChannel)
16911740
{
@@ -1706,6 +1755,14 @@ inline value_type getFormatMinValue(E_FORMAT format, uint32_t channel)
17061755
return 0;
17071756
if (isIntegerFormat(format) || isScaledFormat(format))
17081757
{
1758+
switch (format)
1759+
{
1760+
case EF_A2R10G10B10_SINT_PACK32:
1761+
case EF_A2B10G10R10_SINT_PACK32:
1762+
return (channel == 3u) ? 0 : -1023;
1763+
default: break;
1764+
}
1765+
17091766
auto bytesPerChannel = (getBytesPerPixel(format)*core::rational(1,getFormatChannelCount(format))).getIntegerApprox();
17101767
switch (bytesPerChannel)
17111768
{
@@ -1722,6 +1779,12 @@ inline value_type getFormatMinValue(E_FORMAT format, uint32_t channel)
17221779
}
17231780
else if (isFloatingPointFormat(format))
17241781
{
1782+
switch (format)
1783+
{
1784+
case EF_BC6H_SFLOAT_BLOCK: return -32767;
1785+
default: break;
1786+
}
1787+
17251788
auto bytesPerChannel = (getBytesPerPixel(format)*core::rational(1,getFormatChannelCount(format))).getIntegerApprox();
17261789
switch (bytesPerChannel)
17271790
{
@@ -1740,7 +1803,7 @@ inline value_type getFormatPrecision(E_FORMAT format, uint32_t channel, value_ty
17401803
{
17411804
_NBL_DEBUG_BREAK_IF(isBlockCompressionFormat(format)); //????
17421805

1743-
if (isIntegerFormat(format))
1806+
if (isIntegerFormat(format) || isScaledFormat(format))
17441807
return 1;
17451808

17461809
if (isSRGBFormat(format))
@@ -1778,9 +1841,9 @@ inline value_type getFormatPrecision(E_FORMAT format, uint32_t channel, value_ty
17781841
switch (bytesPerChannel)
17791842
{
17801843
case 1u:
1781-
return 1.0/255.0;
1844+
return isSignedFormat(format) ? 1.0/127.0 : 1.0/255.0;
17821845
case 2u:
1783-
return 1.0/65535.0;
1846+
return isSignedFormat(format) ? 1.0/32765.0 : 1.0/65535.0;
17841847
default: break;
17851848
}
17861849
}
@@ -1789,7 +1852,17 @@ inline value_type getFormatPrecision(E_FORMAT format, uint32_t channel, value_ty
17891852
switch (format)
17901853
{
17911854
case EF_B10G11R11_UFLOAT_PACK32:
1792-
return 0; //TODO
1855+
{
1856+
// for non-negative floats the encodings are ordered like integers, so the next representable value is the truncated encoding + 1
1857+
float f = std::abs(static_cast<float>(value));
1858+
int bitshft = channel == 2u ? 6 : 5;
1859+
1860+
uint16_t f16 = core::Float16Compressor::compress(f);
1861+
uint16_t enc = f16 >> bitshft;
1862+
uint16_t next_f16 = (enc + 1) << bitshft;
1863+
1864+
return core::Float16Compressor::decompress(next_f16) - f;
1865+
}
17931866
case EF_E5B9G9R9_UFLOAT_PACK32:
17941867
return 0; //TODO
17951868
default: break;

include/nbl/asset/format/impl/EFormat_getFormatChannelCount.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
11
switch (_fmt)
22
{
3+
case EF_D16_UNORM:
4+
case EF_X8_D24_UNORM_PACK32:
5+
case EF_D32_SFLOAT:
6+
case EF_S8_UINT:
7+
return 1u;
8+
9+
case EF_D16_UNORM_S8_UINT:
10+
case EF_D24_UNORM_S8_UINT:
11+
case EF_D32_SFLOAT_S8_UINT:
12+
return 2u;
13+
314
case EF_R8_UNORM:
415
case EF_R8_SNORM:
516
case EF_R8_USCALED:

include/nbl/builtin/glsl/property_pool/copy.comp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#version 440 core
2-
layout(local_size_x=NBL_LIMIT_MAX_OPTIMALLY_RESIDENT_WORKGROUP_INVOCATIONS) in;
2+
layout(local_size_x=NBL_GLSL_LIMIT_MAX_OPTIMALLY_RESIDENT_WORKGROUP_INVOCATIONS) in;
33

44
#include "nbl/builtin/glsl/property_pool/transfer.glsl"
55

@@ -68,7 +68,7 @@ void main()
6868
const uint dstIndexOffset = transfer.dstIndexOffset-indicesToSkip;
6969
// set up loop
7070
const uint DWORDs = min(transfer.elementCount*propDWORDs,pc.endDWORD);
71-
const uint dispatchSize = gl_NumWorkGroups[0]*NBL_LIMIT_MAX_OPTIMALLY_RESIDENT_WORKGROUP_INVOCATIONS;
71+
const uint dispatchSize = gl_NumWorkGroups[0]*NBL_GLSL_LIMIT_MAX_OPTIMALLY_RESIDENT_WORKGROUP_INVOCATIONS;
7272
//
7373
const bool fill = bool(transfer.propertyDWORDsize_flags&(NBL_BUILTIN_PROPERTY_POOL_TRANSFER_EF_SRC_FILL<<flagsBitOffset));
7474
if (fill)

include/nbl/builtin/glsl/skinning/cache_update.comp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#version 440 core
2-
layout(local_size_x=NBL_LIMIT_MAX_OPTIMALLY_RESIDENT_WORKGROUP_INVOCATIONS) in;
2+
layout(local_size_x=NBL_GLSL_LIMIT_MAX_OPTIMALLY_RESIDENT_WORKGROUP_INVOCATIONS) in;
33

44
#include <nbl/builtin/glsl/skinning/cache_descriptor_set.glsl>
55
#include <nbl/builtin/glsl/skinning/update_descriptor_set.glsl>
@@ -15,7 +15,7 @@ void main()
1515
return;
1616

1717
const uint totalJointCount = jointCountInclPrefixSum[skinCount-1u];
18-
const uint dispatchSize = NBL_LIMIT_MAX_OPTIMALLY_RESIDENT_WORKGROUP_INVOCATIONS*gl_NumWorkGroups[0];
18+
const uint dispatchSize = NBL_GLSL_LIMIT_MAX_OPTIMALLY_RESIDENT_WORKGROUP_INVOCATIONS*gl_NumWorkGroups[0];
1919
for (uint jointIndex=gl_GlobalInvocationID.x; jointIndex<totalJointCount; jointIndex+=dispatchSize)
2020
{
2121
// TODO: implement via https://moderngpu.github.io/sortedsearch.html, find the upper and lower bounds of the workgroup, then go to town with sorted search

include/nbl/builtin/glsl/subgroup/basic_portability.glsl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@
1717
#define nbl_glsl_MinSubgroupSizeLog2 2
1818
#define nbl_glsl_MinSubgroupSize (0x1<<nbl_glsl_MinSubgroupSizeLog2)
1919

20-
#ifdef NBL_IMPL_GL_NV_shader_thread_group
20+
#ifdef NBL_GLSL_IMPL_GL_NV_shader_thread_group
2121
#define nbl_glsl_MaxSubgroupSizeLog2 5
22-
#elif defined(NBL_IMPL_GL_AMD_gcn_shader)||defined(NBL_IMPL_GL_ARB_shader_ballot)
22+
#elif defined(NBL_GLSL_IMPL_GL_AMD_gcn_shader)||defined(NBL_GLSL_IMPL_GL_ARB_shader_ballot)
2323
#define nbl_glsl_MaxSubgroupSizeLog2 6
2424
#else
2525
#define nbl_glsl_MaxSubgroupSizeLog2 7

include/nbl/builtin/glsl/transform_tree/global_transform_update_common.glsl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
layout(local_size_x=NBL_LIMIT_MAX_OPTIMALLY_RESIDENT_WORKGROUP_INVOCATIONS) in;
1+
layout(local_size_x=NBL_GLSL_LIMIT_MAX_OPTIMALLY_RESIDENT_WORKGROUP_INVOCATIONS) in;
22

33
#define NBL_GLSL_TRANSFORM_TREE_POOL_NODE_RECOMPUTED_TIMESTAMP_DESCRIPTOR_QUALIFIERS coherent restrict
44
#define NBL_GLSL_TRANSFORM_TREE_POOL_NODE_GLOBAL_TRANSFORM_DESCRIPTOR_QUALIFIERS coherent restrict
@@ -66,7 +66,7 @@ void main()
6666
#define NBL_GLSL_TRANSFORM_TREE_STACK_SIZE (NBL_GLSL_TRANSFORM_TREE_MAX_DEPTH-1)
6767
uint stack[NBL_GLSL_TRANSFORM_TREE_STACK_SIZE];
6868

69-
const uint dispatchSize = NBL_LIMIT_MAX_OPTIMALLY_RESIDENT_WORKGROUP_INVOCATIONS*gl_NumWorkGroups[0];
69+
const uint dispatchSize = NBL_GLSL_LIMIT_MAX_OPTIMALLY_RESIDENT_WORKGROUP_INVOCATIONS*gl_NumWorkGroups[0];
7070
for (uint nodeID=gl_GlobalInvocationID.x; nodeID<nodesToUpdate.count; nodeID+=dispatchSize)
7171
{
7272
int stackPtr = 0;

include/nbl/builtin/glsl/transform_tree/relative_transform_update.comp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#version 440 core
2-
layout(local_size_x=NBL_LIMIT_MAX_OPTIMALLY_RESIDENT_WORKGROUP_INVOCATIONS) in;
2+
layout(local_size_x=NBL_GLSL_LIMIT_MAX_OPTIMALLY_RESIDENT_WORKGROUP_INVOCATIONS) in;
33

44
// disable descriptors we don't need
55
#define NBL_GLSL_TRANSFORM_TREE_POOL_NODE_PARENT_DESCRIPTOR_DECLARED
@@ -12,7 +12,7 @@ layout(local_size_x=NBL_LIMIT_MAX_OPTIMALLY_RESIDENT_WORKGROUP_INVOCATIONS) in;
1212

1313
void main()
1414
{
15-
const uint dispatchSize = NBL_LIMIT_MAX_OPTIMALLY_RESIDENT_WORKGROUP_INVOCATIONS*gl_NumWorkGroups[0];
15+
const uint dispatchSize = NBL_GLSL_LIMIT_MAX_OPTIMALLY_RESIDENT_WORKGROUP_INVOCATIONS*gl_NumWorkGroups[0];
1616
for (uint nodeID=gl_GlobalInvocationID.x; nodeID<relativeTransformModificationRequestRanges.rangeCount; nodeID+=dispatchSize)
1717
{
1818
const nbl_glsl_transform_tree_modification_request_range_t requestRange = relativeTransformModificationRequestRanges.data[nodeID];

0 commit comments

Comments
 (0)