Skip to content

Commit 5df8d96

Browse files
Merge pull request #570 from Devsh-Graphics-Programming/new_hlsl_clz
New hlsl clz
2 parents ef08ca9 + 31ee2d5 commit 5df8d96

File tree

5 files changed

+75
-50
lines changed

5 files changed

+75
-50
lines changed

include/nbl/builtin/hlsl/bit.hlsl

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,17 @@
77
#include <bit>
88

99
namespace nbl::hlsl
10-
{
11-
12-
NBL_ALIAS_TEMPLATE_FUNCTION(std::rotl, rotl);
13-
NBL_ALIAS_TEMPLATE_FUNCTION(std::rotr, rotr);
14-
10+
{
11+
12+
NBL_ALIAS_TEMPLATE_FUNCTION(std::rotl, rotl);
13+
NBL_ALIAS_TEMPLATE_FUNCTION(std::rotr, rotr);
14+
NBL_ALIAS_TEMPLATE_FUNCTION(std::countl_zero, countl_zero);
15+
1516
}
1617
#else
1718
namespace nbl
1819
{
19-
namespace hlsl
20+
namespace hlsl
2021
{
2122

2223
template<typename T, typename S>
@@ -36,7 +37,7 @@ T rotl(T x, S s)
3637
}
3738
else
3839
{
39-
return (x >> (-r)) | (x << (N - (-r)));
40+
return (x >> (-r)) | (x << (N - (-r)));
4041
}
4142
}
4243

@@ -52,11 +53,28 @@ T rotr(T x, S s)
5253
}
5354
else
5455
{
55-
return (x << (-r)) | (x >> (N - (-r)));
56+
return (x << (-r)) | (x >> (N - (-r)));
5657
}
5758
}
5859

59-
}
60+
// Counts the number of consecutive zero bits in `n`, starting from the most significant bit.
// Intended to match `std::countl_zero`, which the C++ branch of this header aliases:
//  - the count is relative to the bit width of `T` (not a fixed 64 bits),
//  - an input of zero yields the full bit width of `T`.
// `T` must be an unsigned integer type whose bit width is a power of two; a signed type
// would pull sign bits in through `>>` and produce a wrong count.
template<typename T>
uint16_t countl_zero(T n)
{
    const uint16_t bitWidth = uint16_t(sizeof(T) * 8u);

    // Start from "every bit is zero" and subtract as set bits are located further up.
    uint16_t result = bitWidth;
    // Binary search: halve the window each step. The loop ends when `shift` reaches zero,
    // so there is no unsigned counter that could wrap around and spin forever.
    for (uint16_t shift = uint16_t(bitWidth >> 1); shift != 0u; shift = uint16_t(shift >> 1))
    {
        const T high = T(n >> shift);
        if (high != T(0))
        {
            // The highest set bit lives in the upper half: keep it, discard `shift` low bits.
            n = high;
            result = uint16_t(result - shift);
        }
    }

    // At this point `n` is either 0 (input was zero) or 1 (the located highest set bit).
    return uint16_t(result - uint16_t(n));
}
76+
77+
}
6078
}
6179
#endif
6280

include/nbl/builtin/hlsl/colorspace/EOTF.hlsl

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
//#include <nbl/builtin/hlsl/common.hlsl>
1010
#include <nbl/builtin/hlsl/cpp_compat.hlsl>
1111
#include <nbl/builtin/hlsl/cpp_compat/promote.hlsl>
12-
#include <nbl/builtin/hlsl/cpp_compat/type_traits.hlsl>
12+
#include <nbl/builtin/hlsl/type_traits.hlsl>
1313

1414
namespace nbl
1515
{
@@ -27,9 +27,9 @@ T identity(NBL_CONST_REF_ARG(T) nonlinear)
2727
}
2828

2929
template<typename T>
30-
T impl_shared_2_4(NBL_CONST_REF_ARG(T) nonlinear, typename scalar_type<T>::type vertex)
30+
T impl_shared_2_4(NBL_CONST_REF_ARG(T) nonlinear, typename type_traits::scalar_type<T>::type vertex)
3131
{
32-
typedef typename scalar_type<T>::type Val_t;
32+
typedef typename type_traits::scalar_type<T>::type Val_t;
3333
bool3 right = (nonlinear > promote<T, Val_t>(vertex));
3434
return lerp(nonlinear / Val_t(12.92), pow((nonlinear + promote<T, Val_t>(0.055)) / Val_t(1.055), promote<T, Val_t>(2.4)), right);
3535
}
@@ -38,7 +38,7 @@ T impl_shared_2_4(NBL_CONST_REF_ARG(T) nonlinear, typename scalar_type<T>::type
3838
template<typename T>
3939
T sRGB(NBL_CONST_REF_ARG(T) nonlinear)
4040
{
41-
typedef typename scalar_type<T>::type Val_t;
41+
typedef typename type_traits::scalar_type<T>::type Val_t;
4242
bool3 negatif = (nonlinear < promote<T, Val_t>(0.0));
4343
T absVal = impl_shared_2_4<T>(abs(nonlinear), 0.04045);
4444
return lerp(absVal, -absVal, negatif);
@@ -48,21 +48,21 @@ T sRGB(NBL_CONST_REF_ARG(T) nonlinear)
4848
template<typename T>
4949
T Display_P3(NBL_CONST_REF_ARG(T) nonlinear)
5050
{
51-
typedef typename scalar_type<T>::type Val_t;
51+
typedef typename type_traits::scalar_type<T>::type Val_t;
5252
return impl_shared_2_4<T>(nonlinear, 0.039000312);
5353
}
5454

5555
template<typename T>
5656
T DCI_P3_XYZ(NBL_CONST_REF_ARG(T) nonlinear)
5757
{
58-
typedef typename scalar_type<T>::type Val_t;
58+
typedef typename type_traits::scalar_type<T>::type Val_t;
5959
return pow(nonlinear * Val_t(52.37), promote<T, Val_t>(2.6));
6060
}
6161

6262
template<typename T>
6363
T SMPTE_170M(NBL_CONST_REF_ARG(T) nonlinear)
6464
{
65-
typedef typename scalar_type<T>::type Val_t;
65+
typedef typename type_traits::scalar_type<T>::type Val_t;
6666
// ITU specs (and the outlier BT.2020) give different constants for these, but they introduce discontinuities in the mapping
6767
// because HDR swapchains often employ the RGBA16_SFLOAT format, this would become apparent because it has higher precision than 8, 10 or 12 bits
6868
Val_t alpha = 1.099296826809443; // 1.099 for all ITU but the BT.2020 12 bit encoding, 1.0993 otherwise
@@ -73,7 +73,7 @@ T SMPTE_170M(NBL_CONST_REF_ARG(T) nonlinear)
7373
template<typename T>
7474
T SMPTE_ST2084(NBL_CONST_REF_ARG(T) nonlinear)
7575
{
76-
typedef typename scalar_type<T>::type Val_t;
76+
typedef typename type_traits::scalar_type<T>::type Val_t;
7777
const T invm2 = promote<T, Val_t>(1.0 / 78.84375);
7878
T _common = pow(invm2, invm2);
7979

@@ -89,7 +89,7 @@ T SMPTE_ST2084(NBL_CONST_REF_ARG(T) nonlinear)
8989
template<typename T>
9090
T HDR10_HLG(NBL_CONST_REF_ARG(T) nonlinear)
9191
{
92-
typedef typename scalar_type<T>::type Val_t;
92+
typedef typename type_traits::scalar_type<T>::type Val_t;
9393
// done with log2 so constants are different
9494
const Val_t a = 0.1239574303172;
9595
const T b = promote<T, Val_t>(0.02372241);
@@ -101,21 +101,21 @@ T HDR10_HLG(NBL_CONST_REF_ARG(T) nonlinear)
101101
template<typename T>
102102
T AdobeRGB(NBL_CONST_REF_ARG(T) nonlinear)
103103
{
104-
typedef typename scalar_type<T>::type Val_t;
104+
typedef typename type_traits::scalar_type<T>::type Val_t;
105105
return pow(nonlinear, promote<T, Val_t>(2.19921875));
106106
}
107107

108108
template<typename T>
109109
T Gamma_2_2(NBL_CONST_REF_ARG(T) nonlinear)
110110
{
111-
typedef typename scalar_type<T>::type Val_t;
111+
typedef typename type_traits::scalar_type<T>::type Val_t;
112112
return pow(nonlinear, promote<T, Val_t>(2.2));
113113
}
114114

115115
template<typename T>
116116
T ACEScc(NBL_CONST_REF_ARG(T) nonlinear)
117117
{
118-
typedef typename scalar_type<T>::type Val_t;
118+
typedef typename type_traits::scalar_type<T>::type Val_t;
119119
bool3 right = (nonlinear >= promote<T, Val_t>(-0.301369863));
120120
T _common = exp2(nonlinear * Val_t(17.52) - promote<T, Val_t>(9.72));
121121
return max(lerp(_common * Val_t(2.0) - promote<T, Val_t>(0.000030517578125), _common, right), promote<T, Val_t>(65504.0));
@@ -124,7 +124,7 @@ T ACEScc(NBL_CONST_REF_ARG(T) nonlinear)
124124
template<typename T>
125125
T ACEScct(NBL_CONST_REF_ARG(T) nonlinear)
126126
{
127-
typedef typename scalar_type<T>::type Val_t;
127+
typedef typename type_traits::scalar_type<T>::type Val_t;
128128
bool3 right = (nonlinear >= promote<T, Val_t>(0.155251141552511));
129129
return max(lerp((nonlinear - promote<T, Val_t>(0.0729055341958355)) / Val_t(10.5402377416545), exp2(nonlinear * Val_t(17.52) - promote<T, Val_t>(9.72)), right), promote<T, Val_t>(65504.0));
130130
}

include/nbl/builtin/hlsl/colorspace/OETF.hlsl

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
//#include <nbl/builtin/hlsl/common.hlsl>
1010
#include <nbl/builtin/hlsl/cpp_compat.hlsl>
1111
#include <nbl/builtin/hlsl/cpp_compat/promote.hlsl>
12-
#include <nbl/builtin/hlsl/cpp_compat/type_traits.hlsl>
12+
#include <nbl/builtin/hlsl/type_traits.hlsl>
1313

1414
namespace nbl
1515
{
@@ -27,9 +27,9 @@ T identity(NBL_CONST_REF_ARG(T) _linear)
2727
}
2828

2929
template<typename T>
30-
T impl_shared_2_4(NBL_CONST_REF_ARG(T) _linear, typename scalar_type<T>::type vertex)
30+
T impl_shared_2_4(NBL_CONST_REF_ARG(T) _linear, typename type_traits::scalar_type<T>::type vertex)
3131
{
32-
typedef typename scalar_type<T>::type Val_t;
32+
typedef typename type_traits::scalar_type<T>::type Val_t;
3333
bool3 right = (_linear > promote<T, Val_t>(vertex));
3434
return lerp(_linear * Val_t(12.92), pow(_linear, promote<T, Val_t>(1.0 / 2.4)) * Val_t(1.055) - (Val_t(0.055)), right);
3535
}
@@ -38,7 +38,7 @@ T impl_shared_2_4(NBL_CONST_REF_ARG(T) _linear, typename scalar_type<T>::type ve
3838
template<typename T>
3939
T sRGB(NBL_CONST_REF_ARG(T) _linear)
4040
{
41-
typedef typename scalar_type<T>::type Val_t;
41+
typedef typename type_traits::scalar_type<T>::type Val_t;
4242
bool3 negatif = (_linear < promote<T, Val_t>(0.0));
4343
T absVal = impl_shared_2_4<T>(abs(_linear), 0.0031308);
4444
return lerp(absVal, -absVal, negatif);
@@ -54,14 +54,14 @@ T Display_P3(NBL_CONST_REF_ARG(T) _linear)
5454
template<typename T>
5555
T DCI_P3_XYZ(NBL_CONST_REF_ARG(T) _linear)
5656
{
57-
typedef typename scalar_type<T>::type Val_t;
57+
typedef typename type_traits::scalar_type<T>::type Val_t;
5858
return pow(_linear / Val_t(52.37), promote<T, Val_t>(1.0 / 2.6));
5959
}
6060

6161
template<typename T>
6262
T SMPTE_170M(NBL_CONST_REF_ARG(T) _linear)
6363
{
64-
typedef typename scalar_type<T>::type Val_t;
64+
typedef typename type_traits::scalar_type<T>::type Val_t;
6565
// ITU specs (and the outlier BT.2020) give different constants for these, but they introduce discontinuities in the mapping
6666
// because HDR swapchains often employ the RGBA16_SFLOAT format, this would become apparent because it has higher precision than 8, 10 or 12 bits
6767
const Val_t alpha = 1.099296826809443; // 1.099 for all ITU but the BT.2020 12 bit encoding, 1.0993 otherwise
@@ -72,7 +72,7 @@ T SMPTE_170M(NBL_CONST_REF_ARG(T) _linear)
7272
template<typename T>
7373
T SMPTE_ST2084(NBL_CONST_REF_ARG(T) _linear)
7474
{
75-
typedef typename scalar_type<T>::type Val_t;
75+
typedef typename type_traits::scalar_type<T>::type Val_t;
7676
const T m1 = promote<T, Val_t>(0.1593017578125);
7777
const T m2 = promote<T, Val_t>(78.84375);
7878
const Val_t c2 = 18.8515625;
@@ -87,7 +87,7 @@ T SMPTE_ST2084(NBL_CONST_REF_ARG(T) _linear)
8787
template<typename T>
8888
T HDR10_HLG(NBL_CONST_REF_ARG(T) _linear)
8989
{
90-
typedef typename scalar_type<T>::type Val_t;
90+
typedef typename type_traits::scalar_type<T>::type Val_t;
9191

9292
// done with log2 so constants are different
9393
const Val_t a = 0.1239574303172;
@@ -100,21 +100,21 @@ T HDR10_HLG(NBL_CONST_REF_ARG(T) _linear)
100100
template<typename T>
101101
T AdobeRGB(NBL_CONST_REF_ARG(T) _linear)
102102
{
103-
typedef typename scalar_type<T>::type Val_t;
103+
typedef typename type_traits::scalar_type<T>::type Val_t;
104104
return pow(_linear, promote<T, Val_t>(1.0 / 2.19921875));
105105
}
106106

107107
template<typename T>
108108
T Gamma_2_2(NBL_CONST_REF_ARG(T) _linear)
109109
{
110-
typedef typename scalar_type<T>::type Val_t;
110+
typedef typename type_traits::scalar_type<T>::type Val_t;
111111
return pow(_linear, promote<T, Val_t>(1.0 / 2.2));
112112
}
113113

114114
template<typename T>
115115
T ACEScc(NBL_CONST_REF_ARG(T) _linear)
116116
{
117-
typedef typename scalar_type<T>::type Val_t;
117+
typedef typename type_traits::scalar_type<T>::type Val_t;
118118
bool3 mid = (_linear >= promote<T, Val_t>(0.0));
119119
bool3 right = (_linear >= promote<T, Val_t>(0.000030517578125));
120120
return (log2(lerp(promote<T, Val_t>(0.0000152587890625), promote<T, Val_t>(0.0), right) + _linear * lerp(promote<T, Val_t>(0.0), lerp(promote<T, Val_t>(0.5), promote<T, Val_t>(1.0), right), mid)) + promote<T, Val_t>(9.72)) / Val_t(17.52);
@@ -123,7 +123,7 @@ T ACEScc(NBL_CONST_REF_ARG(T) _linear)
123123
template<typename T>
124124
T ACEScct(NBL_CONST_REF_ARG(T) _linear)
125125
{
126-
typedef typename scalar_type<T>::type Val_t;
126+
typedef typename type_traits::scalar_type<T>::type Val_t;
127127
bool3 right = (_linear > promote<T, Val_t>(0.0078125));
128128
return lerp(Val_t(10.5402377416545) * _linear + Val_t(0.0729055341958355), (log2(_linear) + promote<T, Val_t>(9.72)) / Val_t(17.52), right);
129129
}

include/nbl/builtin/hlsl/mpl.hlsl

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66

77
#ifdef __HLSL_VERSION
88
#include <nbl/builtin/hlsl/type_traits.hlsl>
9+
#else
10+
#include <bit>
11+
912
#endif
1013

1114
namespace nbl
@@ -20,43 +23,47 @@ namespace impl
2023
{
2124

2225
template<uint16_t bits_log2>
23-
struct clz_masks
26+
struct countl_zero_masks
2427
{
25-
static const uint16_t SHIFT = uint16_t(1)<<(bits_log2-1);
26-
static const uint64_t LO_MASK = (1ull<<SHIFT)-1;
28+
NBL_CONSTEXPR_STATIC_INLINE uint16_t SHIFT = uint16_t(1)<<(bits_log2-1);
29+
NBL_CONSTEXPR_STATIC_INLINE uint64_t LO_MASK = (1ull<<SHIFT)-1;
2730
};
2831

2932
template<>
30-
struct clz_masks<0>
33+
struct countl_zero_masks<0>
3134
{
32-
static const uint16_t SHIFT = 0;
33-
static const uint64_t LO_MASK = 0;
35+
NBL_CONSTEXPR_STATIC_INLINE uint16_t SHIFT = 0;
36+
NBL_CONSTEXPR_STATIC_INLINE uint64_t LO_MASK = 0;
3437
};
3538

3639
template<uint64_t N, uint16_t bits_log2>
37-
struct clz
40+
struct countl_zero
3841
{
39-
static const bool CHOOSE_HIGH = N&(clz_masks<bits_log2>::LO_MASK<<clz_masks<bits_log2>::SHIFT);
40-
static const uint64_t NEXT_N = (CHOOSE_HIGH ? (N>>clz_masks<bits_log2>::SHIFT):N)&clz_masks<bits_log2>::LO_MASK;
41-
static const uint16_t value = type_traits::conditional<bits_log2,clz<NEXT_N,bits_log2-1>,type_traits::integral_constant<uint16_t,0> >::type::value + (CHOOSE_HIGH ? 0ull:clz_masks<bits_log2>::SHIFT);
42+
NBL_CONSTEXPR_STATIC_INLINE bool CHOOSE_HIGH = N&(countl_zero_masks<bits_log2>::LO_MASK<<countl_zero_masks<bits_log2>::SHIFT);
43+
NBL_CONSTEXPR_STATIC_INLINE uint64_t NEXT_N = (CHOOSE_HIGH ? (N>>countl_zero_masks<bits_log2>::SHIFT):N)&countl_zero_masks<bits_log2>::LO_MASK;
44+
NBL_CONSTEXPR_STATIC_INLINE uint16_t value = type_traits::conditional<bits_log2,countl_zero<NEXT_N,bits_log2-1>,type_traits::integral_constant<uint16_t,0> >::type::value + (CHOOSE_HIGH ? 0ull:countl_zero_masks<bits_log2>::SHIFT);
4245
};
4346

4447
}
48+
#endif
4549

4650
template<uint64_t N>
47-
struct clz
51+
struct countl_zero
4852
{
49-
static const uint16_t value = impl::clz<N, 6>::value;
53+
NBL_CONSTEXPR_STATIC_INLINE uint16_t value =
54+
#ifdef __HLSL_VERSION
55+
impl::countl_zero<N, 6>::value;
56+
#else
57+
std::countl_zero(N);
58+
#endif
5059
};
5160

5261
template<uint64_t X>
5362
struct log2
5463
{
55-
static const uint16_t value = X ? (1ull<<6)-clz<X>::value-1 : -1ull;
64+
NBL_CONSTEXPR_STATIC_INLINE uint16_t value = X ? (1ull<<6)-countl_zero<X>::value-1 : -1ull;
5665
};
5766

58-
#endif
59-
6067
}
6168
}
6269
}

0 commit comments

Comments
 (0)