Skip to content

Commit 4ce79ba

Browse files
Merge pull request #99 from Devsh-Graphics-Programming/exclusive_min_fix
Fix workgroupExclusiveMin and the identity element of the floating-point workgroup And operation
2 parents cac8d5e + 46c812d commit 4ce79ba

File tree

4 files changed

+25
-9
lines changed

4 files changed

+25
-9
lines changed

examples_tests/48.ArithmeticUnitTest/main.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,28 @@ using namespace core;
99
using namespace video;
1010
using namespace asset;
1111

12+
// Reinterprets the object representation of `src` as a value of type `To`
// by copying bytes with std::memcpy (a pre-C++20 stand-in for std::bit_cast).
//
// Participates in overload resolution only when both `From` and `To` are
// trivially copyable; `To` must additionally be trivially constructible
// because the result is created as a plain local before the copy.
//
// NOTE: unlike std::bit_cast, nothing here checks sizeof(To) == sizeof(From).
// Exactly sizeof(To) bytes are copied, so a narrower `To` receives only the
// first sizeof(To) bytes of `src`, and a wider `To` would read past `src`.
// The visible call sites pass an `unsigned long long` literal as the source.
template <class To, class From>
13+
typename std::enable_if_t<
14+
std::is_trivially_copyable_v<From> &&
15+
std::is_trivially_copyable_v<To>,
16+
To>
17+
// constexpr support needs compiler magic
18+
bit_cast(const From& src) noexcept
19+
{
20+
static_assert(std::is_trivially_constructible_v<To>,
21+
"This implementation additionally requires destination type to be trivially constructible");
22+
23+
// Left uninitialized on purpose: every byte is overwritten by the memcpy below.
To dst;
24+
// The copy length is taken from the destination type, not the source.
std::memcpy(&dst, &src, sizeof(To));
25+
return dst;
26+
}
1227

1328

1429
template<typename T>
1530
struct and
1631
{
1732
using type_t = T;
18-
_NBL_STATIC_INLINE_CONSTEXPR T IdentityElement = ~0ull; // this should be a reinterpret cast
33+
static inline const T IdentityElement = bit_cast<T>(~0ull); // until C++20 std::bit_cast this cannot be a constexpr
1934

2035
inline T operator()(T left, T right) { return left & right; }
2136
_NBL_STATIC_INLINE_CONSTEXPR bool runOPonFirst = false;
@@ -25,7 +40,7 @@ template<typename T>
2540
struct xor
2641
{
2742
using type_t = T;
28-
_NBL_STATIC_INLINE_CONSTEXPR T IdentityElement = 0ull; // this should be a reinterpret cast
43+
static inline const T IdentityElement = bit_cast<T>(0ull); // until C++20 std::bit_cast this cannot be a constexpr
2944

3045
inline T operator()(T left, T right) { return left ^ right; }
3146
_NBL_STATIC_INLINE_CONSTEXPR bool runOPonFirst = false;
@@ -35,7 +50,7 @@ template<typename T>
3550
struct or
3651
{
3752
using type_t = T;
38-
_NBL_STATIC_INLINE_CONSTEXPR T IdentityElement = 0ull; // this should be a reinterpret cast
53+
static inline const T IdentityElement = bit_cast<T>(0ull); // until C++20 std::bit_cast this cannot be a constexpr
3954

4055
inline T operator()(T left, T right) { return left | right; }
4156
_NBL_STATIC_INLINE_CONSTEXPR bool runOPonFirst = false;

include/nbl/builtin/glsl/subgroup/basic_portability.glsl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ bool nbl_glsl_subgroupElect()
155155
#ifdef NBL_GL_KHR_shader_subgroup_basic_subgroup_elect
156156
return subgroupElect();
157157
#else
158-
// TODO: do a bunch of `atomicXor` on a shared memory address
158+
// TODO: do a bunch of `atomicXor` on a shared memory address OR do a ballotARB?
159159
atomicXor(ADDRESS,nbl_glsl_SubgroupEqMask);
160160
memoryBarrierShared();
161161
return (ADDRESS&nbl_glsl_SubgroupLeMask)==nbl_glsl_SubgroupEqMask;

include/nbl/builtin/glsl/subgroup/shuffle_portability.glsl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <nbl/builtin/glsl/subgroup/shared_shuffle_portability.glsl>
66

77

8+
// TODO: contribute to SPIRV-Cross so that NBL_GL_KHR_shader_subgroup_shuffle can be set when the AMD_gcn_shader or NVIDIA shuffle extensions are available
89
#ifdef NBL_GL_KHR_shader_subgroup_shuffle
910

1011

include/nbl/builtin/glsl/workgroup/arithmetic.glsl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,15 @@
5252

5353
uint nbl_glsl_workgroupAnd_noBarriers(in uint val)
5454
{
55-
NBL_GLSL_WORKGROUP_REDUCE(nbl_glsl_identityFunction,nbl_glsl_subgroupInclusiveAnd_impl,val,~0u,nbl_glsl_identityFunction);
55+
NBL_GLSL_WORKGROUP_REDUCE(nbl_glsl_identityFunction,nbl_glsl_subgroupInclusiveAnd_impl,val,0xffFFffFFu,nbl_glsl_identityFunction);
5656
}
5757
int nbl_glsl_workgroupAnd_noBarriers(in int val)
5858
{
5959
return int(nbl_glsl_workgroupAnd_noBarriers(uint(val)));
6060
}
6161
float nbl_glsl_workgroupAnd_noBarriers(in float val)
6262
{
63-
NBL_GLSL_WORKGROUP_REDUCE(uintBitsToFloat,nbl_glsl_subgroupInclusiveAnd_impl,val,0.0,floatBitsToUint);
63+
return uintBitsToFloat(nbl_glsl_workgroupAnd_noBarriers(floatBitsToUint(val)));
6464
}
6565

6666
DECLARE_OVERLOAD_WITH_BARRIERS(uint,workgroupAnd)
@@ -185,7 +185,7 @@ DECLARE_OVERLOAD_WITH_BARRIERS(float,workgroupMax)
185185

186186
uint nbl_glsl_workgroupInclusiveAnd_noBarriers(in uint val)
187187
{
188-
NBL_GLSL_WORKGROUP_SCAN(false,nbl_glsl_identityFunction,nbl_glsl_and,nbl_glsl_subgroupInclusiveAnd_impl,val,~0u,nbl_glsl_identityFunction);
188+
NBL_GLSL_WORKGROUP_SCAN(false,nbl_glsl_identityFunction,nbl_glsl_and,nbl_glsl_subgroupInclusiveAnd_impl,val,0xffFFffFFu,nbl_glsl_identityFunction);
189189
}
190190
int nbl_glsl_workgroupInclusiveAnd_noBarriers(in int val)
191191
{
@@ -203,7 +203,7 @@ DECLARE_OVERLOAD_WITH_BARRIERS(float,workgroupInclusiveAnd)
203203

204204
uint nbl_glsl_workgroupExclusiveAnd_noBarriers(in uint val)
205205
{
206-
NBL_GLSL_WORKGROUP_SCAN(true,nbl_glsl_identityFunction,nbl_glsl_and,nbl_glsl_subgroupInclusiveAnd_impl,val,~0u,nbl_glsl_identityFunction);
206+
NBL_GLSL_WORKGROUP_SCAN(true,nbl_glsl_identityFunction,nbl_glsl_and,nbl_glsl_subgroupInclusiveAnd_impl,val,0xffFFffFFu,nbl_glsl_identityFunction);
207207
}
208208
int nbl_glsl_workgroupExclusiveAnd_noBarriers(in int val)
209209
{
@@ -392,7 +392,7 @@ uint nbl_glsl_workgroupExclusiveMin_noBarriers(in uint val)
392392
}
393393
int nbl_glsl_workgroupExclusiveMin_noBarriers(in int val)
394394
{
395-
NBL_GLSL_WORKGROUP_SCAN(true,int,min,nbl_glsl_subgroupExclusiveMin_impl,val,INT_MAX,uint);
395+
NBL_GLSL_WORKGROUP_SCAN(true,int,min,nbl_glsl_subgroupInclusiveMin_impl,val,INT_MAX,uint);
396396
}
397397
float nbl_glsl_workgroupExclusiveMin_noBarriers(in float val)
398398
{

0 commit comments

Comments
 (0)