11
11
#include "nbl/builtin/hlsl/subgroup2/ballot.hlsl"
12
12
13
13
#include "nbl/builtin/hlsl/functional.hlsl"
14
-
15
- // #include "nbl/builtin/hlsl/subgroup/arithmetic_portability_impl.hlsl"
14
+ #include "nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl"
16
15
17
16
namespace nbl
18
17
{
@@ -73,7 +72,7 @@ struct exclusive_scan
73
72
type_t left = glsl::subgroupShuffleUp<type_t>(value,1 );
74
73
75
74
type_t retval;
76
- retval[0 ] = bool (glsl:: gl_SubgroupInvocationID ()) ? left[ItemsPerInvocation-1 ] : binop_t::identity ;
75
+ retval[0 ] = hlsl:: mix (binop_t::identity, left[ItemsPerInvocation-1 ], bool (glsl:: gl_SubgroupInvocationID ())) ;
77
76
[unroll]
78
77
for (uint32_t i = 1 ; i < ItemsPerInvocation; i++)
79
78
retval[i] = value[i-1 ];
@@ -154,17 +153,15 @@ struct inclusive_scan<Params, BinOp, 1, false>
154
153
const uint32_t subgroupInvocation = glsl::gl_SubgroupInvocationID ();
155
154
156
155
type_t rhs = glsl::subgroupShuffleUp<type_t>(value, 1u); // all invocations must execute the shuffle, even if we don't apply the op() to all of them
157
- // TODO waiting on mix intrinsic fix from bxdf branch, value = op(value, hlsl::mix(rhs, binop_t::identity, subgroupInvocation < 1u));
158
- value = op (value, subgroupInvocation<1u ? binop_t::identity : rhs);
156
+ value = op (value, hlsl::mix (rhs, binop_t::identity, subgroupInvocation < 1u));
159
157
160
158
const uint32_t SubgroupSizeLog2 = config_t::SizeLog2;
161
159
[unroll]
162
160
for (uint32_t i = 1 ; i < integral_constant<uint32_t,SubgroupSizeLog2>::value; i++)
163
161
{
164
162
const uint32_t step = i * 2 ;
165
163
rhs = glsl::subgroupShuffleUp<type_t>(value, step);
166
- // TODO value = op(value, hlsl::mix(rhs, binop_t::identity, subgroupInvocation < step));
167
- value = op (value, subgroupInvocation<step ? binop_t::identity : rhs);
164
+ value = op (value, hlsl::mix (rhs, binop_t::identity, subgroupInvocation < step));
168
165
}
169
166
return value;
170
167
}
@@ -183,7 +180,7 @@ struct exclusive_scan<Params, BinOp, 1, false>
183
180
// can't risk getting short-circuited, need to store to a var
184
181
type_t left = glsl::subgroupShuffleUp<type_t>(value,1 );
185
182
// the first invocation has nothing to its left, so it gets the binop's identity value for the exclusive scan
186
- return bool (glsl::gl_SubgroupInvocationID ()) ? left:binop_t::identity ;
183
+ return hlsl:: mix (binop_t::identity, left, bool (glsl::gl_SubgroupInvocationID ())) ;
187
184
}
188
185
};
189
186
0 commit comments