Skip to content

Commit 76d95a8

Browse files
committed
changes to handle coalesced data loads
1 parent d4ca075 commit 76d95a8

File tree

2 files changed

+56
-12
lines changed

2 files changed

+56
-12
lines changed

include/nbl/builtin/hlsl/subgroup2/arithmetic_portability_impl.hlsl

Lines changed: 55 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -46,21 +46,49 @@ struct inclusive_scan
4646

4747
// NBL_CONSTEXPR_STATIC_INLINE uint32_t ItemsPerInvocation = vector_traits<T>::Dimension;
4848

49-
type_t operator()(NBL_CONST_REF_ARG(type_t) value)
49+
// type_t operator()(NBL_CONST_REF_ARG(type_t) value)
50+
// {
51+
// binop_t binop;
52+
// type_t retval;
53+
// retval[0] = value[0];
54+
// [unroll]
55+
// for (uint32_t i = 1; i < ItemsPerInvocation; i++)
56+
// retval[i] = binop(retval[i-1], value[i]);
57+
58+
// exclusive_scan_op_t op;
59+
// scalar_t exclusive = op(retval[ItemsPerInvocation-1]);
60+
61+
// [unroll]
62+
// for (uint32_t i = 0; i < ItemsPerInvocation; i++)
63+
// retval[i] = binop(retval[i], exclusive);
64+
// return retval;
65+
// }
66+
67+
type_t operator()(type_t value)
5068
{
5169
binop_t binop;
5270
type_t retval;
53-
retval[0] = value[0];
71+
72+
// rhs = `value` shuffled up by 1, i.e. read from invocation (gl_SubgroupInvocationID - 1)
73+
type_t rhs = glsl::subgroupShuffleUp<type_t>(value, 1u);
74+
// value = op(value, (gl_SubgroupInvocationID() == 0) ? op::identity : rhs)
75+
value = binop(value, hlsl::mix(binop_t::identity, rhs, bool(glsl::gl_SubgroupInvocationID())));
76+
77+
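// ex_scan = exclusive scan over this invocation's own components of value (a local loop, stored in `exclusive`; not the exclusive_scan struct)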
78+
type_t exclusive;
79+
exclusive[0] = binop_t::identity;
5480
[unroll]
5581
for (uint32_t i = 1; i < ItemsPerInvocation; i++)
56-
retval[i] = binop(retval[i-1], value[i]);
82+
exclusive[i] = binop(value[i-1], exclusive[i-1]);
83+
// last_ex_scan = broadcast_last(ex_scan)
84+
exclusive = BroadcastLast<type_t>(exclusive);
5785

58-
exclusive_scan_op_t op;
59-
scalar_t exclusive = op(retval[ItemsPerInvocation-1]);
86+
// for i in 0 .. ItemsPerInvocation-1
87+
// retval[i] = op(value[i], last_ex_scan[i])
6088

6189
[unroll]
6290
for (uint32_t i = 0; i < ItemsPerInvocation; i++)
63-
retval[i] = binop(retval[i], exclusive);
91+
retval[i] = binop(value[i], exclusive[i]);
6492
return retval;
6593
}
6694
};
@@ -75,19 +103,35 @@ struct exclusive_scan
75103

76104
// NBL_CONSTEXPR_STATIC_INLINE uint32_t ItemsPerInvocation = vector_traits<T>::Dimension;
77105

106+
// type_t operator()(type_t value)
107+
// {
108+
// inclusive_scan_op_t op;
109+
// value = op(value);
110+
111+
// type_t left = glsl::subgroupShuffleUp<type_t>(value,1);
112+
113+
// type_t retval;
114+
// retval[0] = hlsl::mix(binop_t::identity, left[ItemsPerInvocation-1], bool(glsl::gl_SubgroupInvocationID()));
115+
// [unroll]
116+
// for (uint32_t i = 1; i < ItemsPerInvocation; i++)
117+
// retval[i] = value[i-1];
118+
// return retval;
119+
// }
120+
78121
type_t operator()(type_t value)
79122
{
80123
inclusive_scan_op_t op;
81124
value = op(value);
82125

83-
type_t left = glsl::subgroupShuffleUp<type_t>(value,1);
126+
const uint32_t SubgroupSizeMinusOne = config_t::Size - 1u;
127+
type_t left = ItemsPerInvocation > 1u ? glsl::subgroupShuffle<type_t>(value,(glsl::gl_SubgroupInvocationID()+SubgroupSizeMinusOne)&SubgroupSizeMinusOne) : glsl::subgroupShuffleUp<type_t>(value,1);
84128

85-
type_t retval;
86-
retval[0] = hlsl::mix(binop_t::identity, left[ItemsPerInvocation-1], bool(glsl::gl_SubgroupInvocationID()));
129+
type_t newFirst;
130+
newFirst[0] = binop_t::identity;
87131
[unroll]
88132
for (uint32_t i = 1; i < ItemsPerInvocation; i++)
89-
retval[i] = value[i-1];
90-
return retval;
133+
newFirst[i] = left[i-1];
134+
return hlsl::mix(newFirst, left, bool(glsl::gl_SubgroupInvocationID()));
91135
}
92136
};
93137

0 commit comments

Comments
 (0)