@@ -46,21 +46,49 @@ struct inclusive_scan
46
46
47
47
// NBL_CONSTEXPR_STATIC_INLINE uint32_t ItemsPerInvocation = vector_traits<T>::Dimension;
48
48
49
- type_t operator ()(NBL_CONST_REF_ARG (type_t) value)
49
+ // type_t operator()(NBL_CONST_REF_ARG(type_t) value)
50
+ // {
51
+ // binop_t binop;
52
+ // type_t retval;
53
+ // retval[0] = value[0];
54
+ // [unroll]
55
+ // for (uint32_t i = 1; i < ItemsPerInvocation; i++)
56
+ // retval[i] = binop(retval[i-1], value[i]);
57
+
58
+ // exclusive_scan_op_t op;
59
+ // scalar_t exclusive = op(retval[ItemsPerInvocation-1]);
60
+
61
+ // [unroll]
62
+ // for (uint32_t i = 0; i < ItemsPerInvocation; i++)
63
+ // retval[i] = binop(retval[i], exclusive);
64
+ // return retval;
65
+ // }
66
+
67
// inclusive_scan::operator() as shown in this diff hunk ('+' marks added lines, '-' removed
// lines; bare numbers are old/new line numbers left over from the diff view).
//
// The added version takes `value` by value because it overwrites it below. Steps:
//  1. rhs   = `value` shuffled up by one invocation (glsl::subgroupShuffleUp).
//  2. value = binop(value, rhs); per the inline comment, binop_t::identity replaces rhs on the
//     first invocation (selector is bool(gl_SubgroupInvocationID()), i.e. false only for ID 0).
//  3. exclusive[0] = identity; exclusive[i] = binop(value[i-1], exclusive[i-1]), a running
//     combination of the preceding components of `value`.
//  4. exclusive = BroadcastLast<type_t>(exclusive). BroadcastLast is defined outside this hunk;
//     per the inline comment it broadcasts the last invocation's vector - TODO confirm.
//  5. retval[i] = binop(value[i], exclusive[i]) for every component.
//
// NOTE(review): the removed/commented-out implementation scanned the components and then ran
// exclusive_scan_op_t on the last component; the added one relies only on a neighbour shuffle
// plus BroadcastLast - confirm it still produces a subgroup-wide inclusive scan.
+ type_t operator ()(type_t value)
50
68
{
51
69
binop_t binop;
52
70
type_t retval;
53
- retval[0 ] = value[0 ];
71
+
72
+ // rhs = shuffleUp
73
+ type_t rhs = glsl::subgroupShuffleUp<type_t>(value, 1u);
74
+ // value = op(value, is 1st invoc ? op::identity : rhs)
75
+ value = binop (value, hlsl::mix (binop_t::identity, rhs, bool (glsl::gl_SubgroupInvocationID ())));
76
+
77
+ // ex_scan = exclusive_scan(value)
78
+ type_t exclusive;
79
+ exclusive[0 ] = binop_t::identity;
54
80
[unroll]
55
81
for (uint32_t i = 1 ; i < ItemsPerInvocation; i++)
56
- retval[i] = binop (retval[i-1 ], value[i]);
82
// NOTE(review): the newer element value[i-1] is the LEFT operand here; this only matters if
// binop is not commutative - confirm the intended operand order.
+ exclusive[i] = binop (value[i-1 ], exclusive[i-1 ]);
83
+ // last_ex_scan = broadcast_last(ex_scan)
84
+ exclusive = BroadcastLast<type_t>(exclusive);
57
85
58
- exclusive_scan_op_t op;
59
- scalar_t exclusive = op (retval[ItemsPerInvocation- 1 ]);
86
+ // for i in 0->N
87
+ // retval[i] = op(value[i], last_ex_scan[i])
60
88
61
89
[unroll]
62
90
for (uint32_t i = 0 ; i < ItemsPerInvocation; i++)
63
- retval[i] = binop (retval [i], exclusive);
91
+ retval[i] = binop (value [i], exclusive[i] );
64
92
return retval;
65
93
}
66
94
};
@@ -75,19 +103,35 @@ struct exclusive_scan
75
103
76
104
// NBL_CONSTEXPR_STATIC_INLINE uint32_t ItemsPerInvocation = vector_traits<T>::Dimension;
77
105
106
+ // type_t operator()(type_t value)
107
+ // {
108
+ // inclusive_scan_op_t op;
109
+ // value = op(value);
110
+
111
+ // type_t left = glsl::subgroupShuffleUp<type_t>(value,1);
112
+
113
+ // type_t retval;
114
+ // retval[0] = hlsl::mix(binop_t::identity, left[ItemsPerInvocation-1], bool(glsl::gl_SubgroupInvocationID()));
115
+ // [unroll]
116
+ // for (uint32_t i = 1; i < ItemsPerInvocation; i++)
117
+ // retval[i] = value[i-1];
118
+ // return retval;
119
+ // }
120
+
78
121
// exclusive_scan::operator() as shown in this diff hunk ('+' marks added lines, '-' removed
// lines; bare numbers are old/new line numbers left over from the diff view).
//
// Steps of the added version:
//  1. value = inclusive_scan_op_t()(value).
//  2. left  = the scanned vector fetched from another invocation:
//       - ItemsPerInvocation > 1: glsl::subgroupShuffle from index
//         (gl_SubgroupInvocationID() + Size-1) & (Size-1), i.e. the previous invocation with
//         wrap-around, so invocation 0 reads from invocation Size-1
//         (assumes config_t::Size is a power of two - TODO confirm);
//       - otherwise: glsl::subgroupShuffleUp by 1, as in the removed line.
//  3. newFirst = { binop_t::identity, left[0], ..., left[ItemsPerInvocation-2] }.
//  4. Returns hlsl::mix(newFirst, left, bool(gl_SubgroupInvocationID())); presumably invocation 0
//     (selector false) gets newFirst and all others get left - verify against hlsl::mix.
type_t operator ()(type_t value)
79
122
{
80
123
inclusive_scan_op_t op;
81
124
value = op (value);
82
125
83
- type_t left = glsl::subgroupShuffleUp<type_t>(value,1 );
126
+ const uint32_t SubgroupSizeMinusOne = config_t::Size - 1u;
127
+ type_t left = ItemsPerInvocation > 1u ? glsl::subgroupShuffle<type_t>(value,(glsl::gl_SubgroupInvocationID ()+SubgroupSizeMinusOne)&SubgroupSizeMinusOne) : glsl::subgroupShuffleUp<type_t>(value,1 );
84
128
85
- type_t retval ;
86
- retval [0 ] = hlsl:: mix ( binop_t::identity, left[ItemsPerInvocation- 1 ], bool (glsl:: gl_SubgroupInvocationID ())) ;
129
+ type_t newFirst ;
130
+ newFirst [0 ] = binop_t::identity;
87
131
[unroll]
88
132
for (uint32_t i = 1 ; i < ItemsPerInvocation; i++)
89
- retval [i] = value [i-1 ];
90
- return retval ;
133
+ newFirst [i] = left [i-1 ];
134
+ return hlsl:: mix (newFirst, left, bool (glsl:: gl_SubgroupInvocationID ())) ;
91
135
}
92
136
};
93
137
0 commit comments