Skip to content

Commit a9930a0

Browse files
committed
Removed references to gl_WorkGroupID from the data accessor indexing in workgroup2 reduce and scan
1 parent 542592f commit a9930a0

File tree

3 files changed

+23
-13
lines changed

3 files changed

+23
-13
lines changed

include/nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,16 @@ struct ArithmeticConfiguration
7777
NBL_CONSTEXPR_STATIC_INLINE uint16_t ElementCount = conditional_value<LevelCount==1,uint16_t,0,conditional_value<LevelCount==3,uint16_t,SubgroupSize*ItemsPerInvocation_2,0>::value + SubgroupSize*ItemsPerInvocation_1>::value;
7878
};
7979

80+
// Type trait reporting whether `T` is an instantiation of `ArithmeticConfiguration`.
// Primary template: used for every `T` the specialization below does not match; derives from `bool_constant<false>`.
template<class T>
81+
struct is_configuration : bool_constant<false> {};
82+
83+
// Partial specialization: matches any `ArithmeticConfiguration<W,S,I>` and derives from `bool_constant<true>`.
template<uint16_t W, uint16_t S, uint16_t I>
84+
struct is_configuration<ArithmeticConfiguration<W,S,I> > : bool_constant<true> {};
85+
86+
// Shorthand constant that forwards `is_configuration<T>::value`.
template<typename T>
87+
NBL_CONSTEXPR bool is_configuration_v = is_configuration<T>::value;
88+
89+
8090
}
8191
}
8292
}

include/nbl/builtin/hlsl/workgroup2/shared_scan.hlsl

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ struct reduce<Config, BinOp, 1, device_capabilities>
4343

4444
subgroup2::reduction<params_t> reduction;
4545
vector_t value;
46-
dataAccessor.get(glsl::gl_WorkGroupID().x * Config::SubgroupSize + workgroup::SubgroupContiguousIndex(), value);
46+
dataAccessor.get(workgroup::SubgroupContiguousIndex(), value);
4747
value = reduction(value);
48-
dataAccessor.set(glsl::gl_WorkGroupID().x * Config::SubgroupSize + workgroup::SubgroupContiguousIndex(), value); // can be safely merged with top line?
48+
dataAccessor.set(workgroup::SubgroupContiguousIndex(), value);
4949
}
5050
};
5151

@@ -63,7 +63,7 @@ struct scan<Config, BinOp, Exclusive, 1, device_capabilities>
6363
using params_t = subgroup2::ArithmeticParams<config_t, BinOp, Config::ItemsPerInvocation_0, device_capabilities>;
6464

6565
vector_t value;
66-
dataAccessor.get(glsl::gl_WorkGroupID().x * Config::SubgroupSize + workgroup::SubgroupContiguousIndex(), value);
66+
dataAccessor.get(workgroup::SubgroupContiguousIndex(), value);
6767
if (Exclusive)
6868
{
6969
subgroup2::exclusive_scan<params_t> excl_scan;
@@ -74,7 +74,7 @@ struct scan<Config, BinOp, Exclusive, 1, device_capabilities>
7474
subgroup2::inclusive_scan<params_t> incl_scan;
7575
value = incl_scan(value);
7676
}
77-
dataAccessor.set(glsl::gl_WorkGroupID().x * Config::SubgroupSize + workgroup::SubgroupContiguousIndex(), value); // can be safely merged with above lines?
77+
dataAccessor.set(workgroup::SubgroupContiguousIndex(), value); // can be safely merged with above lines?
7878
}
7979
};
8080

@@ -101,7 +101,7 @@ struct reduce<Config, BinOp, 2, device_capabilities>
101101
[unroll]
102102
for (uint32_t idx = 0, virtualInvocationIndex = invocationIndex; idx < Config::VirtualWorkgroupSize / Config::WorkgroupSize; idx++)
103103
{
104-
dataAccessor.get(glsl::gl_WorkGroupID().x * Config::VirtualWorkgroupSize + idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
104+
dataAccessor.get(idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
105105
scan_local[idx] = reduction0(scan_local[idx]);
106106
if (glsl::gl_SubgroupInvocationID()==Config::SubgroupSize-1)
107107
{
@@ -131,7 +131,7 @@ struct reduce<Config, BinOp, 2, device_capabilities>
131131
{
132132
scalar_t reduce_val;
133133
scratchAccessor.get(glsl::gl_SubgroupInvocationID(),reduce_val);
134-
dataAccessor.set(glsl::gl_WorkGroupID().x * Config::VirtualWorkgroupSize + idx * Config::WorkgroupSize + virtualInvocationIndex, reduce_val);
134+
dataAccessor.set(idx * Config::WorkgroupSize + virtualInvocationIndex, reduce_val);
135135
}
136136
}
137137
};
@@ -158,7 +158,7 @@ struct scan<Config, BinOp, Exclusive, 2, device_capabilities>
158158
[unroll]
159159
for (uint32_t idx = 0, virtualInvocationIndex = invocationIndex; idx < Config::VirtualWorkgroupSize / Config::WorkgroupSize; idx++)
160160
{
161-
dataAccessor.get(glsl::gl_WorkGroupID().x * Config::VirtualWorkgroupSize + idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
161+
dataAccessor.get(idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
162162
scan_local[idx] = inclusiveScan0(scan_local[idx]);
163163
if (glsl::gl_SubgroupInvocationID()==Config::SubgroupSize-1)
164164
{
@@ -204,7 +204,7 @@ struct scan<Config, BinOp, Exclusive, 2, device_capabilities>
204204
for (uint32_t i = 0; i < Config::ItemsPerInvocation_0; i++)
205205
scan_local[idx][i] = binop(left, scan_local[idx][i]);
206206
}
207-
dataAccessor.set(glsl::gl_WorkGroupID().x * Config::VirtualWorkgroupSize + idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
207+
dataAccessor.set(idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
208208
}
209209
}
210210
};
@@ -234,7 +234,7 @@ struct reduce<Config, BinOp, 3, device_capabilities>
234234
[unroll]
235235
for (uint32_t idx = 0, virtualInvocationIndex = invocationIndex; idx < Config::VirtualWorkgroupSize / Config::WorkgroupSize; idx++)
236236
{
237-
dataAccessor.get(glsl::gl_WorkGroupID().x * Config::VirtualWorkgroupSize + idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
237+
dataAccessor.get(idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
238238
scan_local[idx] = reduction0(scan_local[idx]);
239239
if (glsl::gl_SubgroupInvocationID()==Config::SubgroupSize-1)
240240
{
@@ -281,7 +281,7 @@ struct reduce<Config, BinOp, 3, device_capabilities>
281281
{
282282
scalar_t reduce_val;
283283
scratchAccessor.get(glsl::gl_SubgroupInvocationID(),reduce_val);
284-
dataAccessor.set(glsl::gl_WorkGroupID().x * Config::VirtualWorkgroupSize + idx * Config::WorkgroupSize + virtualInvocationIndex, reduce_val);
284+
dataAccessor.set(idx * Config::WorkgroupSize + virtualInvocationIndex, reduce_val);
285285
}
286286
}
287287
};
@@ -310,7 +310,7 @@ struct scan<Config, BinOp, Exclusive, 3, device_capabilities>
310310
[unroll]
311311
for (uint32_t idx = 0, virtualInvocationIndex = invocationIndex; idx < Config::VirtualWorkgroupSize / Config::WorkgroupSize; idx++)
312312
{
313-
dataAccessor.get(glsl::gl_WorkGroupID().x * Config::VirtualWorkgroupSize + idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
313+
dataAccessor.get(idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
314314
scan_local[idx] = inclusiveScan0(scan_local[idx]);
315315
if (glsl::gl_SubgroupInvocationID()==Config::SubgroupSize-1)
316316
{
@@ -384,7 +384,7 @@ struct scan<Config, BinOp, Exclusive, 3, device_capabilities>
384384
for (uint32_t i = 0; i < Config::ItemsPerInvocation_0; i++)
385385
scan_local[idx][i] = binop(left, scan_local[idx][i]);
386386
}
387-
dataAccessor.set(glsl::gl_WorkGroupID().x * Config::VirtualWorkgroupSize + idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
387+
dataAccessor.set(idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
388388
}
389389
}
390390
};

0 commit comments

Comments
 (0)