Skip to content

Commit 55d89c5

Browse files
committed
no need to store locals in reduce
1 parent a9930a0 commit 55d89c5

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

include/nbl/builtin/hlsl/workgroup2/shared_scan.hlsl

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -94,20 +94,20 @@ struct reduce<Config, BinOp, 2, device_capabilities>
9494
using params_lv1_t = subgroup2::ArithmeticParams<config_t, BinOp, Config::ItemsPerInvocation_1, device_capabilities>;
9595
BinOp binop;
9696

97-
vector_lv0_t scan_local[Config::VirtualWorkgroupSize / Config::WorkgroupSize];
9897
const uint32_t invocationIndex = workgroup::SubgroupContiguousIndex();
9998
// level 0 scan
10099
subgroup2::reduction<params_lv0_t> reduction0;
101100
[unroll]
102101
for (uint32_t idx = 0, virtualInvocationIndex = invocationIndex; idx < Config::VirtualWorkgroupSize / Config::WorkgroupSize; idx++)
103102
{
104-
dataAccessor.get(idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
105-
scan_local[idx] = reduction0(scan_local[idx]);
103+
vector_lv0_t scan_local;
104+
dataAccessor.get(idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local);
105+
scan_local = reduction0(scan_local);
106106
if (glsl::gl_SubgroupInvocationID()==Config::SubgroupSize-1)
107107
{
108108
const uint32_t virtualSubgroupID = idx * (Config::WorkgroupSize >> Config::SubgroupSizeLog2) + glsl::gl_SubgroupID();
109109
const uint32_t bankedIndex = (virtualSubgroupID & (Config::ItemsPerInvocation_1-1)) * Config::SubgroupsPerVirtualWorkgroup + (virtualSubgroupID/Config::ItemsPerInvocation_1);
110-
scratchAccessor.set(bankedIndex, scan_local[idx][Config::ItemsPerInvocation_0-1]); // set last element of subgroup scan (reduction) to level 1 scan
110+
scratchAccessor.set(bankedIndex, scan_local[Config::ItemsPerInvocation_0-1]); // set last element of subgroup scan (reduction) to level 1 scan
111111
}
112112
}
113113
scratchAccessor.workgroupExecutionAndMemoryBarrier();
@@ -227,20 +227,20 @@ struct reduce<Config, BinOp, 3, device_capabilities>
227227
using params_lv2_t = subgroup2::ArithmeticParams<config_t, BinOp, Config::ItemsPerInvocation_2, device_capabilities>;
228228
BinOp binop;
229229

230-
vector_lv0_t scan_local[Config::VirtualWorkgroupSize / Config::WorkgroupSize];
231230
const uint32_t invocationIndex = workgroup::SubgroupContiguousIndex();
232231
// level 0 scan
233232
subgroup2::reduction<params_lv0_t> reduction0;
234233
[unroll]
235234
for (uint32_t idx = 0, virtualInvocationIndex = invocationIndex; idx < Config::VirtualWorkgroupSize / Config::WorkgroupSize; idx++)
236235
{
237-
dataAccessor.get(idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local[idx]);
238-
scan_local[idx] = reduction0(scan_local[idx]);
236+
vector_lv0_t scan_local;
237+
dataAccessor.get(idx * Config::WorkgroupSize + virtualInvocationIndex, scan_local);
238+
scan_local = reduction0(scan_local);
239239
if (glsl::gl_SubgroupInvocationID()==Config::SubgroupSize-1)
240240
{
241241
const uint32_t virtualSubgroupID = idx * (Config::WorkgroupSize >> Config::SubgroupSizeLog2) + glsl::gl_SubgroupID();
242242
const uint32_t bankedIndex = (virtualSubgroupID & (Config::ItemsPerInvocation_1-1)) * Config::SubgroupsPerVirtualWorkgroup + (virtualSubgroupID/Config::ItemsPerInvocation_1);
243-
scratchAccessor.set(bankedIndex, scan_local[idx][Config::ItemsPerInvocation_0-1]); // set last element of subgroup scan (reduction) to level 1 scan
243+
scratchAccessor.set(bankedIndex, scan_local[Config::ItemsPerInvocation_0-1]); // set last element of subgroup scan (reduction) to level 1 scan
244244
}
245245
}
246246
scratchAccessor.workgroupExecutionAndMemoryBarrier();

0 commit comments

Comments
 (0)