Skip to content

Commit 5777e2a

Browse files
committed
Changed position of memory barrier in workgroup FFT
1 parent 415bff4 commit 5777e2a

File tree

2 files changed

+4
-3
lines changed

2 files changed

+4
-3
lines changed

examples_tests

Submodule examples_tests updated 48 files

include/nbl/builtin/hlsl/workgroup/fft.hlsl

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -219,8 +219,9 @@ struct FFT<K, true, Scalar, device_capabilities>
219219
}
220220
accessor = offsetAccessor.accessor;
221221

222-
for (uint32_t stride = _NBL_HLSL_WORKGROUP_SIZE_ << 1; stride <= (K >> 1) * _NBL_HLSL_WORKGROUP_SIZE_; stride <<= 1)
222+
for (uint32_t stride = 2 * _NBL_HLSL_WORKGROUP_SIZE_; stride < K * _NBL_HLSL_WORKGROUP_SIZE_; stride <<= 1)
223223
{
224+
accessor.memoryBarrier(); // no execution barrier just making sure writes propagate to accessor
224225
//[unroll(K/2)]
225226
for (uint32_t virtualThreadID = SubgroupContiguousIndex(); virtualThreadID < (K >> 1) * _NBL_HLSL_WORKGROUP_SIZE_; virtualThreadID += _NBL_HLSL_WORKGROUP_SIZE_)
226227
{
@@ -243,7 +244,7 @@ struct FFT<K, true, Scalar, device_capabilities>
243244
accessor.set(loIx, lo);
244245
accessor.set(hiIx, hi);
245246
}
246-
accessor.memoryBarrier(); // no execution barrier just making sure writes propagate to accessor
247+
247248
}
248249

249250
}

0 commit comments

Comments
 (0)