Skip to content

Commit 58d8929

Browse files
committed
Add functionality for nabla unpacking trades when doing FFT of packed real signals
1 parent e618e58 commit 58d8929

File tree

1 file changed

+23
-0
lines changed
  • include/nbl/builtin/hlsl/workgroup

1 file changed

+23
-0
lines changed

include/nbl/builtin/hlsl/workgroup/fft.hlsl

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,31 @@ struct FFTIndexingUtils
151151
return getNablaIndex(getDFTMirrorIndex(getDFTIndex(idx)));
152152
}
153153

154+
// When unpacking an FFT of two packed signals, given a `localElementIndex` representing a `globalElementIndex` you need its "mirror index" to unpack the value at
155+
// NablaFFT[globalElementIndex].
156+
// The function above has you covered in that sense, but what also happens is that not only does the thread holding `NablaFFT[globalElementIndex]` need its mirror value
157+
// but also the thread holding said mirror value will at the same time be trying to unpack `NablaFFT[someOtherIndex]` and need the mirror value of that.
158+
// As long as this unpacking is happening concurrently and in order (meaning the local element index - the higher bits - of `globalElementIndex` and `someOtherIndex` is the
159+
// same) then this function returns both the SubgroupContiguousIndex of the other thread AND the local element index of *the mirror* of `someOtherIndex`
160+
struct NablaMirrorTradeInfo // pair of values returned by getNablaMirrorTradeInfo
161+
{
162+
uint32_t otherThreadID; // low bits of the mirrored global element index, i.e. `mirror & (WorkgroupSize - 1)`
163+
uint32_t mirrorLocalIndex; // mirrored global element index divided by WorkgroupSize
164+
};
165+
166+
static NablaMirrorTradeInfo getNablaMirrorTradeInfo(uint32_t localElementIndex) // builds a global element index from `localElementIndex` and SubgroupContiguousIndex(), mirrors it, then splits the mirror into (thread ID, local index)
167+
{
168+
const uint32_t globalElementIndex = localElementIndex * WorkgroupSize | workgroup::SubgroupContiguousIndex(); // `*` binds tighter than `|`, so localElementIndex fills the bits above the workgroup index; NOTE(review): assumes SubgroupContiguousIndex() < WorkgroupSize so the OR cannot collide - confirm
169+
const uint32_t otherElementIndex = FFTIndexingUtils::getNablaMirrorIndex(globalElementIndex); // global index of the mirrored element, as computed by getNablaMirrorIndex
170+
const uint32_t mirrorLocalIndex = otherElementIndex / WorkgroupSize; // upper part of the mirror index
171+
const uint32_t otherThreadID = otherElementIndex & (WorkgroupSize - 1); // lower bits of the mirror index; the mask is valid because WorkgroupSize is defined as 1 << WorkgroupSizeLog2
172+
NablaMirrorTradeInfo info = { otherThreadID, mirrorLocalIndex }; // aggregate init follows member declaration order: otherThreadID, then mirrorLocalIndex
173+
return info;
174+
}
175+
154176
NBL_CONSTEXPR_STATIC_INLINE uint16_t FFTSizeLog2 = ElementsPerInvocationLog2 + WorkgroupSizeLog2; // sum of the two log2 sizes, so FFTSize below is their product as a power of two
155177
NBL_CONSTEXPR_STATIC_INLINE uint32_t FFTSize = uint32_t(1) << FFTSizeLog2; // 2^FFTSizeLog2
178+
NBL_CONSTEXPR_STATIC_INLINE uint32_t WorkgroupSize = uint32_t(1) << WorkgroupSizeLog2; // 2^WorkgroupSizeLog2, hence always a power of two (getNablaMirrorTradeInfo masks with WorkgroupSize - 1)
156179
};
157180

158181
} //namespace fft

0 commit comments

Comments
 (0)