Skip to content

Commit 0b93b15

Browse files
committed
Implementing stuff needed to run Bloom example
1 parent 29dadfb commit 0b93b15

File tree

5 files changed

+83
-13
lines changed

5 files changed

+83
-13
lines changed

include/nbl/builtin/hlsl/fft/utils.hlsl

Whitespace-only changes.

include/nbl/builtin/hlsl/functional.hlsl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ namespace nbl
1313
{
1414
namespace hlsl
1515
{
16-
#ifdef __HLSL_VERSION // CPP
16+
#ifdef __HLSL_VERSION // HLSL
1717
template<uint32_t StorageClass, typename T>
1818
using __spv_ptr_t = spirv::pointer_t<StorageClass,T>;
1919

include/nbl/builtin/hlsl/workgroup/fft.hlsl

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ struct exchangeValues<SharedMemoryAdaptor, float16_t>
3434
const bool topHalf = bool(threadID & stride);
3535
// Pack two halves into a single uint32_t
3636
uint32_t toExchange = bit_cast<uint32_t, float16_t2 >(topHalf ? float16_t2 (lo.real(), lo.imag()) : float16_t2 (hi.real(), hi.imag()));
37-
shuffleXor<SharedMemoryAdaptor, uint32_t>::__call(toExchange, stride, sharedmemAdaptor);
37+
shuffleXor<SharedMemoryAdaptor, uint32_t>(toExchange, stride, sharedmemAdaptor);
3838
float16_t2 exchanged = bit_cast<float16_t2, uint32_t>(toExchange);
3939
if (topHalf)
4040
{
@@ -57,7 +57,7 @@ struct exchangeValues<SharedMemoryAdaptor, float32_t>
5757
const bool topHalf = bool(threadID & stride);
5858
// pack into `float32_t2` because ternary operator doesn't support structs
5959
float32_t2 exchanged = topHalf ? float32_t2(lo.real(), lo.imag()) : float32_t2(hi.real(), hi.imag());
60-
shuffleXor<SharedMemoryAdaptor, float32_t2>::__call(exchanged, stride, sharedmemAdaptor);
60+
shuffleXor<SharedMemoryAdaptor, float32_t2>(exchanged, stride, sharedmemAdaptor);
6161
if (topHalf)
6262
{
6363
lo.real(exchanged.x);
@@ -79,7 +79,7 @@ struct exchangeValues<SharedMemoryAdaptor, float64_t>
7979
const bool topHalf = bool(threadID & stride);
8080
// pack into `float64_t2` because ternary operator doesn't support structs
8181
float64_t2 exchanged = topHalf ? float64_t2(lo.real(), lo.imag()) : float64_t2(hi.real(), hi.imag());
82-
shuffleXor<SharedMemoryAdaptor, float64_t2 >::__call(exchanged, stride, sharedmemAdaptor);
82+
shuffleXor<SharedMemoryAdaptor, float64_t2 >(exchanged, stride, sharedmemAdaptor);
8383
if (topHalf)
8484
{
8585
lo.real(exchanged.x);
@@ -152,6 +152,14 @@ uint32_t getOutputIndex(uint32_t freqIdx)
152152
return glsl::bitfieldReverse<uint32_t>(bitShiftLeftHigher<FFT_SIZE_LOG_2, FFT_SIZE_LOG_2 - ELEMENTS_PER_INVOCATION_LOG_2 + 1>(freqIdx)) >> (32 - FFT_SIZE_LOG_2);
153153
}
154154

155+
// Mirrors an index about the Nyquist frequency
156+
template<uint16_t ElementsPerInvocation, uint32_t WorkgroupSize>
157+
uint32_t mirror(uint32_t idx)
158+
{
159+
NBL_CONSTEXPR_STATIC_INLINE uint32_t FFT_SIZE = WorkgroupSize * uint32_t(ElementsPerInvocation);
160+
return (FFT_SIZE - idx) & (FFT_SIZE - 1);
161+
}
162+
155163
} //namespace fft
156164

157165
// ----------------------------------- End Utils -----------------------------------------------

include/nbl/builtin/hlsl/workgroup/shuffle.hlsl

Lines changed: 70 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define _NBL_BUILTIN_HLSL_WORKGROUP_SHUFFLE_INCLUDED_
33

44
#include "nbl/builtin/hlsl/memory_accessor.hlsl"
5+
#include "nbl/builtin/hlsl/functional.hlsl"
56

67
// TODO: Add other shuffles
78

@@ -14,26 +15,87 @@ namespace hlsl
1415
namespace workgroup
1516
{
1617

18+
// ------------------------------------- Skeletons for implementing other Shuffles --------------------------------
19+
1720
template<typename SharedMemoryAdaptor, typename T>
18-
struct shuffleXor
21+
struct Shuffle
22+
{
23+
static void __call(NBL_REF_ARG(T) value, uint32_t storeIdx, uint32_t loadIdx, NBL_REF_ARG(SharedMemoryAdaptor) sharedmemAdaptor)
24+
{
25+
// TODO: optimization (optional) where we shuffle in the shared memory available (using rounds)
26+
sharedmemAdaptor.template set<T>(storeIdx, value);
27+
28+
// Wait until all writes are done before reading
29+
sharedmemAdaptor.workgroupExecutionAndMemoryBarrier();
30+
31+
sharedmemAdaptor.template get<T>(loadIdx, value);
32+
}
33+
34+
// By default store to threadID in the workgroup
35+
static void __call(NBL_REF_ARG(T) value, uint32_t loadIdx, NBL_REF_ARG(SharedMemoryAdaptor) sharedmemAdaptor)
36+
{
37+
__call(value, uint32_t(SubgroupContiguousIndex()), loadIdx, sharedmemAdaptor);
38+
}
39+
};
40+
41+
template<class UnOp, typename SharedMemoryAdaptor, typename T>
42+
struct ShuffleUnOp
43+
{
44+
static void __call(NBL_REF_ARG(T) value, uint32_t a, NBL_REF_ARG(SharedMemoryAdaptor) sharedmemAdaptor)
45+
{
46+
UnOp unop;
47+
// TODO: optimization (optional) where we shuffle in the shared memory available (using rounds)
48+
sharedmemAdaptor.template set<T>(a, value);
49+
50+
// Wait until all writes are done before reading
51+
sharedmemAdaptor.workgroupExecutionAndMemoryBarrier();
52+
53+
sharedmemAdaptor.template get<T>(unop(a), value);
54+
}
55+
56+
// By default store to threadID's index and load from unop(threadID)
57+
static void __call(NBL_REF_ARG(T) value, NBL_REF_ARG(SharedMemoryAdaptor) sharedmemAdaptor)
58+
{
59+
__call(value, uint32_t(SubgroupContiguousIndex()), sharedmemAdaptor);
60+
}
61+
};
62+
63+
template<class BinOp, typename SharedMemoryAdaptor, typename T>
64+
struct ShuffleBinOp
1965
{
20-
static void __call(NBL_REF_ARG(T) value, uint32_t mask, uint32_t threadID, NBL_REF_ARG(SharedMemoryAdaptor) sharedmemAdaptor)
66+
static void __call(NBL_REF_ARG(T) value, uint32_t a, uint32_t b, NBL_REF_ARG(SharedMemoryAdaptor) sharedmemAdaptor)
2167
{
68+
BinOp binop;
2269
// TODO: optimization (optional) where we shuffle in the shared memory available (using rounds)
23-
sharedmemAdaptor.template set<T>(threadID, value);
24-
70+
sharedmemAdaptor.template set<T>(a, value);
71+
2572
// Wait until all writes are done before reading
2673
sharedmemAdaptor.workgroupExecutionAndMemoryBarrier();
27-
28-
sharedmemAdaptor.template get<T>(threadID ^ mask, value);
74+
75+
sharedmemAdaptor.template get<T>(binop(a, b), value);
2976
}
3077

31-
static void __call(NBL_REF_ARG(T) value, uint32_t mask, NBL_REF_ARG(SharedMemoryAdaptor) sharedmemAdaptor)
78+
// By default first argument of binary op is the thread's ID in the workgroup
79+
static void __call(NBL_REF_ARG(T) value, uint32_t b, NBL_REF_ARG(SharedMemoryAdaptor) sharedmemAdaptor)
3280
{
33-
__call(value, mask, uint32_t(SubgroupContiguousIndex()), sharedmemAdaptor);
81+
__call(value, uint32_t(SubgroupContiguousIndex()), b, sharedmemAdaptor);
3482
}
3583
};
3684

85+
// ------------------------------------------ ShuffleXor ---------------------------------------------------------------
86+
87+
template<typename SharedMemoryAdaptor, typename T>
88+
void shuffleXor(NBL_REF_ARG(T) value, uint32_t threadID, uint32_t mask, NBL_REF_ARG(SharedMemoryAdaptor) sharedmemAdaptor)
89+
{
90+
return ShuffleBinOp<bit_xor<uint32_t>, SharedMemoryAdaptor, T>::__call(value, threadID, mask, sharedmemAdaptor);
91+
}
92+
93+
template<typename SharedMemoryAdaptor, typename T>
94+
void shuffleXor(NBL_REF_ARG(T) value, uint32_t mask, NBL_REF_ARG(SharedMemoryAdaptor) sharedmemAdaptor)
95+
{
96+
return ShuffleBinOp<bit_xor<uint32_t>, SharedMemoryAdaptor, T>::__call(value, mask, sharedmemAdaptor);
97+
}
98+
3799
}
38100
}
39101
}

0 commit comments

Comments
 (0)