1+
2+ #ifndef _NBL_BUILTIN_HLSL_WORKGROUP_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_
3+ #define _NBL_BUILTIN_HLSL_WORKGROUP_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_
4+
5+ namespace nbl
6+ {
7+ namespace hlsl
8+ {
9+ namespace workgroup
10+ {
11+ namespace envmap
12+ {
13+ namespace impl
14+ {
/**
 * Binary choice between two weights, driven by the random variable `xi`.
 *
 * Computes the probability of the first option from the two weights and forwards
 * it, together with `xi`, to `math::partitionRandVariable`, whose boolean result
 * is returned unchanged.
 *
 * @param first   weight of the first option
 * @param second  weight of the second option
 * @param xi      random variable, passed by reference to `math::partitionRandVariable`
 *                (presumably rescaled in place for reuse; confirm against that helper)
 * @return whatever `math::partitionRandVariable` returns; judging by this function's
 *         name that is `true` when the second option is picked (TODO confirm)
 */
bool choseSecond(float first, float second, NBL_REF_ARG(float) xi)
{
    // Third argument of partitionRandVariable is not needed by this caller;
    // presumably an out-parameter, its value is discarded.
    float unusedOut = 0.0f;

    // Algebraically first / (first + second), but kept in this form for numerical
    // resilience under IEEE754: e.g. first == 0 with second > 0 evaluates to
    // 1 / (1 + inf) == 0 instead of going through a 0 / x division.
    float probabilityOfFirst = 1.0f / (1.0f + second / first);

    return math::partitionRandVariable(probabilityOfFirst, xi, unusedOut);
}
22+
23+ }
24+
25+ }
26+ }
27+ }
28+ }
29+
30+ #ifdef __HLSL_VERSION
31+ namespace nbl
32+ {
33+ namespace hlsl
34+ {
35+ namespace workgroup
36+ {
37+ namespace envmap
38+ {
39+
/**
 * Generates one warp map entry per invocation by descending a luminance mip chain.
 *
 * Starting from the mip level `findMSB(lumaMapSize.x) - 1`, each step doubles the
 * current texel coordinate `p` and uses `impl::choseSecond` on the luminance values
 * read through `luminanceAccessor` to decide which half (first in Y, then in X) to
 * descend into, consuming `xi` along the way. The resulting coordinate plus the
 * remaining `xi` is normalised by `lumaMapSize` and written via `outputAccessor`.
 */
struct WarpmapGeneration
{
    /**
     * @param luminanceAccessor  read access to the luminance mips, called as
     *                           `get(coord, mip, offset)` (presumably base texel, mip level
     *                           and texel offset; confirm against LuminanceReadAccessor)
     * @param outputAccessor     receives the result through `set(threadID, directionUV)`
     * @param lumaMapSize        size of the luminance map; `.x` selects the starting mip and the
     *                           full vector normalises the final coordinate
     * @param warpMapSize        size of the warp map; bounds the invocation and scales `xi`
     */
    // TODO(kevinyu): Should lumaMapSize and warpMapSize be provided by the Accessors?
    template <typename LuminanceAccessor, typename OutputAccessor NBL_FUNC_REQUIRES(envmap::LuminanceReadAccessor<LuminanceAccessor> && envmap::WarpmapWriteAccessor<OutputAccessor>)
    static void __call(NBL_CONST_REF_ARG(LuminanceAccessor) luminanceAccessor, NBL_REF_ARG(OutputAccessor) outputAccessor, uint32_t2 lumaMapSize, uint32_t2 warpMapSize)
    {
        // NOTE(review): threadID is a scalar, so the bounds check below broadcasts it against
        // both components of warpMapSize and both components of xi are derived from the same
        // index. Confirm whether a 2D pixel coordinate was intended here.
        const uint32_t threadID = uint32_t(SubgroupContiguousIndex());
        // Per-axis index of the last warp map pixel; must stay a vector so that xi.x and xi.y
        // are each normalised by their own extent.
        const uint32_t2 lastWarpMapPixel = warpMapSize - uint32_t2(1, 1);

        if (all(threadID < warpMapSize))
        {
            // NOTE(review): a warpMapSize component of 1 makes the divisor 0 - confirm callers never pass that.
            float32_t2 xi = float32_t2(threadID) / float32_t2(lastWarpMapPixel);

            uint32_t2 p;
            p.y = 0;

            // TODO(kevinyu): Implement findMSB
            // Single mip index (scalar): the level at which the map is expected to be 2x1.
            const uint32_t mip2x1 = findMSB(lumaMapSize.x) - 1;
            // do one split in the X axis first cause penultimate full mip would have been 2x1
            p.x = impl::choseSecond(
                luminanceAccessor.get(uint32_t2(0, 0), mip2x1, uint32_t2(0, 0)),
                luminanceAccessor.get(uint32_t2(0, 0), mip2x1, uint32_t2(1, 0)),
                xi.x) ? 1 : 0;

            // Walk down from mip2x1 - 1 to mip 0, refining p by one bit per axis per level.
            for (uint32_t i = mip2x1; i != 0;)
            {
                --i;
                p <<= 1;
                // 2x2 footprint under p at mip i, ordered (0,1), (1,1), (1,0), (0,0).
                const float32_t4 values = float32_t4(
                    luminanceAccessor.get(p, i, uint32_t2(0, 1)),
                    luminanceAccessor.get(p, i, uint32_t2(1, 1)),
                    luminanceAccessor.get(p, i, uint32_t2(1, 0)),
                    luminanceAccessor.get(p, i, uint32_t2(0, 0))
                );

                // Weights of the two texels in whichever row gets selected.
                float32_t wx_0, wx_1;
                {
                    // Row sums: wy_0 is the y-offset-0 row, wy_1 the y-offset-1 row.
                    const float32_t wy_0 = values[3] + values[2];
                    const float32_t wy_1 = values[1] + values[0];
                    if (impl::choseSecond(wy_0, wy_1, xi.y))
                    {
                        p.y |= 1;
                        wx_0 = values[0];
                        wx_1 = values[1];
                    }
                    else
                    {
                        wx_0 = values[3];
                        wx_1 = values[2];
                    }
                }

                if (impl::choseSecond(wx_0, wx_1, xi.x))
                {
                    p.x |= 1;
                }
            }

            // Chosen texel plus the leftover xi as a sub-texel offset, normalised by the luma map size.
            const float32_t2 directionUV = (float32_t2(p.x, p.y) + xi) / float32_t2(lumaMapSize);
            outputAccessor.set(threadID, directionUV);
        }
    }

};
101+
102+ }
103+ }
104+ }
105+ }
106+ #endif
107+
108+ #endif
0 commit comments