Skip to content

Commit adc22f6

Browse files
kechoEvergreen
authored and committed
[JIRA # UUM-65155] Fixing stencil compiler bug for subsurface scattering on scarlett
Fix a race condition in the coarse stencil computation caused by a compiler bug in DXC. The issue manifests itself as flickering tiles on subsurface scattering pixels. It occurs because the DXC compiler does not treat branches that use a groupThreadId vector comparison as scalar. Switching the comparison to the pre-flattened group index (SV_GroupIndex) lets the compiler recognize the branch as scalar.
1 parent de25ca2 commit adc22f6

File tree

1 file changed

+12
-19
lines changed

1 file changed

+12
-19
lines changed

Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ResolveStencilBuffer.compute

Lines changed: 12 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -46,52 +46,45 @@ RWStructuredBuffer<uint> _CoarseStencilBuffer;
4646
groupshared uint coarseStencilValue;
4747
#endif
4848

49-
[numthreads(8, 8, 1)]
50-
void Main(uint3 groupId : SV_GroupID,
51-
uint3 groupThreadId : SV_GroupThreadID,
52-
uint3 dispatchThreadID : SV_DispatchThreadID)
49+
[numthreads(64, 1, 1)]
50+
void Main(uint3 groupId : SV_GroupID, uint groupThreadIndex : SV_GroupIndex)
5351
{
54-
UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadID.z);
52+
UNITY_XR_ASSIGN_VIEW_INDEX(groupId.z);
5553

5654
// The best shot at resolving is being overly conservative, hence the OR operator. This is by nature inaccurate, but there is no way to blend MSAA sub-samples properly and we need to pick the lesser evil.
5755
uint resolvedStencil = 0;
56+
uint2 pixelCoord = (groupId.xy << 3) + uint2(groupThreadIndex & 0x7, groupThreadIndex >> 3);
5857

59-
if (dispatchThreadID.x < (uint)_ScreenSize.x && dispatchThreadID.y < (uint)_ScreenSize.y)
58+
if (pixelCoord.x < (uint)_ScreenSize.x && pixelCoord.y < (uint)_ScreenSize.y)
6059
{
6160
UNITY_UNROLL
6261
for (uint i = 0; i < NUM_SAMPLES; i++)
6362
{
6463
uint2 sampledStencil;
6564
#ifndef MSAA
66-
sampledStencil = LOAD_TEXTURE2D_X(_StencilTexture, dispatchThreadID.xy);
65+
sampledStencil = LOAD_TEXTURE2D_X(_StencilTexture, pixelCoord.xy);
6766
#else
68-
sampledStencil = LOAD_TEXTURE2D_X_MSAA(_StencilTexture, dispatchThreadID.xy, i);
67+
sampledStencil = LOAD_TEXTURE2D_X_MSAA(_StencilTexture, pixelCoord.xy, i);
6968
#endif
7069
resolvedStencil |= GetStencilValue(sampledStencil); // In not MSAA cases the | is the same as assigning given that NUM_SAMPLES is 1
7170
}
7271
}
7372
#if defined(RESOLVE) && defined(MSAA)
74-
_OutputStencilBuffer[COORD_TEXTURE2D_X(dispatchThreadID.xy)] = uint2(resolvedStencil, resolvedStencil);
73+
_OutputStencilBuffer[COORD_TEXTURE2D_X(pixelCoord.xy)] = uint2(resolvedStencil, resolvedStencil);
7574
#endif
7675

7776
#ifdef COARSE_STENCIL
7877

7978
#if USE_INTRINSICS
8079

8180
// Need to workaround a warning incorrectly triggered when on Xbox One, so instead of using WaveIsFirstLane()
82-
// we check the groupThreadId as in the non intrinsic version.
83-
//bool isFirstThread = WaveIsFirstLane();
84-
bool isFirstThread = groupThreadId.x == 0 && groupThreadId.y == 0;
81+
// we check the groupThreadIndex as in the non intrinsic version.
8582
uint coarseStencilValue = WaveActiveBitOr(resolvedStencil);
8683

87-
8884
#else
8985

90-
bool isFirstThread = groupThreadId.x == 0 && groupThreadId.y == 0;
91-
if (isFirstThread)
92-
{
86+
if (groupThreadIndex == 0)
9387
coarseStencilValue = 0;
94-
}
9588

9689
GroupMemoryBarrierWithGroupSync();
9790

@@ -101,11 +94,11 @@ void Main(uint3 groupId : SV_GroupID,
10194

10295
#endif
10396

104-
//This temp is needed outside the if(isFirstThread) condition to workaround a DXC DXIL codegen
97+
//This temp is needed outside the if(groupThreadIndex == 0) condition to workaround a DXC DXIL codegen
10598
// issue https://github.com/microsoft/DirectXShaderCompiler/issues/2743 until it's fixed
10699
uint perThreadCoarseStencilValue = coarseStencilValue;
107100

108-
if (isFirstThread)
101+
if (groupThreadIndex == 0)
109102
{
110103
uint addressIndex = Get1DAddressFromPixelCoord(groupId.xy, _CoarseStencilBufferSize.xy, groupId.z);
111104
_CoarseStencilBuffer[addressIndex] = perThreadCoarseStencilValue;

0 commit comments

Comments
 (0)