Skip to content

Commit c171091

Browse files
EvgeniiGEvergreen
authored and committed
[HDRP] Fix a performance regression introduced in PR 39296
Fix a performance regression introduced in https://github.cds.internal.unity3d.com/unity/unity/pull/39296. The fix introduces scalarization of area lighting code in forward lighting shaders. Both the issue and the fix are console-only.
1 parent 6bc923e commit c171091

File tree

1 file changed

+49
-6
lines changed
  • Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop

1 file changed

+49
-6
lines changed

Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.hlsl

Lines changed: 49 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -639,7 +639,7 @@ void LightLoop( float3 V, PositionInputs posInput, PreLightData preLightData, BS
639639
#if SHADEROPTIONS_AREA_LIGHTS
640640
if (featureFlags & LIGHTFEATUREFLAGS_AREA)
641641
{
642-
uint lightCount, lightStart;
642+
uint lightCount, lightStart; // Start is the offset specific to the tile (or cluster)
643643

644644
#ifndef LIGHTLOOP_DISABLE_TILE_AND_CLUSTER
645645
GetCountAndStart(posInput, LIGHTCATEGORY_AREA, lightStart, lightCount);
@@ -648,14 +648,57 @@ void LightLoop( float3 V, PositionInputs posInput, PreLightData preLightData, BS
648648
lightStart = _PunctualLightCount;
649649
#endif
650650

651-
for (i = 0; i < lightCount; i++)
651+
bool fastPath = false;
652+
#if SCALARIZE_LIGHT_LOOP
653+
uint lightStartLane0;
654+
fastPath = IsFastPath(lightStart, lightStartLane0); // True if all pixels belong to the same tile (or cluster)
655+
656+
if (fastPath)
657+
{
658+
lightStart = lightStartLane0;
659+
}
660+
#endif
661+
662+
// Scalarized loop. All lights that are in a tile/cluster touched by any pixel in the wave are loaded (scalar load), only the ones relevant to the current thread/pixel are processed.
663+
// For clarity, the following code will follow the convention: variables starting with s_ are meant to be wave uniform (meant for scalar register),
664+
// v_ are variables that might have different value for each thread in the wave (meant for vector registers).
665+
// This will perform more loads than it is supposed to, however, the benefits should offset the downside, especially given that light data accessed should be largely coherent.
666+
// Note that the above is valid only if wave intrinsics are supported.
667+
uint v_lightListOffset = 0;
668+
uint v_lightIdx = lightStart;
669+
670+
#if NEED_TO_CHECK_HELPER_LANE
671+
// On some platforms helper lanes don't behave as we'd expect, therefore we prevent them from entering the loop altogether.
672+
// IMPORTANT! This has implications if ddx/ddy is used on results derived from lighting, however given Lightloop is called in compute we should be
673+
// sure it will not happen.
674+
bool isHelperLane = WaveIsHelperLane();
675+
while (!isHelperLane && v_lightListOffset < lightCount)
676+
#else
677+
while (v_lightListOffset < lightCount)
678+
#endif
652679
{
653-
LightData lightData = FetchLight(lightStart, i);
680+
v_lightIdx = FetchIndex(lightStart, v_lightListOffset);
681+
#if SCALARIZE_LIGHT_LOOP
682+
uint s_lightIdx = ScalarizeElementIndex(v_lightIdx, fastPath);
683+
#else
684+
uint s_lightIdx = v_lightIdx;
685+
#endif
686+
if (s_lightIdx == -1)
687+
break;
654688

655-
if (IsMatchingLightLayer(lightData.lightLayers, builtinData.renderingLayers))
689+
LightData s_lightData = FetchLight(s_lightIdx);
690+
691+
// If current scalar and vector light index match, we process the light. The v_lightListOffset for current thread is increased.
692+
// Note that the following should really be ==, however, since helper lanes are not considered by WaveActiveMin, such helper lanes could
693+
// end up with a unique v_lightIdx value that is smaller than s_lightIdx hence being stuck in a loop. All the active lanes will not have this problem.
694+
if (s_lightIdx >= v_lightIdx)
656695
{
657-
DirectLighting lighting = EvaluateBSDF_Area(context, V, posInput, preLightData, lightData, bsdfData, builtinData);
658-
AccumulateDirectLighting(lighting, aggregateLighting);
696+
v_lightListOffset++;
697+
if (IsMatchingLightLayer(s_lightData.lightLayers, builtinData.renderingLayers))
698+
{
699+
DirectLighting lighting = EvaluateBSDF_Area(context, V, posInput, preLightData, s_lightData, bsdfData, builtinData);
700+
AccumulateDirectLighting(lighting, aggregateLighting);
701+
}
659702
}
660703
}
661704
}

0 commit comments

Comments
 (0)