Skip to content

Commit ff3a8f2

Browse files
committed
GS/VK/GL/DX12/DX11: Depth feedback loops and accurate AFAIL.
1 parent f322dfb commit ff3a8f2

File tree

18 files changed

+700
-256
lines changed

18 files changed

+700
-256
lines changed

bin/resources/shaders/dx11/tfx.fx

Lines changed: 55 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,18 @@
2121
#define GS_FORWARD_PRIMID 0
2222
#endif
2323

24+
#ifndef ZTST_GEQUAL
25+
#define ZTST_GEQUAL 2
26+
#define ZTST_GREATER 3
27+
#endif
28+
29+
#ifndef AFAIL_KEEP
30+
#define AFAIL_KEEP 0
31+
#define AFAIL_FB_ONLY 1
32+
#define AFAIL_ZB_ONLY 2
33+
#define AFAIL_RGB_ONLY 3
34+
#endif
35+
2436
#ifndef PS_FST
2537
#define PS_IIP 0
2638
#define PS_FST 0
@@ -78,12 +90,16 @@
7890
#define PS_NO_COLOR 0
7991
#define PS_NO_COLOR1 0
8092
#define PS_DATE 0
93+
#define PS_TEX_IS_FB 0
94+
#define PS_COLOR_FEEDBACK 0
95+
#define PS_DEPTH_FEEDBACK 0
8196
#endif
8297

8398
#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)
8499
#define SW_BLEND_NEEDS_RT (SW_BLEND && (PS_BLEND_A == 1 || PS_BLEND_B == 1 || PS_BLEND_C == 1 || PS_BLEND_D == 1))
85100
#define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED)
86-
#define NEEDS_RT_FOR_AFAIL (PS_AFAIL == 3 && PS_NO_COLOR1)
101+
#define AFAIL_NEEDS_RT (PS_AFAIL == AFAIL_ZB_ONLY || (PS_AFAIL == AFAIL_RGB_ONLY && PS_NO_COLOR1))
102+
#define AFAIL_NEEDS_DEPTH (PS_AFAIL == AFAIL_FB_ONLY || PS_AFAIL == AFAIL_RGB_ONLY)
87103

88104
struct VS_INPUT
89105
{
@@ -138,7 +154,7 @@ struct PS_OUTPUT
138154
#endif
139155
#endif
140156
#endif
141-
#if PS_ZCLAMP
157+
#if PS_ZCLAMP || (PS_DEPTH_FEEDBACK && AFAIL_NEEDS_DEPTH)
142158
float depth : SV_Depth;
143159
#endif
144160
};
@@ -147,6 +163,7 @@ Texture2D<float4> Texture : register(t0);
147163
Texture2D<float4> Palette : register(t1);
148164
Texture2D<float4> RtTexture : register(t2);
149165
Texture2D<float> PrimMinTexture : register(t3);
166+
Texture2D<float> DepthTexture : register(t4);
150167
SamplerState TextureSampler : register(s0);
151168

152169
#ifdef DX12
@@ -1017,10 +1034,27 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy)
10171034

10181035
PS_OUTPUT ps_main(PS_INPUT input)
10191036
{
1037+
1038+
#if PS_DEPTH_FEEDBACK && (PS_ZTST == ZTST_GEQUAL || PS_ZTST == ZTST_GREATER)
1039+
#if PS_ZTST == ZTST_GEQUAL
1040+
if (input.p.z < DepthTexture.Load(int3(input.p.xy, 0)).r)
1041+
discard;
1042+
#elif PS_ZTST == ZTST_GREATER
1043+
if (input.p.z <= DepthTexture.Load(int3(input.p.xy, 0)).r)
1044+
discard;
1045+
#endif
1046+
#endif // PS_ZTST
1047+
10201048
float4 C = ps_color(input);
1049+
1050+
#if PS_FIXED_ONE_A
1051+
// AA (Fixed one) will output a coverage of 1.0 as alpha
1052+
C.a = 128.0f;
1053+
#endif
1054+
10211055
bool atst_pass = atst(C);
10221056

1023-
#if PS_AFAIL == 0 // KEEP or ATST off
1057+
#if PS_AFAIL == AFAIL_KEEP
10241058
if (!atst_pass)
10251059
discard;
10261060
#endif
@@ -1034,14 +1068,6 @@ PS_OUTPUT ps_main(PS_INPUT input)
10341068
discard;
10351069
}
10361070

1037-
// Must be done before alpha correction
1038-
1039-
// AA (Fixed one) will output a coverage of 1.0 as alpha
1040-
if (PS_FIXED_ONE_A)
1041-
{
1042-
C.a = 128.0f;
1043-
}
1044-
10451071
float4 alpha_blend = (float4)0.0f;
10461072
if (SW_AD_TO_HW)
10471073
{
@@ -1186,7 +1212,7 @@ PS_OUTPUT ps_main(PS_INPUT input)
11861212

11871213
ps_fbmask(C, input.p.xy);
11881214

1189-
#if PS_AFAIL == 3 && !PS_NO_COLOR1 // RGB_ONLY
1215+
#if (PS_AFAIL == AFAIL_RGB_ONLY) && !PS_NO_COLOR1
11901216
// Use alpha blend factor to determine whether to update A.
11911217
alpha_blend.a = float(atst_pass);
11921218
#endif
@@ -1197,11 +1223,23 @@ PS_OUTPUT ps_main(PS_INPUT input)
11971223
#if !PS_NO_COLOR1
11981224
output.c1 = alpha_blend;
11991225
#endif
1200-
#if PS_AFAIL == 3 && PS_NO_COLOR1 // RGB_ONLY, no dual src blend
1226+
1227+
// Alpha test with feedback
1228+
#if (PS_AFAIL == AFAIL_FB_ONLY) && PS_DEPTH_FEEDBACK
1229+
if (!atst_pass)
1230+
input.p.z = DepthTexture.Load(int3(input.p.xy, 0)).r;
1231+
#elif (PS_AFAIL == AFAIL_ZB_ONLY) && PS_COLOR_FEEDBACK
1232+
if (!atst_pass)
1233+
output.c0 = RtTexture.Load(int3(input.p.xy, 0));
1234+
#elif (PS_AFAIL == AFAIL_RGB_ONLY)
12011235
if (!atst_pass)
12021236
{
1203-
float RTa = NEEDS_RT_FOR_AFAIL ? RtTexture.Load(int3(input.p.xy, 0)).a : 0.0f;
1204-
output.c0.a = RTa;
1237+
#if PS_COLOR_FEEDBACK && PS_NO_COLOR1 // No dual src blend
1238+
output.c0.a = RtTexture.Load(int3(input.p.xy, 0)).a;
1239+
#endif
1240+
#if PS_DEPTH_FEEDBACK
1241+
input.p.z = DepthTexture.Load(int3(input.p.xy, 0)).r;
1242+
#endif
12051243
}
12061244
#endif
12071245

@@ -1211,6 +1249,8 @@ PS_OUTPUT ps_main(PS_INPUT input)
12111249

12121250
#if PS_ZCLAMP
12131251
output.depth = min(input.p.z, MaxDepthPS);
1252+
#elif PS_DEPTH_FEEDBACK && AFAIL_NEEDS_DEPTH
1253+
output.depth = input.p.z; // Output depth value for ATST pass/fail
12141254
#endif
12151255

12161256
return output;

bin/resources/shaders/opengl/tfx_fs.glsl

Lines changed: 77 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,18 @@
1111
#define SHUFFLE_WRITE 2
1212
#define SHUFFLE_READWRITE 3
1313

14+
#ifndef ZTST_GEQUAL
15+
#define ZTST_GEQUAL 2
16+
#define ZTST_GREATER 3
17+
#endif
18+
19+
#ifndef AFAIL_KEEP
20+
#define AFAIL_KEEP 0
21+
#define AFAIL_FB_ONLY 1
22+
#define AFAIL_ZB_ONLY 2
23+
#define AFAIL_RGB_ONLY 3
24+
#endif
25+
1426
// TEX_COORD_DEBUG output the uv coordinate as color. It is useful
1527
// to detect bad sampling due to upscaling
1628
//#define TEX_COORD_DEBUG
@@ -25,9 +37,13 @@
2537
#define SW_AD_TO_HW (PS_BLEND_C == 1 && PS_A_MASKED)
2638
#define PS_PRIMID_INIT (PS_DATE == 1 || PS_DATE == 2)
2739
#define NEEDS_RT_EARLY (PS_TEX_IS_FB == 1 || PS_DATE >= 5)
28-
#define NEEDS_RT_FOR_AFAIL (PS_AFAIL == 3 && PS_NO_COLOR1)
29-
#define NEEDS_RT (NEEDS_RT_EARLY || NEEDS_RT_FOR_AFAIL || (!PS_PRIMID_INIT && (PS_FBMASK || SW_BLEND_NEEDS_RT || SW_AD_TO_HW)))
40+
// Alpha-test-fail modes that must read back the render target: ZB_ONLY (colour is restored from the RT on failure) and RGB_ONLY when there is no second colour output (PS_NO_COLOR1) to mask alpha.
#define NEEDS_RT_FOR_AFAIL (PS_AFAIL == AFAIL_ZB_ONLY || (PS_AFAIL == AFAIL_RGB_ONLY && PS_NO_COLOR1))
41+
#define NEEDS_DEPTH_FOR_AFAIL (PS_AFAIL == AFAIL_FB_ONLY || PS_AFAIL == AFAIL_RGB_ONLY)
42+
#define NEEDS_RT (NEEDS_RT_EARLY || NEEDS_RT_FOR_AFAIL || (!PS_PRIMID_INIT && (PS_FBMASK || SW_BLEND_NEEDS_RT || SW_AD_TO_HW)) || PS_COLOR_FEEDBACK)
3043
#define NEEDS_TEX (PS_TFX != 4)
44+
#define NEEDS_DEPTH (PS_DEPTH_FEEDBACK && NEEDS_DEPTH_FOR_AFAIL)
45+
46+
vec4 FragCoord;
3147

3248
layout(std140, binding = 0) uniform cb21
3349
{
@@ -107,9 +123,10 @@ layout(binding = 2) uniform sampler2D RtSampler; // note 2 already use by the im
107123

108124
#if PS_DATE == 3
109125
layout(binding = 3) uniform sampler2D img_prim_min;
126+
#endif
110127

111-
// I don't remember why I set this parameter but it is surely useless
112-
//layout(pixel_center_integer) in vec4 gl_FragCoord;
128+
#if NEEDS_DEPTH
129+
layout(binding = 4) uniform sampler2D DepthSampler;
113130
#endif
114131

115132
vec4 sample_from_rt()
@@ -119,7 +136,16 @@ vec4 sample_from_rt()
119136
#elif HAS_FRAMEBUFFER_FETCH
120137
return LAST_FRAG_COLOR;
121138
#else
122-
return texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
139+
return texelFetch(RtSampler, ivec2(FragCoord.xy), 0);
140+
#endif
141+
}
142+
143+
vec4 sample_from_depth()
144+
{
145+
#if !NEEDS_DEPTH
146+
return vec4(0.0);
147+
#else
148+
return texelFetch(DepthSampler, ivec2(FragCoord.xy), 0);
123149
#endif
124150
}
125151

@@ -315,7 +341,7 @@ int fetch_raw_depth()
315341
#if PS_TEX_IS_FB == 1
316342
return int(sample_from_rt().r * multiplier);
317343
#else
318-
return int(texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0).r * multiplier);
344+
return int(texelFetch(TextureSampler, ivec2(FragCoord.xy), 0).r * multiplier);
319345
#endif
320346
}
321347

@@ -324,7 +350,7 @@ vec4 fetch_raw_color()
324350
#if PS_TEX_IS_FB == 1
325351
return sample_from_rt();
326352
#else
327-
return texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0);
353+
return texelFetch(TextureSampler, ivec2(FragCoord.xy), 0);
328354
#endif
329355
}
330356

@@ -724,9 +750,9 @@ void ps_dither(inout vec3 C, float As)
724750
{
725751
#if PS_DITHER > 0 && PS_DITHER < 3
726752
#if PS_DITHER == 2
727-
ivec2 fpos = ivec2(gl_FragCoord.xy);
753+
ivec2 fpos = ivec2(FragCoord.xy);
728754
#else
729-
ivec2 fpos = ivec2(gl_FragCoord.xy * RcpScaleFactor);
755+
ivec2 fpos = ivec2(FragCoord.xy * RcpScaleFactor);
730756
#endif
731757
float value = DitherMatrix[fpos.y&3][fpos.x&3];
732758

@@ -969,9 +995,21 @@ float As = As_rgba.a;
969995

970996
void ps_main()
971997
{
998+
FragCoord = gl_FragCoord;
999+
1000+
#if NEEDS_DEPTH && (PS_ZTST == ZTST_GEQUAL || PS_ZTST == ZTST_GREATER)
1001+
#if PS_ZTST == ZTST_GEQUAL
1002+
if (FragCoord.z < sample_from_depth().r)
1003+
discard;
1004+
#elif PS_ZTST == ZTST_GREATER
1005+
if (FragCoord.z <= sample_from_depth().r)
1006+
discard;
1007+
#endif
1008+
#endif // PS_ZTST
1009+
9721010
#if PS_SCANMSK & 2
9731011
// fail depth test on prohibited lines
974-
if ((int(gl_FragCoord.y) & 1) == (PS_SCANMSK & 1))
1012+
if ((int(FragCoord.y) & 1) == (PS_SCANMSK & 1))
9751013
discard;
9761014
#endif
9771015

@@ -1007,7 +1045,7 @@ void ps_main()
10071045
#endif
10081046

10091047
#if PS_DATE == 3
1010-
int stencil_ceil = int(texelFetch(img_prim_min, ivec2(gl_FragCoord.xy), 0).r);
1048+
int stencil_ceil = int(texelFetch(img_prim_min, ivec2(FragCoord.xy), 0).r);
10111049
// Note gl_PrimitiveID == stencil_ceil will be the primitive that will update
10121050
// the bad alpha value so we must keep it.
10131051

@@ -1017,18 +1055,17 @@ void ps_main()
10171055
#endif
10181056

10191057
vec4 C = ps_color();
1020-
bool atst_pass = atst(C);
10211058

1022-
#if PS_AFAIL == 0 // KEEP or ATST off
1023-
if (!atst_pass)
1024-
discard;
1059+
#if PS_FIXED_ONE_A
1060+
// AA (Fixed one) will output a coverage of 1.0 as alpha
1061+
C.a = 128.0f;
10251062
#endif
10261063

1027-
// Must be done before alpha correction
1064+
bool atst_pass = atst(C);
10281065

1029-
// AA (Fixed one) will output a coverage of 1.0 as alpha
1030-
#if PS_FIXED_ONE_A
1031-
C.a = 128.0f;
1066+
#if PS_AFAIL == AFAIL_KEEP
1067+
if (!atst_pass)
1068+
discard;
10321069
#endif
10331070

10341071
#if SW_AD_TO_HW
@@ -1066,7 +1103,6 @@ void ps_main()
10661103

10671104
ps_blend(C, alpha_blend);
10681105

1069-
10701106
#if PS_SHUFFLE
10711107
#if !PS_READ16_SRC && !PS_SHUFFLE_SAME && !(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE)
10721108
uvec4 denorm_c_after = uvec4(C);
@@ -1118,7 +1154,7 @@ void ps_main()
11181154

11191155
ps_fbmask(C);
11201156

1121-
#if PS_AFAIL == 3 && !PS_NO_COLOR1 // RGB_ONLY
1157+
#if (PS_AFAIL == AFAIL_RGB_ONLY) && !PS_NO_COLOR1 // RGB_ONLY with a second colour output available
11221158
// Use alpha blend factor to determine whether to update A.
11231159
alpha_blend.a = float(atst_pass);
11241160
#endif
@@ -1134,16 +1170,34 @@ void ps_main()
11341170
#else
11351171
SV_Target0.rgb = C.rgb / 255.0f;
11361172
#endif
1137-
#if PS_AFAIL == 3 && PS_NO_COLOR1 // RGB_ONLY, no dual src blend
1173+
1174+
// Alpha test with feedback
1175+
#if (PS_AFAIL == AFAIL_FB_ONLY) && NEEDS_DEPTH
1176+
if (!atst_pass)
1177+
FragCoord.z = sample_from_depth().r;
1178+
#elif (PS_AFAIL == AFAIL_ZB_ONLY) && NEEDS_RT
11381179
if (!atst_pass)
1180+
SV_Target0 = sample_from_rt();
1181+
#elif (PS_AFAIL == AFAIL_RGB_ONLY)
1182+
if (!atst_pass)
1183+
{
1184+
#if NEEDS_RT && PS_NO_COLOR1 // No dual src blend
11391185
SV_Target0.a = sample_from_rt().a;
1186+
#endif
1187+
#if NEEDS_DEPTH
1188+
FragCoord.z = sample_from_depth().r;
1189+
#endif
1190+
}
11401191
#endif
1192+
11411193
#if !PS_NO_COLOR1
11421194
SV_Target1 = alpha_blend;
11431195
#endif
11441196
#endif
11451197

11461198
#if PS_ZCLAMP
1147-
gl_FragDepth = min(gl_FragCoord.z, MaxDepthPS);
1199+
gl_FragDepth = min(FragCoord.z, MaxDepthPS);
1200+
// NEEDS_DEPTH already implies NEEDS_DEPTH_FOR_AFAIL (see its definition), so no extra AFAIL check is needed here.
#elif NEEDS_DEPTH
1201+
gl_FragDepth = FragCoord.z; // Output depth value for ATST pass/fail
11481202
#endif
11491203
}

0 commit comments

Comments
 (0)