Skip to content

Commit 102700e

Browse files
committed
GS/DX12/GL: Implement depth feedback with temp color target.
1 parent 387cb2f commit 102700e

File tree

19 files changed

+461
-178
lines changed

19 files changed

+461
-178
lines changed

bin/resources/shaders/dx11/convert.fx

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,16 @@ float rgb5a1_to_depth16(float4 val)
214214
return float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-32.0f);
215215
}
216216

217+
// Pixel shader: samples the source at input.t via sample_c() and writes the
// red channel as a single float to color target 0 (SV_Target0).
float ps_convert_float32_depth_to_color(PS_INPUT input) : SV_Target0
218+
{
219+
return sample_c(input.t).r;
220+
}
221+
222+
// Pixel shader: samples the source at input.t via sample_c() and writes the
// red channel to the depth output (SV_Depth). Counterpart of
// ps_convert_float32_depth_to_color.
float ps_convert_float32_color_to_depth(PS_INPUT input) : SV_Depth
223+
{
224+
return sample_c(input.t).r;
225+
}
226+
217227
float ps_convert_float32_float24(PS_INPUT input) : SV_Depth
218228
{
219229
// Truncates depth value to 24bits

bin/resources/shaders/dx11/tfx.fx

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,19 +153,31 @@ struct PS_INPUT
153153

154154
// Pixel shader outputs, selected by preprocessor flags:
//  - c / c0 / c1: color outputs, present unless PS_NO_COLOR (c1 also requires !PS_NO_COLOR1).
//  - depth: present only with PS_ZCLAMP. It is bound to SV_Depth, except when
//    PS_DEPTH_FEEDBACK && PS_NO_COLOR1 && DX12, where it is written to a color
//    target instead: SV_Target1 if c0 was declared (NUM_RTS > 0), otherwise SV_Target0.
// NUM_RTS is a scratch macro used only inside this struct to record whether c0
// was declared; it is #undef'd before the struct closes.
// NOTE(review): NUM_RTS stays 0 in the PS_DATE == 1/2 branch even though `c`
// occupies SV_Target, so depth would also be assigned SV_Target0 there. Presumably
// that flag combination never occurs together with the depth-as-target path -- confirm.
struct PS_OUTPUT
155155
{
156+
#define NUM_RTS 0
156157
#if !PS_NO_COLOR
157158
#if PS_DATE == 1 || PS_DATE == 2
158159
float c : SV_Target;
159160
#else
160161
float4 c0 : SV_Target0;
162+
#undef NUM_RTS
163+
#define NUM_RTS 1
161164
#if !PS_NO_COLOR1
162165
float4 c1 : SV_Target1;
163166
#endif
164167
#endif
165168
#endif
166169
#if PS_ZCLAMP
167-
float depth : SV_Depth;
170+
#if PS_DEPTH_FEEDBACK && PS_NO_COLOR1 && DX12
171+
#if NUM_RTS > 0
172+
float depth : SV_Target1;
173+
#else
174+
float depth : SV_Target0;
175+
#endif
176+
#else
177+
float depth : SV_Depth;
178+
#endif
168179
#endif
180+
#undef NUM_RTS
169181
};
170182

171183
Texture2D<float4> Texture : register(t0);
@@ -1043,7 +1055,6 @@ void ps_blend(inout float4 Color, inout float4 As_rgba, float2 pos_xy)
10431055

10441056
PS_OUTPUT ps_main(PS_INPUT input)
10451057
{
1046-
10471058
#if PS_DEPTH_FEEDBACK && (PS_ZTST == ZTST_GEQUAL || PS_ZTST == ZTST_GREATER)
10481059
#if PS_ZTST == ZTST_GEQUAL
10491060
if (input.p.z < DepthTexture.Load(int3(input.p.xy, 0)).r)
@@ -1257,7 +1268,7 @@ PS_OUTPUT ps_main(PS_INPUT input)
12571268
#endif // PS_DATE != 1/2
12581269

12591270
#if PS_ZCLAMP
1260-
output.depth = min(input.p.z, MaxDepthPS);
1271+
output.depth = min(input.p.z, MaxDepthPS);
12611272
#endif
12621273

12631274
return output;

bin/resources/shaders/opengl/convert.glsl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ layout(binding = 0) uniform sampler2D TextureSampler;
4343
// Use a different output name (SV_Target1) as a reminder that these shaders are a special case: they write a uint, not the default vec4.
4444
#if defined(ps_convert_rgba8_16bits) || defined(ps_convert_float32_32bits)
4545
layout(location = 0) out uint SV_Target1;
46+
#elif defined(ps_convert_float32_depth_to_color)
47+
layout(location = 0) out float SV_Target0;
4648
#else
4749
layout(location = 0) out vec4 SV_Target0;
4850
#endif
@@ -145,6 +147,20 @@ float rgb5a1_to_depth16(vec4 unorm)
145147
return float(((c.r & 0xF8u) >> 3) | ((c.g & 0xF8u) << 2) | ((c.b & 0xF8u) << 7) | ((c.a & 0x80u) << 8)) * exp2(-32.0f);
146148
}
147149

150+
// Compiled only when this entry point is selected. Writes the red channel of
// sample_c() to SV_Target0, which is declared as a float output for this shader.
#ifdef ps_convert_float32_depth_to_color
151+
void ps_convert_float32_depth_to_color()
152+
{
153+
SV_Target0 = sample_c().r;
154+
}
155+
#endif
156+
157+
// Compiled only when this entry point is selected. Writes the red channel of
// sample_c() to the fragment depth (gl_FragDepth).
#ifdef ps_convert_float32_color_to_depth
158+
void ps_convert_float32_color_to_depth()
159+
{
160+
gl_FragDepth = sample_c().r;
161+
}
162+
#endif
163+
148164
#ifdef ps_convert_float32_float24
149165
void ps_convert_float32_float24()
150166
{

bin/resources/shaders/opengl/tfx_fs.glsl

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,14 @@ in SHADER
121121
layout(location = 0) TARGET_0_QUALIFIER vec4 SV_Target0;
122122
#endif
123123

124+
// Float output at location 1, declared only when NEEDS_DEPTH && PS_NO_COLOR1.
// ps_main writes the clamped depth to it instead of gl_FragDepth under PS_ZCLAMP.
// With HAS_FRAMEBUFFER_FETCH it is declared inout so sample_from_depth() can
// read the current value; otherwise it is a plain output.
// NOTE(review): sample_from_depth() is declared vec4 but returns this float
// directly under HAS_FRAMEBUFFER_FETCH -- confirm that compiles, or whether
// vec4(SV_Target1) was intended.
#if NEEDS_DEPTH && PS_NO_COLOR1
125+
#if HAS_FRAMEBUFFER_FETCH
126+
layout(location = 1, index = 0) inout float SV_Target1;
127+
#else
128+
layout(location = 1, index = 0) out float SV_Target1;
129+
#endif
130+
#endif
131+
124132
#if NEEDS_TEX
125133
layout(binding = 0) uniform sampler2D TextureSampler;
126134
layout(binding = 1) uniform sampler2D PaletteSampler;
@@ -134,7 +142,7 @@ layout(binding = 2) uniform sampler2D RtSampler; // note 2 already use by the im
134142
layout(binding = 3) uniform sampler2D img_prim_min;
135143
#endif
136144

137-
#if NEEDS_DEPTH
145+
#if !HAS_FRAMEBUFFER_FETCH && NEEDS_DEPTH
138146
layout(binding = 4) uniform sampler2D DepthSampler;
139147
#endif
140148

@@ -153,6 +161,8 @@ vec4 sample_from_depth()
153161
{
154162
#if !NEEDS_DEPTH
155163
return vec4(0.0);
164+
#elif HAS_FRAMEBUFFER_FETCH
165+
return SV_Target1;
156166
#else
157167
return texelFetch(DepthSampler, ivec2(FragCoord.xy), 0);
158168
#endif
@@ -1218,6 +1228,13 @@ void ps_main()
12181228
#endif
12191229

12201230
#if PS_ZCLAMP
1221-
gl_FragDepth = min(FragCoord.z, MaxDepthPS);
1231+
FragCoord.z = min(FragCoord.z, MaxDepthPS);
1232+
#if NEEDS_DEPTH && PS_NO_COLOR1
1233+
// Warning: do not write SV_Target1 until the end since the value might be needed for
1234+
// FB fetch in sample_from_depth().
1235+
SV_Target1 = FragCoord.z;
1236+
#else
1237+
gl_FragDepth = FragCoord.z;
1238+
#endif
12221239
#endif
12231240
}

bin/resources/shaders/vulkan/convert.glsl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ layout(location = 0) in vec2 v_tex;
2222

2323
#if defined(ps_convert_rgba8_16bits) || defined(ps_convert_float32_32bits)
2424
layout(location = 0) out uint o_col0;
25+
#elif defined(ps_convert_float32_depth_to_color)
26+
layout(location = 0) out float o_col0;
2527
#elif !defined(ps_datm1) && \
2628
!defined(ps_datm0) && \
2729
!defined(ps_datm1_rta_correction) && \
@@ -166,6 +168,20 @@ void ps_colclip_resolve()
166168
}
167169
#endif
168170

171+
// Compiled only when this entry point is selected. Writes the red channel of
// sample_c(v_tex) to o_col0, which is declared as a float output for this shader.
#ifdef ps_convert_float32_depth_to_color
172+
void ps_convert_float32_depth_to_color()
173+
{
174+
o_col0 = sample_c(v_tex).r;
175+
}
176+
#endif
177+
178+
// Compiled only when this entry point is selected. Writes the red channel of
// sample_c(v_tex) to the fragment depth (gl_FragDepth).
#ifdef ps_convert_float32_color_to_depth
179+
void ps_convert_float32_color_to_depth()
180+
{
181+
gl_FragDepth = sample_c(v_tex).r;
182+
}
183+
#endif
184+
169185
#ifdef ps_convert_float32_32bits
170186
void ps_convert_float32_32bits()
171187
{

pcsx2/GS/Renderers/Common/GSDevice.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ const char* shaderName(ShaderConvert value)
6464
case ShaderConvert::RGBA8_TO_FLOAT24_BILN: return "ps_convert_rgba8_float24_biln";
6565
case ShaderConvert::RGBA8_TO_FLOAT16_BILN: return "ps_convert_rgba8_float16_biln";
6666
case ShaderConvert::RGB5A1_TO_FLOAT16_BILN: return "ps_convert_rgb5a1_float16_biln";
67+
case ShaderConvert::FLOAT32_DEPTH_TO_COLOR: return "ps_convert_float32_depth_to_color";
68+
case ShaderConvert::FLOAT32_COLOR_TO_DEPTH: return "ps_convert_float32_color_to_depth";
6769
case ShaderConvert::FLOAT32_TO_FLOAT24: return "ps_convert_float32_float24";
6870
case ShaderConvert::DEPTH_COPY: return "ps_depth_copy";
6971
case ShaderConvert::DOWNSAMPLE_COPY: return "ps_downsample_copy";

pcsx2/GS/Renderers/Common/GSDevice.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ enum class ShaderConvert
3939
RGBA8_TO_FLOAT24_BILN,
4040
RGBA8_TO_FLOAT16_BILN,
4141
RGB5A1_TO_FLOAT16_BILN,
42+
FLOAT32_DEPTH_TO_COLOR,
43+
FLOAT32_COLOR_TO_DEPTH,
4244
FLOAT32_TO_FLOAT24,
4345
DEPTH_COPY,
4446
DOWNSAMPLE_COPY,
@@ -101,6 +103,7 @@ static inline bool HasDepthOutput(ShaderConvert shader)
101103
case ShaderConvert::RGBA8_TO_FLOAT24_BILN:
102104
case ShaderConvert::RGBA8_TO_FLOAT16_BILN:
103105
case ShaderConvert::RGB5A1_TO_FLOAT16_BILN:
106+
case ShaderConvert::FLOAT32_COLOR_TO_DEPTH:
104107
case ShaderConvert::FLOAT32_TO_FLOAT24:
105108
case ShaderConvert::DEPTH_COPY:
106109
return true;
@@ -767,6 +770,7 @@ struct alignas(16) GSHWDrawConfig
767770
};
768771

769772
GSTexture* rt; ///< Render target
773+
GSTexture* ds_as_rt; ///< Depth stencil as color (if supported)
770774
GSTexture* ds; ///< Depth stencil
771775
GSTexture* tex; ///< Source texture
772776
GSTexture* pal; ///< Palette texture
@@ -896,6 +900,7 @@ class GSDevice : public GSAlignedClass<32>
896900
bool stencil_buffer : 1; ///< Supports stencil buffer, and can use for DATE.
897901
bool cas_sharpening : 1; ///< Supports sufficient functionality for contrast adaptive sharpening.
898902
bool test_and_sample_depth: 1; ///< Supports concurrently binding the depth-stencil buffer for sampling and depth testing.
903+
bool depth_as_rt_feedback : 1; ///< Depth feedback loops/barriers by converting depth to a temporary color target.
899904
FeatureSupport()
900905
{
901906
memset(this, 0, sizeof(*this));

pcsx2/GS/Renderers/Common/GSTexture.cpp

Lines changed: 56 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ bool GSTexture::Save(const std::string& fn)
2020
{
2121
// Depth textures need special treatment - we have a stencil component.
2222
// Just re-use the existing conversion shader instead.
23-
if (m_format == Format::DepthStencil)
23+
if (m_format == Format::DepthStencil || m_format == Format::Float32)
2424
{
2525
GSTexture* temp = g_gs_device->CreateRenderTarget(GetWidth(), GetHeight(), Format::Color, false);
2626
if (!temp)
@@ -63,23 +63,40 @@ bool GSTexture::Save(const std::string& fn)
6363

6464
/// Returns a human-readable name for a texture format.
/// The removed ('-') lines indexed a name table by enum value and mapped
/// out-of-range values to "Invalid". The added ('+') lines switch on the
/// enumerator instead, and add a Float32 entry.
/// Unknown values hit `default:`, which calls pxFailRel() with no return after it;
/// if pxFailRel() returns, control falls through to the Format::Invalid case.
const char* GSTexture::GetFormatName(Format format)
6565
{
66-
static constexpr const char* format_names[] = {
67-
"Invalid",
68-
"Color",
69-
"ColorHQ",
70-
"ColorHDR",
71-
"ColorClip",
72-
"DepthStencil",
73-
"UNorm8",
74-
"UInt16",
75-
"UInt32",
76-
"PrimID",
77-
"BC1",
78-
"BC2",
79-
"BC3",
80-
"BC7",
81-
};
82-
return format_names[(static_cast<u32>(format) < std::size(format_names)) ? static_cast<u32>(format) : 0];
66+
switch (format)
67+
{
68+
default:
69+
pxFailRel("Invalid texture format");
70+
case Format::Invalid: return "Invalid";
71+
case Format::Color: return "Color";
72+
case Format::ColorHQ: return "ColorHQ";
73+
case Format::ColorHDR: return "ColorHDR";
74+
case Format::ColorClip: return "ColorClip";
75+
case Format::DepthStencil: return "DepthStencil";
76+
case Format::Float32: return "Float32";
77+
case Format::UNorm8: return "UNorm8";
78+
case Format::UInt16: return "UInt16";
79+
case Format::UInt32: return "UInt32";
80+
case Format::PrimID: return "PrimID";
81+
case Format::BC1: return "BC1";
82+
case Format::BC2: return "BC2";
83+
case Format::BC3: return "BC3";
84+
case Format::BC7: return "BC7";
85+
}
86+
}
87+
88+
/// Returns true for the block-compressed formats (BC1, BC2, BC3, BC7) and
/// false for every other format value.
bool GSTexture::IsBlockCompressedFormat(Format format)
89+
{
90+
switch (format)
91+
{
92+
case Format::BC1:
93+
case Format::BC2:
94+
case Format::BC3:
95+
case Format::BC7:
96+
return true;
97+
default:
98+
return false;
99+
}
83100
}
84101

85102
u32 GSTexture::GetCompressedBytesPerBlock() const
@@ -89,24 +106,26 @@ u32 GSTexture::GetCompressedBytesPerBlock() const
89106

90107
/// Returns the number of bytes per block for a format: per 16-pixel block for
/// the BC formats (per the entry comments), and the per-entry byte size listed
/// for the other formats.
/// The removed ('-') lines indexed a table directly by enum value with no bounds
/// check. The added ('+') lines switch on the enumerator and add a Float32 entry.
/// Unknown values hit `default:`, which calls pxFailRel() with no return after it;
/// if pxFailRel() returns, control falls through to the Format::Invalid case (1).
u32 GSTexture::GetCompressedBytesPerBlock(Format format)
91108
{
92-
static constexpr u32 bytes_per_block[] = {
93-
1, // Invalid
94-
4, // Color/RGBA8
95-
4, // ColorHQ/RGB10A2
96-
8, // ColorHDR/RGBA16F
97-
8, // ColorClip/RGBA16
98-
4, // DepthStencil
99-
1, // UNorm8/R8
100-
2, // UInt16/R16UI
101-
4, // UInt32/R32UI
102-
4, // Int32/R32I
103-
8, // BC1 - 16 pixels in 64 bits
104-
16, // BC2 - 16 pixels in 128 bits
105-
16, // BC3 - 16 pixels in 128 bits
106-
16, // BC7 - 16 pixels in 128 bits
107-
};
108-
109-
return bytes_per_block[static_cast<u32>(format)];
109+
switch (format)
110+
{
111+
default:
112+
pxFailRel("Invalid texture format");
113+
case Format::Invalid: return 1; // Invalid
114+
case Format::Color: return 4; // Color/RGBA8
115+
case Format::ColorHQ: return 4; // ColorHQ/RGB10A2
116+
case Format::ColorHDR: return 8; // ColorHDR/RGBA16F
117+
case Format::ColorClip: return 8; // ColorClip/RGBA16
118+
case Format::DepthStencil: return 4; // DepthStencil
119+
case Format::Float32: return 4; // Float32/R32
120+
case Format::UNorm8: return 1; // UNorm8/R8
121+
case Format::UInt16: return 2; // UInt16/R16UI
122+
case Format::UInt32: return 4; // UInt32/R32UI
123+
case Format::PrimID: return 4; // Int32/R32I
124+
case Format::BC1: return 8; // BC1 - 16 pixels in 64 bits
125+
case Format::BC2: return 16; // BC2 - 16 pixels in 128 bits
126+
case Format::BC3: return 16; // BC3 - 16 pixels in 128 bits
127+
case Format::BC7: return 16; // BC7 - 16 pixels in 128 bits
128+
}
110129
}
111130

112131
u32 GSTexture::GetCompressedBlockSize() const
@@ -116,10 +135,7 @@ u32 GSTexture::GetCompressedBlockSize() const
116135

117136
/// Returns 4 for block-compressed formats and 1 for everything else.
/// The removed ('-') lines tested the enum range BC1..BC7, which depends on
/// enumerator order. The added ('+') line calls IsBlockCompressedFormat() instead.
u32 GSTexture::GetCompressedBlockSize(Format format)
118137
{
119-
if (format >= Format::BC1 && format <= Format::BC7)
120-
return 4;
121-
else
122-
return 1;
138+
return IsBlockCompressedFormat(format) ? 4 : 1;
123139
}
124140

125141
u32 GSTexture::CalcUploadPitch(Format format, u32 width)

pcsx2/GS/Renderers/Common/GSTexture.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ class GSTexture
3434
ColorHDR, ///< High dynamic range (RGBA16F) color texture
3535
ColorClip, ///< Color texture with more bits for colclip (wrap) emulation, given that blending requires 9bpc (RGBA16Unorm)
3636
DepthStencil, ///< Depth stencil texture
37+
Float32, ///< For treating depth texture as RT
3738
UNorm8, ///< A8UNorm texture for paletted textures and the OSD font
3839
UInt16, ///< UInt16 texture for reading back 16-bit depth
3940
UInt32, ///< UInt32 texture for reading back 24 and 32-bit depth
@@ -103,6 +104,7 @@ class GSTexture
103104
__fi bool IsCompressedFormat() const { return IsCompressedFormat(m_format); }
104105

105106
static const char* GetFormatName(Format format);
107+
static bool IsBlockCompressedFormat(Format format);
106108
static u32 GetCompressedBytesPerBlock(Format format);
107109
static u32 GetCompressedBlockSize(Format format);
108110
static u32 CalcUploadPitch(Format format, u32 width);

0 commit comments

Comments
 (0)