Skip to content

Commit c1bf4d5

Browse files
GS/GL: Backport multidraw fb copy from DX11.
Will be used when GL_ARB_texture_barrier isn't supported. Can also be used for Depth feedback loops.
1 parent eab8b92 commit c1bf4d5

File tree

2 files changed

+56
-52
lines changed

2 files changed

+56
-52
lines changed

pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp

Lines changed: 55 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -752,8 +752,10 @@ bool GSDeviceOGL::CheckFeatures()
752752
if (!GLAD_GL_ARB_texture_barrier)
753753
{
754754
glTextureBarrier = ReplaceGL::TextureBarrier;
755+
// Switch to the multidraw framebuffer-copy fallback.
756+
m_features.multidraw_fb_copy = true;
755757
Host::AddOSDMessage(
756-
"GL_ARB_texture_barrier is not supported, blending will not be accurate.", Host::OSD_ERROR_DURATION);
758+
"GL_ARB_texture_barrier is not supported, blending will be slower.", Host::OSD_ERROR_DURATION);
757759
}
758760

759761
if (!GLAD_GL_ARB_direct_state_access)
@@ -787,18 +789,20 @@ bool GSDeviceOGL::CheckFeatures()
787789
}
788790

789791
if (GSConfig.OverrideTextureBarriers == 0)
792+
{
790793
m_features.texture_barrier = m_features.framebuffer_fetch; // Force Disabled
794+
m_features.multidraw_fb_copy = false;
795+
Host::AddOSDMessage(
796+
"Texture Barrier is disabled, blending will not be accurate.", Host::OSD_ERROR_DURATION);
797+
}
791798
else if (GSConfig.OverrideTextureBarriers == 1)
799+
{
792800
m_features.texture_barrier = true; // Force Enabled
801+
m_features.multidraw_fb_copy = false;
802+
}
793803
else
794804
m_features.texture_barrier = m_features.framebuffer_fetch || GLAD_GL_ARB_texture_barrier;
795-
if (!m_features.texture_barrier)
796-
{
797-
Host::AddOSDMessage(
798-
"GL_ARB_texture_barrier is not supported, blending will not be accurate.", Host::OSD_ERROR_DURATION);
799-
}
800805

801-
m_features.multidraw_fb_copy = false;
802806
m_features.provoking_vertex_last = true;
803807
m_features.dxt_textures = GLAD_GL_EXT_texture_compression_s3tc;
804808
m_features.bptc_textures =
@@ -2640,6 +2644,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
26402644
}
26412645

26422646
// Destination Alpha Setup
2647+
const bool multidraw_fb_copy = m_features.multidraw_fb_copy && (config.require_one_barrier || config.require_full_barrier);
26432648
switch (config.destination_alpha)
26442649
{
26452650
case GSHWDrawConfig::DestinationAlphaMode::Off:
@@ -2654,7 +2659,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
26542659
}
26552660
break;
26562661
case GSHWDrawConfig::DestinationAlphaMode::StencilOne:
2657-
if (m_features.texture_barrier)
2662+
if (m_features.texture_barrier || multidraw_fb_copy)
26582663
{
26592664
// Cleared after RT bind.
26602665
break;
@@ -2833,23 +2838,12 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
28332838
glTextureBarrier();
28342839
}
28352840

2836-
if (draw_rt && (config.require_one_barrier || (config.tex && config.tex == config.rt)) && !m_features.texture_barrier)
2841+
if (draw_rt && (config.require_one_barrier || (config.require_full_barrier && m_features.multidraw_fb_copy) || (config.tex && config.tex == config.rt)) &&
2842+
!m_features.texture_barrier)
28372843
{
28382844
// Requires a copy of the RT.
28392845
draw_rt_clone = CreateTexture(rtsize.x, rtsize.y, 1, draw_rt->GetFormat(), true);
2840-
if (draw_rt_clone)
2841-
{
2842-
GL_PUSH("GL: Copy RT to temp texture {%d,%d %dx%d}",
2843-
config.drawarea.left, config.drawarea.top,
2844-
config.drawarea.width(), config.drawarea.height());
2845-
const GSVector4i snapped_drawarea = ProcessCopyArea(GSVector4i(0, 0, rtsize.x, rtsize.y), config.drawarea);
2846-
CopyRect(draw_rt, draw_rt_clone, snapped_drawarea, snapped_drawarea.left, snapped_drawarea.top);
2847-
if (config.require_one_barrier)
2848-
PSSetShaderResource(2, draw_rt_clone);
2849-
if (config.tex && config.tex == config.rt)
2850-
PSSetShaderResource(0, draw_rt_clone);
2851-
}
2852-
else
2846+
if (!draw_rt_clone)
28532847
Console.Warning("GL: Failed to allocate temp texture for RT copy.");
28542848
}
28552849

@@ -2858,13 +2852,13 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
28582852
SetupOM(config.depth);
28592853

28602854
// Clear stencil as close as possible to the RT bind, to avoid framebuffer swaps.
2861-
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::StencilOne && m_features.texture_barrier)
2855+
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::StencilOne && (m_features.texture_barrier || multidraw_fb_copy))
28622856
{
28632857
constexpr GLint clear_color = 1;
28642858
glClearBufferiv(GL_STENCIL, 0, &clear_color);
28652859
}
28662860

2867-
SendHWDraw(config, config.require_one_barrier, config.require_full_barrier);
2861+
SendHWDraw(config, draw_rt_clone, draw_rt, config.require_one_barrier, config.require_full_barrier);
28682862

28692863
if (config.blend_multi_pass.enable)
28702864
{
@@ -2911,7 +2905,8 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
29112905
OMSetBlendState();
29122906
}
29132907
SetupOM(config.alpha_second_pass.depth);
2914-
SendHWDraw(config, config.alpha_second_pass.require_one_barrier, config.alpha_second_pass.require_full_barrier);
2908+
SendHWDraw(config, draw_rt_clone, draw_rt, m_features.texture_barrier ? config.alpha_second_pass.require_one_barrier : false,
2909+
config.alpha_second_pass.require_full_barrier);
29152910
}
29162911

29172912
if (primid_texture)
@@ -2936,47 +2931,48 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
29362931
}
29372932
}
29382933

2939-
void GSDeviceOGL::SendHWDraw(const GSHWDrawConfig& config, bool one_barrier, bool full_barrier)
2934+
void GSDeviceOGL::SendHWDraw(const GSHWDrawConfig& config, GSTexture* draw_rt_clone, GSTexture* draw_rt, bool one_barrier, bool full_barrier)
29402935
{
2941-
if (!m_features.texture_barrier) [[unlikely]]
2942-
{
2943-
DrawIndexedPrimitive();
2944-
return;
2945-
}
2946-
29472936
#ifdef PCSX2_DEVBUILD
29482937
if ((one_barrier || full_barrier) && !(config.ps.IsFeedbackLoopRT() || config.ps.IsFeedbackLoopDepth())) [[unlikely]]
29492938
Console.Warning("OpenGL: Possible unnecessary barrier detected.");
29502939
#endif
29512940

2941+
auto CopyAndBind = [&](GSVector4i drawarea) {
2942+
if (draw_rt_clone)
2943+
CopyRect(draw_rt, draw_rt_clone, drawarea, drawarea.left, drawarea.top);
2944+
if ((one_barrier || full_barrier) && draw_rt_clone)
2945+
PSSetShaderResource(2, draw_rt_clone);
2946+
if (config.tex && config.tex == draw_rt)
2947+
PSSetShaderResource(0, draw_rt_clone);
2948+
};
2949+
2950+
const GSVector4i rtsize(0, 0, (draw_rt ? draw_rt : draw_ds)->GetWidth(), (draw_rt ? draw_rt : draw_ds)->GetHeight());
2951+
29522952
if (full_barrier)
29532953
{
29542954
pxAssert(config.drawlist && !config.drawlist->empty());
29552955

2956-
GL_PUSH("Split the draw");
2957-
#if defined(_DEBUG)
2958-
// Check how draw call is split.
2959-
std::map<size_t, size_t> frequency;
2960-
for (const auto& it : *config.drawlist)
2961-
++frequency[it];
2962-
2963-
std::string message;
2964-
for (const auto& it : frequency)
2965-
message += " " + std::to_string(it.first) + "(" + std::to_string(it.second) + ")";
2966-
2967-
GL_PERF("Split single draw (%d primitives) into %zu draws: consecutive draws(frequency):%s",
2968-
config.nindices / config.indices_per_prim, config.drawlist->size(), message.c_str());
2969-
#endif
2970-
29712956
const u32 indices_per_prim = config.indices_per_prim;
29722957
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
29732958

2974-
g_perfmon.Put(GSPerfMon::Barriers, static_cast<u32>(draw_list_size));
2959+
if (m_features.texture_barrier)
2960+
g_perfmon.Put(GSPerfMon::Barriers, static_cast<u32>(draw_list_size));
2961+
else
2962+
pxAssert(config.drawlist_bbox && static_cast<u32>(config.drawlist_bbox->size()) == draw_list_size);
29752963

29762964
for (u32 n = 0, p = 0; n < draw_list_size; n++)
29772965
{
29782966
const u32 count = (*config.drawlist)[n] * indices_per_prim;
2979-
glTextureBarrier();
2967+
2968+
if (m_features.texture_barrier)
2969+
glTextureBarrier();
2970+
else
2971+
{
2972+
const GSVector4i original_bbox = (*config.drawlist_bbox)[n].rintersect(config.drawarea);
2973+
CopyAndBind(ProcessCopyArea(rtsize, original_bbox));
2974+
}
2975+
29802976
DrawIndexedPrimitive(p, count);
29812977
p += count;
29822978
}
@@ -2986,8 +2982,16 @@ void GSDeviceOGL::SendHWDraw(const GSHWDrawConfig& config, bool one_barrier, boo
29862982

29872983
if (one_barrier)
29882984
{
2989-
g_perfmon.Put(GSPerfMon::Barriers, 1);
2990-
glTextureBarrier();
2985+
if (m_features.texture_barrier)
2986+
{
2987+
g_perfmon.Put(GSPerfMon::Barriers, 1);
2988+
glTextureBarrier();
2989+
}
2990+
else
2991+
{
2992+
// No texture barrier: snapshot the RT into the clone and bind it. The alpha second pass is sent with one_barrier forced to false in this case, so it can reuse this snapshot.
2993+
CopyAndBind(ProcessCopyArea(rtsize, config.drawarea));
2994+
}
29912995
}
29922996

29932997
DrawIndexedPrimitive();

pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ class GSDeviceOGL final : public GSDevice
340340
void DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, const GSVector2& ds);
341341

342342
void RenderHW(GSHWDrawConfig& config) override;
343-
void SendHWDraw(const GSHWDrawConfig& config, bool one_barrier, bool full_barrier);
343+
void SendHWDraw(const GSHWDrawConfig& config, GSTexture* draw_rt_clone, GSTexture* draw_rt, bool one_barrier, bool full_barrier);
344344

345345
void SetupDATE(GSTexture* rt, GSTexture* ds, SetDATM datm, const GSVector4i& bbox);
346346

0 commit comments

Comments
 (0)