Skip to content

Commit a35c9f3

Browse files
Handle mixed samples attachments (V2) (shadps4-emu#3667)
* video_core: Refactor render target bind to allow disabling MSAA
* video_core: Implement swapping of backing samples
* clang format
* video_core: Better implementation
  Instead of downgrading to 1 sample, always try to match depth samples. This avoids needing to copy the depth-stencil attachment, and copying multisampled stencil is not possible on some vendors.
* video_core: Small bugfixes
* image: Add null check
* vk_rasterizer: Swap backing samples on resolve dst
* vk_presenter: Reset backing samples before present
* video_core: Small refactor to make this implementation better
* reinterpret: Fix channel check for degamma
  Seems this was simpler than I thought; hardware doesn't apply degamma on the W channel regardless of swizzle.
* image: Add missing end rendering call
* blit_helper: Fix bug in old reinterpret path
* blit_helper: Remove unused layer vertex
  Should be used in the future if copying many layers is needed.
* vk_rasterizer: Apply suggestion
* vk_rasterizer: More bind refactor
* vk_instance: Re-enable extensions
1 parent cad0278 commit a35c9f3

32 files changed

+1151
-832
lines changed

src/core/libraries/videoout/driver.cpp

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -233,29 +233,23 @@ bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
233233
}
234234

235235
if (!is_eop) {
236-
// Before processing the flip we need to ask GPU thread to flush command list as at this
237-
// point VO surface is ready to be presented, and we will need have an actual state of
238-
// Vulkan image at the time of frame presentation.
239-
liverpool->SendCommand([=, this]() {
240-
presenter->FlushDraw();
241-
SubmitFlipInternal(port, index, flip_arg, is_eop);
242-
});
236+
// Non EOP flips can arrive from any thread so ask GPU thread to perform them
237+
liverpool->SendCommand([=, this]() { SubmitFlipInternal(port, index, flip_arg, is_eop); });
243238
} else {
244239
SubmitFlipInternal(port, index, flip_arg, is_eop);
245240
}
246241

247242
return true;
248243
}
249244

250-
void VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg,
251-
bool is_eop /*= false*/) {
245+
void VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop) {
252246
Vulkan::Frame* frame;
253247
if (index == -1) {
254-
frame = presenter->PrepareBlankFrame(is_eop);
248+
frame = presenter->PrepareBlankFrame(false);
255249
} else {
256250
const auto& buffer = port->buffer_slots[index];
257251
const auto& group = port->groups[buffer.group_index];
258-
frame = presenter->PrepareFrame(group, buffer.address_left, is_eop);
252+
frame = presenter->PrepareFrame(group, buffer.address_left);
259253
}
260254

261255
std::scoped_lock lock{mutex};

src/shader_recompiler/backend/spirv/emit_spirv.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,8 @@ void SetupCapabilities(const Info& info, const Profile& profile, const RuntimeIn
301301
ctx.AddExtension("SPV_KHR_fragment_shader_barycentric");
302302
ctx.AddCapability(spv::Capability::FragmentBarycentricKHR);
303303
}
304-
if (runtime_info.fs_info.addr_flags.linear_sample_ena ||
304+
if (info.loads.Get(IR::Attribute::SampleIndex) ||
305+
runtime_info.fs_info.addr_flags.linear_sample_ena ||
305306
runtime_info.fs_info.addr_flags.persp_sample_ena) {
306307
ctx.AddCapability(spv::Capability::SampleRateShading);
307308
}

src/shader_recompiler/frontend/fetch_shader.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#pragma once
55

6+
#include <optional>
67
#include <vector>
78
#include "common/types.h"
89
#include "shader_recompiler/info.h"

src/shader_recompiler/ir/passes/resource_tracking_pass.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1022,7 +1022,7 @@ void PatchImageSampleArgs(IR::Block& block, IR::Inst& inst, Info& info,
10221022

10231023
auto converted = ApplyReadNumberConversionVec4(ir, texel, image.GetNumberConversion());
10241024
if (sampler.force_degamma && image.GetNumberFmt() != AmdGpu::NumberFormat::Srgb) {
1025-
converted = ApplyForceDegamma(ir, texel, image.DstSelect());
1025+
converted = ApplyForceDegamma(ir, texel);
10261026
}
10271027
inst.ReplaceUsesWith(converted);
10281028
}

src/shader_recompiler/ir/reinterpret.h

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -29,25 +29,15 @@ inline F32 ApplyGammaToLinear(IREmitter& ir, const F32& c) {
2929
return IR::F32{ir.Select(ir.FPGreaterThan(c, ir.Imm32(0.04045f)), a, b)};
3030
}
3131

32-
inline Value ApplyForceDegamma(IREmitter& ir, const Value& value,
33-
const AmdGpu::CompMapping& mapping) {
32+
inline Value ApplyForceDegamma(IREmitter& ir, const Value& value) {
3433
auto x = F32{ir.CompositeExtract(value, 0)};
3534
auto y = F32{ir.CompositeExtract(value, 1)};
3635
auto z = F32{ir.CompositeExtract(value, 2)};
3736
auto w = F32{ir.CompositeExtract(value, 3)};
3837
// Gamma correction is only applied to RGB components
39-
if (AmdGpu::IsRgb(mapping.r)) {
40-
x = ApplyGammaToLinear(ir, x);
41-
}
42-
if (AmdGpu::IsRgb(mapping.g)) {
43-
y = ApplyGammaToLinear(ir, y);
44-
}
45-
if (AmdGpu::IsRgb(mapping.b)) {
46-
z = ApplyGammaToLinear(ir, z);
47-
}
48-
if (AmdGpu::IsRgb(mapping.a)) {
49-
w = ApplyGammaToLinear(ir, w);
50-
}
38+
x = ApplyGammaToLinear(ir, x);
39+
y = ApplyGammaToLinear(ir, y);
40+
z = ApplyGammaToLinear(ir, z);
5141
return ir.CompositeConstruct(x, y, z, w);
5242
}
5343

src/video_core/amdgpu/liverpool.h

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1485,26 +1485,6 @@ struct Liverpool {
14851485
return nullptr;
14861486
}
14871487

1488-
u32 NumSamples() const {
1489-
// It seems that the number of samples > 1 set in the AA config doesn't mean we're
1490-
// always rendering with MSAA, so we need to derive MS ratio from the CB and DB
1491-
// settings.
1492-
u32 num_samples = 1u;
1493-
if (color_control.mode != ColorControl::OperationMode::Disable) {
1494-
for (auto cb = 0u; cb < NumColorBuffers; ++cb) {
1495-
const auto& col_buf = color_buffers[cb];
1496-
if (!col_buf) {
1497-
continue;
1498-
}
1499-
num_samples = std::max(num_samples, col_buf.NumSamples());
1500-
}
1501-
}
1502-
if (depth_buffer.DepthValid() || depth_buffer.StencilValid()) {
1503-
num_samples = std::max(num_samples, depth_buffer.NumSamples());
1504-
}
1505-
return num_samples;
1506-
}
1507-
15081488
bool IsClipDisabled() const {
15091489
return clipper_control.clip_disable || primitive_type == PrimitiveType::RectList;
15101490
}

src/video_core/buffer_cache/buffer_cache.cpp

Lines changed: 1 addition & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -984,44 +984,8 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
984984
if (copy_size == 0) {
985985
return false;
986986
}
987-
scheduler.EndRendering();
988-
const vk::BufferMemoryBarrier2 pre_barrier = {
989-
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
990-
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
991-
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
992-
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
993-
.buffer = buffer.Handle(),
994-
.offset = buf_offset,
995-
.size = copy_size,
996-
};
997-
const vk::BufferMemoryBarrier2 post_barrier = {
998-
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
999-
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
1000-
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
1001-
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
1002-
.buffer = buffer.Handle(),
1003-
.offset = buf_offset,
1004-
.size = copy_size,
1005-
};
1006-
auto barriers =
1007-
image.GetBarriers(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead,
1008-
vk::PipelineStageFlagBits2::eTransfer, {});
1009-
auto cmdbuf = scheduler.CommandBuffer();
1010-
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
1011-
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
1012-
.bufferMemoryBarrierCount = 1,
1013-
.pBufferMemoryBarriers = &pre_barrier,
1014-
.imageMemoryBarrierCount = static_cast<u32>(barriers.size()),
1015-
.pImageMemoryBarriers = barriers.data(),
1016-
});
1017987
auto& tile_manager = texture_cache.GetTileManager();
1018-
tile_manager.TileImage(image.image, buffer_copies, buffer.Handle(), buf_offset, image.info);
1019-
cmdbuf = scheduler.CommandBuffer();
1020-
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
1021-
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
1022-
.bufferMemoryBarrierCount = 1,
1023-
.pBufferMemoryBarriers = &post_barrier,
1024-
});
988+
tile_manager.TileImage(image, buffer_copies, buffer.Handle(), buf_offset, copy_size);
1025989
return true;
1026990
}
1027991

src/video_core/host_shaders/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ set(SHADER_FILES
1212
detilers/micro_64bpp.comp
1313
detilers/micro_8bpp.comp
1414
color_to_ms_depth.frag
15+
ms_image_blit.frag
1516
fault_buffer_process.comp
1617
fs_tri.vert
1718
fsr.comp

src/video_core/host_shaders/fs_tri.vert

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33

44
#version 450
55

6+
#if defined(INSTANCE_AS_LAYER)
7+
#extension GL_ARB_shader_viewport_layer_array : require
8+
#endif
9+
610
layout(location = 0) out vec2 uv;
711

812
void main() {
@@ -11,5 +15,8 @@ void main() {
1115
float((gl_VertexIndex & 2u) << 1u)
1216
);
1317
gl_Position = vec4(pos - vec2(1.0, 1.0), 0.0, 1.0);
18+
#if defined(INSTANCE_AS_LAYER)
19+
gl_Layer = gl_InstanceIndex;
20+
#endif
1421
uv = pos * 0.5;
1522
}

src/video_core/host_shaders/ms_image_blit.frag

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
2+
// SPDX-License-Identifier: GPL-2.0-or-later
3+
4+
#version 450 core
5+
#extension GL_EXT_samplerless_texture_functions : require
6+
7+
#if defined(SRC_MSAA)
8+
layout (binding = 0, set = 0) uniform texture2DMS in_tex;
9+
#else
10+
layout (binding = 0, set = 0) uniform texture2D in_tex;
11+
#endif
12+
13+
layout (location = 0) in vec2 uv;
14+
layout (location = 0) out vec4 out_color;
15+
16+
void main()
17+
{
18+
#if defined(SRC_MSAA)
19+
out_color = texelFetch(in_tex, ivec2(gl_FragCoord.xy), gl_SampleID);
20+
#else
21+
out_color = texelFetch(in_tex, ivec2(gl_FragCoord.xy), 0);
22+
#endif
23+
}

0 commit comments

Comments
 (0)