Skip to content

Commit 2858d2d

Browse files
committed
GS/VK/GL/DX12/DX11: Use default buffer instead of upload buffer for accurate prims data.
This should hopefully give better performance. Also refactors some upload/staging buffer handling in VK/DX12.
1 parent 86f47c7 commit 2858d2d

17 files changed

+370
-208
lines changed

pcsx2/GS/Renderers/DX11/GSDevice11.cpp

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -396,8 +396,12 @@ bool GSDevice11::Create(GSVSyncMode vsync_mode, bool allow_present_throttle)
396396
}
397397
}
398398

399+
bd = {};
400+
399401
if (m_features.accurate_prims)
400402
{
403+
bd.Usage = D3D11_USAGE_DEFAULT;
404+
bd.CPUAccessFlags = 0;
401405
bd.ByteWidth = ACCURATE_PRIMS_BUFFER_SIZE;
402406
bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
403407
bd.StructureByteStride = sizeof(AccuratePrimsEdgeData);
@@ -410,16 +414,19 @@ bool GSDevice11::Create(GSVSyncMode vsync_mode, bool allow_present_throttle)
410414
}
411415

412416
const CD3D11_SHADER_RESOURCE_VIEW_DESC accurate_prims_b_srv_desc(
413-
D3D11_SRV_DIMENSION_BUFFER, DXGI_FORMAT_UNKNOWN, 0, ACCURATE_PRIMS_BUFFER_SIZE / sizeof(AccuratePrimsEdgeData));
414-
if (FAILED(m_dev->CreateShaderResourceView(m_accurate_prims_b.get(), &accurate_prims_b_srv_desc, m_accurate_prims_b_srv.put())))
417+
D3D11_SRV_DIMENSION_BUFFER, DXGI_FORMAT_UNKNOWN, 0,
418+
ACCURATE_PRIMS_BUFFER_SIZE / sizeof(AccuratePrimsEdgeData));
419+
420+
if (FAILED(m_dev->CreateShaderResourceView(m_accurate_prims_b.get(), &accurate_prims_b_srv_desc,
421+
m_accurate_prims_b_srv.put())))
415422
{
416423
Console.Error("D3D11: Failed to create accurate prims buffer SRV.");
417424
return false;
418425
}
419426

420427
// If MAX_TEXTURES changes, please change the register for this buffer in the shader.
421428
static_assert(MAX_TEXTURES == 5);
422-
m_ctx->PSSetShaderResources(MAX_TEXTURES, 1, m_accurate_prims_b_srv.addressof());
429+
m_ctx->PSSetShaderResources(5, 1, m_accurate_prims_b_srv.addressof());
423430
}
424431

425432
// rasterizer
@@ -2326,29 +2333,18 @@ bool GSDevice11::SetupAccuratePrims(GSHWDrawConfig& config)
23262333
if (size > ACCURATE_PRIMS_BUFFER_SIZE)
23272334
return false;
23282335

2329-
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
2330-
2331-
pxAssert(m_accurate_prims_b_pos % sizeof(AccuratePrimsEdgeData) == 0);
2332-
2333-
if (m_accurate_prims_b_pos + size > ACCURATE_PRIMS_BUFFER_SIZE)
2334-
{
2335-
m_accurate_prims_b_pos = 0;
2336-
type = D3D11_MAP_WRITE_DISCARD;
2337-
}
2338-
2339-
D3D11_MAPPED_SUBRESOURCE m;
2340-
if (FAILED(m_ctx->Map(m_accurate_prims_b.get(), 0, type, 0, &m)))
2341-
return false;
2342-
2343-
void* map = static_cast<u8*>(m.pData) + m_accurate_prims_b_pos;
2344-
2345-
GSVector4i::storent(map, config.accurate_prims_edge_data->data(), size);
2346-
2347-
m_ctx->Unmap(m_accurate_prims_b.get(), 0);
2348-
2349-
config.cb_ps.accurate_prims_base_index.x = m_accurate_prims_b_pos / sizeof(AccuratePrimsEdgeData);
2350-
2351-
m_accurate_prims_b_pos += size;
2336+
// Performance note: UpdateSubresource() copies data to a temp staging buffer to avoid stalling the GPU,
2337+
// so, unlike VK/DX12, a manual ring buffer is not needed here.
2338+
D3D11_BOX dst_region{};
2339+
dst_region.left = 0;
2340+
dst_region.right = size;
2341+
dst_region.top = 0;
2342+
dst_region.bottom = 1;
2343+
dst_region.front = 0;
2344+
dst_region.back = 1;
2345+
m_ctx->UpdateSubresource(m_accurate_prims_b.get(), 0, &dst_region, config.accurate_prims_edge_data->data(), size, 0);
2346+
2347+
config.cb_ps.accurate_prims_base_index.x = 0; // Unlike DX12/VK, no offsetting is needed since we don't use a ring buffer.
23522348
}
23532349
return true;
23542350
}

pcsx2/GS/Renderers/DX11/GSDevice11.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,6 @@ class GSDevice11 final : public GSDevice
137137
u32 m_vb_pos = 0; // bytes
138138
u32 m_ib_pos = 0; // indices/sizeof(u32)
139139
u32 m_structured_vb_pos = 0; // bytes
140-
u32 m_accurate_prims_b_pos = 0; // bytes/sizeof(AccuratePrimsEdgeData)
141140

142141
bool m_allow_tearing_supported = false;
143142
bool m_using_flip_model_swap_chain = true;

pcsx2/GS/Renderers/DX12/D3D12StreamBuffer.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,29 +20,33 @@ D3D12StreamBuffer::~D3D12StreamBuffer()
2020
Destroy();
2121
}
2222

23-
bool D3D12StreamBuffer::Create(u32 size)
23+
bool D3D12StreamBuffer::Create(u32 size, bool default_heap)
2424
{
2525
const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN,
2626
{1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE};
2727

2828
D3D12MA::ALLOCATION_DESC allocationDesc = {};
2929
allocationDesc.Flags = D3D12MA::ALLOCATION_FLAG_COMMITTED;
30-
allocationDesc.HeapType = D3D12_HEAP_TYPE_UPLOAD;
30+
allocationDesc.HeapType = default_heap ? D3D12_HEAP_TYPE_DEFAULT : D3D12_HEAP_TYPE_UPLOAD;
3131

3232
wil::com_ptr_nothrow<ID3D12Resource> buffer;
3333
wil::com_ptr_nothrow<D3D12MA::Allocation> allocation;
3434
HRESULT hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource(&allocationDesc, &resource_desc,
35-
D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.put(), IID_PPV_ARGS(buffer.put()));
35+
default_heap ? D3D12_RESOURCE_STATE_COMMON : D3D12_RESOURCE_STATE_GENERIC_READ,
36+
nullptr, allocation.put(), IID_PPV_ARGS(buffer.put()));
3637
pxAssertMsg(SUCCEEDED(hr), "Allocate buffer");
3738
if (FAILED(hr))
3839
return false;
3940

4041
static const D3D12_RANGE read_range = {};
41-
u8* host_pointer;
42-
hr = buffer->Map(0, &read_range, reinterpret_cast<void**>(&host_pointer));
43-
pxAssertMsg(SUCCEEDED(hr), "Map buffer");
44-
if (FAILED(hr))
45-
return false;
42+
u8* host_pointer = nullptr;
43+
if (!default_heap)
44+
{
45+
hr = buffer->Map(0, &read_range, reinterpret_cast<void**>(&host_pointer));
46+
pxAssertMsg(SUCCEEDED(hr), "Map buffer");
47+
if (FAILED(hr))
48+
return false;
49+
}
4650

4751
Destroy(true);
4852

@@ -51,6 +55,7 @@ bool D3D12StreamBuffer::Create(u32 size)
5155
m_host_pointer = host_pointer;
5256
m_size = size;
5357
m_gpu_pointer = m_buffer->GetGPUVirtualAddress();
58+
m_default_heap = default_heap;
5459
return true;
5560
}
5661

@@ -148,6 +153,7 @@ void D3D12StreamBuffer::Destroy(bool defer)
148153
m_current_offset = 0;
149154
m_current_space = 0;
150155
m_current_gpu_position = 0;
156+
m_default_heap = false;
151157
m_tracked_fences.clear();
152158
}
153159

pcsx2/GS/Renderers/DX12/D3D12StreamBuffer.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class D3D12StreamBuffer
2222
D3D12StreamBuffer();
2323
~D3D12StreamBuffer();
2424

25-
bool Create(u32 size);
25+
bool Create(u32 size, bool default_heap = false);
2626

2727
__fi bool IsValid() const { return static_cast<bool>(m_buffer); }
2828
__fi ID3D12Resource* GetBuffer() const { return m_buffer.get(); }
@@ -54,7 +54,8 @@ class D3D12StreamBuffer
5454
wil::com_ptr_nothrow<ID3D12Resource> m_buffer;
5555
wil::com_ptr_nothrow<D3D12MA::Allocation> m_allocation;
5656
D3D12_GPU_VIRTUAL_ADDRESS m_gpu_pointer = {};
57-
u8* m_host_pointer = nullptr;
57+
u8* m_host_pointer = nullptr; // Only used for upload heaps.
58+
bool m_default_heap = false; // False for upload heap; true for default heap.
5859

5960
// List of fences and the corresponding positions in the buffer
6061
std::deque<std::pair<u64, u32>> m_tracked_fences;

0 commit comments

Comments
 (0)