Skip to content

Commit 27925a4

Browse files
committed
do vertex buffer restride in a void vertex function
1 parent 5e9537c commit 27925a4

File tree

5 files changed

+96
-23
lines changed

5 files changed

+96
-23
lines changed

src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp

Lines changed: 47 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
22
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
33
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
4-
#include "Metal/MTLResource.hpp"
4+
#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
5+
#include "Common/precompiled.h"
6+
#include "Foundation/NSRange.hpp"
7+
#include "Metal/MTLRenderCommandEncoder.hpp"
58

69
const size_t BUFFER_ALLOCATION_SIZE = 8 * 1024 * 1024;
710

@@ -93,21 +96,51 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu
9396

9497
if (restrideInfo.memoryInvalidated || stride != restrideInfo.lastStride)
9598
{
96-
// TODO: use compute/void vertex function instead
9799
size_t newStride = Align(stride, 4);
98100
size_t newSize = vertexBufferRange.size / stride * newStride;
99-
// TODO: use one big buffer for all restrided buffers
100-
restrideInfo.buffer = m_mtlr->GetDevice()->newBuffer(newSize, MTL::StorageModeShared);
101-
102-
uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset;
103-
uint8* newPtr = (uint8*)restrideInfo.buffer->contents();
104-
105-
for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++)
106-
{
107-
memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride);
108-
}
109-
// TODO: remove
110-
debug_printf("Restrided vertex buffer (old stride: %zu, new stride: %zu, old size: %zu, new size: %zu)\n", stride, newStride, vertexBufferRange.size, newSize);
101+
if (!restrideInfo.buffer || newSize != restrideInfo.buffer->length())
102+
{
103+
if (restrideInfo.buffer)
104+
restrideInfo.buffer->release();
105+
// TODO: use one big buffer for all restrided buffers
106+
restrideInfo.buffer = m_mtlr->GetDevice()->newBuffer(newSize, MTL::StorageModeShared);
107+
}
108+
109+
//uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset;
110+
//uint8* newPtr = (uint8*)restrideInfo.buffer->contents();
111+
112+
//for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++)
113+
//{
114+
// memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride);
115+
//}
116+
//debug_printf("Restrided vertex buffer (old stride: %zu, new stride: %zu, old size: %zu, new size: %zu)\n", stride, newStride, vertexBufferRange.size, newSize);
117+
118+
if (m_mtlr->GetEncoderType() == MetalEncoderType::Render)
119+
{
120+
auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_mtlr->GetCommandEncoder());
121+
122+
renderCommandEncoder->setRenderPipelineState(m_restrideBufferPipeline->GetRenderPipelineState());
123+
MTL::Buffer* buffers[] = {bufferCache, restrideInfo.buffer};
124+
size_t offsets[] = {vertexBufferRange.offset, 0};
125+
renderCommandEncoder->setVertexBuffers(buffers, offsets, NS::Range(0, 2));
126+
127+
struct
128+
{
129+
uint32 oldStride;
130+
uint32 newStride;
131+
} strideData = {static_cast<uint32>(stride), static_cast<uint32>(newStride)};
132+
renderCommandEncoder->setVertexBytes(&strideData, sizeof(strideData), 2);
133+
134+
renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), vertexBufferRange.size / stride);
135+
136+
MTL::Resource* barrierBuffers[] = {restrideInfo.buffer};
137+
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex);
138+
}
139+
else
140+
{
141+
debug_printf("vertex buffer restride needs an active render encoder\n");
142+
cemu_assert_suspicious();
143+
}
111144

112145
restrideInfo.memoryInvalidated = false;
113146
restrideInfo.lastStride = newStride;

src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,11 @@ class MetalVertexBufferCache
8080
MetalVertexBufferCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {}
8181
~MetalVertexBufferCache();
8282

83-
// Vertex buffer cache
83+
// Sets the hybrid pipeline used to restride vertex buffers on the GPU.
// Only the pointer is stored here; MetalRenderer creates the pipeline in its
// constructor and deletes it in its destructor.
// m_restrideBufferPipeline defaults to nullptr and RestrideBufferIfNeeded
// dereferences it without a null check, so this has to be called first.
void SetRestrideBufferPipeline(class MetalHybridComputePipeline* restrideBufferPipeline)
84+
{
85+
m_restrideBufferPipeline = restrideBufferPipeline;
86+
}
87+
8488
void TrackVertexBuffer(uint32 bufferIndex, size_t offset, size_t size, MetalRestrideInfo* restrideInfo)
8589
{
8690
m_bufferRanges[bufferIndex] = MetalVertexBufferRange{offset, size, restrideInfo};
@@ -101,6 +105,8 @@ class MetalVertexBufferCache
101105
private:
102106
class MetalRenderer* m_mtlr;
103107

108+
class MetalHybridComputePipeline* m_restrideBufferPipeline = nullptr;
109+
104110
MetalVertexBufferRange m_bufferRanges[LATTE_MAX_VERTEX_BUFFERS] = {};
105111

106112
void MemoryRangeChanged(size_t offset, size_t size);
@@ -112,6 +118,12 @@ class MetalMemoryManager
112118
MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer), m_vertexBufferCache(metalRenderer) {}
113119
~MetalMemoryManager();
114120

121+
// Pipelines
122+
// Forwards the restride pipeline to the vertex buffer cache, which is the
// component that actually uses it; the memory manager keeps no copy of its own.
void SetRestrideBufferPipeline(class MetalHybridComputePipeline* restrideBufferPipeline)
123+
{
124+
m_vertexBufferCache.SetRestrideBufferPipeline(restrideBufferPipeline);
125+
}
126+
115127
void ResetTemporaryBuffers()
116128
{
117129
m_bufferAllocator/*s[m_bufferAllocatorIndex]*/.ResetTemporaryBuffers();

src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,9 @@ MetalRenderer::MetalRenderer()
6262
MTL::Library* utilityLibrary = m_device->newLibrary(NS::String::string(utilityShaderSource, NS::ASCIIStringEncoding), nullptr, &error);
6363
if (error)
6464
{
65-
debug_printf("failed to create present library (error: %s)\n", error->localizedDescription()->utf8String());
65+
debug_printf("failed to create utility library (error: %s)\n", error->localizedDescription()->utf8String());
6666
error->release();
67+
throw;
6768
return;
6869
}
6970

@@ -98,12 +99,16 @@ MetalRenderer::MetalRenderer()
9899

99100
// Hybrid pipelines
100101
m_copyTextureToTexturePipeline = new MetalHybridComputePipeline(this, utilityLibrary, "vertexCopyTextureToTexture", "kernelCopyTextureToTexture");
102+
m_restrideBufferPipeline = new MetalHybridComputePipeline(this, utilityLibrary, "vertexRestrideBuffer", "kernelRestrideBuffer");
101103
utilityLibrary->release();
104+
105+
m_memoryManager->SetRestrideBufferPipeline(m_restrideBufferPipeline);
102106
}
103107

104108
MetalRenderer::~MetalRenderer()
105109
{
106110
delete m_copyTextureToTexturePipeline;
111+
delete m_restrideBufferPipeline;
107112

108113
m_presentPipelineLinear->release();
109114
m_presentPipelineSRGB->release();
@@ -688,11 +693,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
688693
}
689694
const auto fetchShader = LatteSHRC_GetActiveFetchShader();
690695

691-
// Render pipeline state
692-
// TODO: use `m_lastUsedFBO` instead of `m_activeFBO`
693-
MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetPipelineState(fetchShader, vertexShader, pixelShader, m_state.m_activeFBO, LatteGPUState.contextNew);
694-
renderCommandEncoder->setRenderPipelineState(renderPipelineState);
695-
696696
// Depth stencil state
697697
MTL::DepthStencilState* depthStencilState = m_depthStencilCache->GetDepthStencilState(LatteGPUState.contextNew);
698698
renderCommandEncoder->setDepthStencilState(depthStencilState);
@@ -794,6 +794,10 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
794794
}
795795
}
796796

797+
// Render pipeline state
798+
MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetPipelineState(fetchShader, vertexShader, pixelShader, m_state.m_activeFBO, LatteGPUState.contextNew);
799+
renderCommandEncoder->setRenderPipelineState(renderPipelineState);
800+
797801
// Uniform buffers, textures and samplers
798802
BindStageResources(renderCommandEncoder, vertexShader);
799803
BindStageResources(renderCommandEncoder, pixelShader);
@@ -1308,7 +1312,7 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
13081312
{
13091313
LatteMRT::GetCurrentFragCoordScale(GET_UNIFORM_DATA_PTR(shader->uniform.loc_fragCoordScale));
13101314
}
1311-
// TODO: uncomment?
1315+
// TODO: uncomment
13121316
/*
13131317
if (shader->uniform.loc_verticesPerInstance >= 0)
13141318
{

src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
1010
#include "Common/precompiled.h"
1111
#include "Metal/MTLCommandBuffer.hpp"
12+
#include "Metal/MTLCommandEncoder.hpp"
1213
#include "Metal/MTLRenderPass.hpp"
1314

1415
#define MAX_MTL_BUFFERS 31
@@ -244,6 +245,16 @@ class MetalRenderer : public Renderer
244245
return m_commandBuffers[m_commandBuffers.size() - 1].m_commandBuffer;
245246
}
246247

248+
// Returns the stored command encoder (m_commandEncoder) as the generic
// MTL::CommandEncoder type. Callers are expected to check GetEncoderType()
// before casting to a concrete encoder type, as the vertex buffer restride
// path does.
// NOTE(review): presumably null when no encoder is active - confirm.
MTL::CommandEncoder* GetCommandEncoder()
249+
{
250+
return m_commandEncoder;
251+
}
252+
253+
// Returns m_encoderType, the type tag that accompanies the command encoder.
// Compare against MetalEncoderType::Render before treating the pointer from
// GetCommandEncoder() as an MTL::RenderCommandEncoder.
MetalEncoderType GetEncoderType()
254+
{
255+
return m_encoderType;
256+
}
257+
247258
MTL::CommandBuffer* GetCommandBuffer();
248259
bool CommandBufferCompleted(MTL::CommandBuffer* commandBuffer);
249260
void WaitForCommandBufferCompletion(MTL::CommandBuffer* commandBuffer);
@@ -284,6 +295,7 @@ class MetalRenderer : public Renderer
284295

285296
// Hybrid pipelines
286297
class MetalHybridComputePipeline* m_copyTextureToTexturePipeline;
298+
class MetalHybridComputePipeline* m_restrideBufferPipeline;
287299

288300
// Basic
289301
MTL::SamplerState* m_nearestSampler;

src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,20 @@ inline const char* utilityShaderSource = \
3030
" uint dstSlice;\n" \
3131
"};\n" \
3232
"\n" \
33-
"vertex void vertexCopyTextureToTexture(uint vid [[vertex_id]], texture2d_array<float, access::read> src [[texture(0)]], texture2d_array<float, access::write> dst [[texture(1)]], constant CopyParams& params) {\n" \
33+
"vertex void vertexCopyTextureToTexture(uint vid [[vertex_id]], texture2d_array<float, access::read> src [[texture(0)]], texture2d_array<float, access::write> dst [[texture(1)]], constant CopyParams& params [[buffer(0)]]) {\n" \
3434
" uint2 coord = uint2(vid % params.width, vid / params.width);\n" \
3535
" return dst.write(float4(src.read(coord, params.srcSlice, params.srcMip).r, 0.0, 0.0, 0.0), coord, params.dstSlice, params.dstMip);\n" \
3636
"}\n" \
37+
"\n" \
38+
"struct RestrideParams {\n" \
39+
" uint oldStride;\n" \
40+
" uint newStride;\n" \
41+
"};\n" \
42+
"\n" \
43+
/* TODO: use uint32? Since that would require fewer iterations */ \
44+
"vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer(0)]], device uint8_t* dst [[buffer(1)]], constant RestrideParams& params [[buffer(2)]]) {\n" \
45+
" for (uint32_t i = 0; i < params.oldStride; i++) {\n" \
46+
" dst[vid * params.newStride + i] = src[vid * params.oldStride + i];\n" \
47+
" }\n" \
48+
"}\n" \
3749
"\n";

0 commit comments

Comments
 (0)