Skip to content

Commit a3476c7

Browse files
macOS: Workaround for MoltenVK stride limitation (#534)
1 parent 79e731d commit a3476c7

File tree

6 files changed

+57
-3
lines changed

6 files changed

+57
-3
lines changed

src/Cafe/HW/Latte/Core/LatteBufferData.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "Cafe/GameProfile/GameProfile.h"
1010

1111
#include "Cafe/HW/Latte/Core/LatteBufferCache.h"
12+
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
1213

1314
template<int vectorLen>
1415
void rectGenerate4thVertex(uint32be* output, uint32be* input0, uint32be* input1, uint32be* input2)
@@ -198,6 +199,19 @@ bool LatteBufferCache_Sync(uint32 minIndex, uint32 maxIndex, uint32 baseInstance
198199
if (fixedBufferSize == 0 || bufferStride == 0)
199200
fixedBufferSize += 128;
200201

202+
203+
#if BOOST_OS_MACOS
204+
if(bufferStride % 4 != 0)
205+
{
206+
if (VulkanRenderer* vkRenderer = VulkanRenderer::GetInstance())
207+
{
208+
auto fixedBuffer = vkRenderer->buffer_genStrideWorkaroundVertexBuffer(bufferAddress, fixedBufferSize, bufferStride);
209+
vkRenderer->buffer_bindVertexStrideWorkaroundBuffer(fixedBuffer.first, fixedBuffer.second, bufferIndex, fixedBufferSize);
210+
continue;
211+
}
212+
}
213+
#endif
214+
201215
uint32 bindOffset = LatteBufferCache_retrieveDataInCache(bufferAddress, fixedBufferSize);
202216
g_renderer->buffer_bindVertexBuffer(bufferIndex, bindOffset, fixedBufferSize);
203217
}

src/Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ void VKRSynchronizedRingAllocator::allocateAdditionalUploadBuffer(uint32 sizeReq
2727
m_vkrMemMgr->CreateBuffer(bufferAllocSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, newBuffer.vk_buffer, newBuffer.vk_mem);
2828
else if (m_bufferType == BUFFER_TYPE::INDEX)
2929
m_vkrMemMgr->CreateBuffer(bufferAllocSize, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, newBuffer.vk_buffer, newBuffer.vk_mem);
30+
else if (m_bufferType == BUFFER_TYPE::STRIDE)
31+
m_vkrMemMgr->CreateBuffer(bufferAllocSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, newBuffer.vk_buffer, newBuffer.vk_mem);
3032
else
3133
cemu_assert_debug(false);
3234

src/Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ class VKRSynchronizedRingAllocator
7575
{
7676
STAGING, // staging upload buffer
7777
INDEX, // buffer for index data
78+
STRIDE, // buffer for stride-adjusted vertex data
7879
};
7980

8081
VKRSynchronizedRingAllocator(class VulkanRenderer* vkRenderer, class VKRMemoryManager* vkMemoryManager, BUFFER_TYPE bufferType, uint32 minimumBufferAllocSize) : m_vkr(vkRenderer), m_vkrMemMgr(vkMemoryManager), m_bufferType(bufferType), m_minimumBufferAllocSize(minimumBufferAllocSize) {};
@@ -138,7 +139,10 @@ class VKRMemoryManager
138139
{
139140
friend class VKRSynchronizedRingAllocator;
140141
public:
141-
VKRMemoryManager(class VulkanRenderer* renderer) : m_stagingBuffer(renderer, this, VKRSynchronizedRingAllocator::BUFFER_TYPE::STAGING, 32u * 1024 * 1024), m_indexBuffer(renderer, this, VKRSynchronizedRingAllocator::BUFFER_TYPE::INDEX, 4u * 1024 * 1024)
142+
VKRMemoryManager(class VulkanRenderer* renderer) :
143+
m_stagingBuffer(renderer, this, VKRSynchronizedRingAllocator::BUFFER_TYPE::STAGING, 32u * 1024 * 1024),
144+
m_indexBuffer(renderer, this, VKRSynchronizedRingAllocator::BUFFER_TYPE::INDEX, 4u * 1024 * 1024),
145+
m_vertexStrideMetalBuffer(renderer, this, VKRSynchronizedRingAllocator::BUFFER_TYPE::STRIDE, 4u * 1024 * 1024)
142146
{
143147
m_vkr = renderer;
144148
}
@@ -164,12 +168,14 @@ class VKRMemoryManager
164168

165169
VKRSynchronizedRingAllocator& getStagingAllocator() { return m_stagingBuffer; }; // allocator for texture/attribute/uniform uploads
166170
VKRSynchronizedRingAllocator& getIndexAllocator() { return m_indexBuffer; }; // allocator for index data
171+
VKRSynchronizedRingAllocator& getMetalStrideWorkaroundAllocator() { return m_vertexStrideMetalBuffer; }; // allocator for stride-adjusted vertex data
167172

168173
void cleanupBuffers(uint64 latestFinishedCommandBufferId)
169174
{
170175
LatteIndices_invalidateAll();
171176
m_stagingBuffer.CleanupBuffer(latestFinishedCommandBufferId);
172177
m_indexBuffer.CleanupBuffer(latestFinishedCommandBufferId);
178+
m_vertexStrideMetalBuffer.CleanupBuffer(latestFinishedCommandBufferId);
173179
}
174180

175181
// memory helpers
@@ -197,4 +203,5 @@ class VKRMemoryManager
197203
class VulkanRenderer* m_vkr;
198204
VKRSynchronizedRingAllocator m_stagingBuffer;
199205
VKRSynchronizedRingAllocator m_indexBuffer;
206+
VKRSynchronizedRingAllocator m_vertexStrideMetalBuffer;
200207
};

src/Cafe/HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -459,8 +459,7 @@ void PipelineCompiler::InitVertexInputState(const LatteContextRegister& latteReg
459459
VkVertexInputBindingDescription entry{};
460460
#if BOOST_OS_MACOS
461461
if (bufferStride % 4 != 0) {
462-
forceLog_printf("MoltenVK error: vertex stride was %d, expected multiple of 4", bufferStride);
463-
bufferStride = 0;
462+
bufferStride = bufferStride + (4-(bufferStride % 4));
464463
}
465464
#endif
466465
entry.stride = bufferStride;

src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3458,6 +3458,36 @@ void VulkanRenderer::buffer_bindVertexBuffer(uint32 bufferIndex, uint32 offset,
34583458
vkCmdBindVertexBuffers(m_state.currentCommandBuffer, bufferIndex, 1, &attrBuffer, &attrOffset);
34593459
}
34603460

3461+
// Binds a vertex buffer produced by the stride workaround path to vertex binding slot `bufferIndex`.
//   fixedBuffer - Vulkan buffer holding the re-strided vertex data
//   offset      - byte offset of the data inside fixedBuffer
//   bufferIndex - vertex binding slot, must be below LATTE_MAX_VERTEX_BUFFERS
//   size        - currently unused by this function
void VulkanRenderer::buffer_bindVertexStrideWorkaroundBuffer(VkBuffer fixedBuffer, uint32 offset, uint32 bufferIndex, uint32 size)
{
	cemu_assert_debug(bufferIndex < LATTE_MAX_VERTEX_BUFFERS);

	// NOTE(review): 0xFFFFFFFF presumably acts as an "unknown offset" marker so that a later
	// regular bind of this slot is not treated as redundant - confirm against buffer_bindVertexBuffer
	m_state.currentVertexBinding[bufferIndex].offset = 0xFFFFFFFF;

	// vkCmdBindVertexBuffers takes arrays of buffers/offsets; we bind exactly one
	VkBuffer boundBuffer = fixedBuffer;
	VkDeviceSize boundOffset = offset;
	vkCmdBindVertexBuffers(m_state.currentCommandBuffer, bufferIndex, 1, &boundBuffer, &boundOffset);
}
3469+
3470+
// Copies vertex data whose stride is not a multiple of 4 into a freshly allocated buffer in which
// every element is padded up to the next multiple of 4 bytes.
//   buffer    - physical address of the source vertex data
//   size      - number of source bytes to convert
//   oldStride - source stride in bytes; must be non-zero and not a multiple of 4
// Returns the Vulkan buffer that holds the converted data and the byte offset of the data inside it.
// The padding bytes added after each element are left uninitialized.
std::pair<VkBuffer, uint32> VulkanRenderer::buffer_genStrideWorkaroundVertexBuffer(MPTR buffer, uint32 size, uint32 oldStride)
{
	cemu_assert_debug(oldStride % 4 != 0);

	const uint8* srcData = memory_getPointerFromPhysicalOffset(buffer);

	// round up to the next multiple of 4 (an already aligned stride stays unchanged instead of growing by 4)
	const uint32 newStride = (oldStride + 3u) & ~3u;

	// the source range does not have to end on an element boundary (e.g. the last vertex may only
	// cover its attributes and not the full stride), so a trailing partial element counts as well
	const uint32 fullElementCount = size / oldStride;
	const uint32 tailBytes = size % oldStride;
	const uint32 elementCount = fullElementCount + (tailBytes != 0 ? 1u : 0u);
	const uint32 newSize = elementCount * newStride;

	auto new_buffer_alloc = memoryManager->getMetalStrideWorkaroundAllocator().AllocateBufferMemory(newSize, 128);
	uint8* dstData = new_buffer_alloc.memPtr;

	// re-pack element by element: source elements are oldStride apart, destination elements newStride apart
	for (uint32 elem = 0; elem < fullElementCount; elem++)
	{
		memcpy(dstData + (size_t)elem * newStride, srcData + (size_t)elem * oldStride, oldStride);
	}
	// copy whatever is left of the last, incomplete element so that it is not dropped
	if (tailBytes != 0)
	{
		memcpy(dstData + (size_t)fullElementCount * newStride, srcData + (size_t)fullElementCount * oldStride, tailBytes);
	}
	return {new_buffer_alloc.vkBuffer, new_buffer_alloc.bufferOffset};
}
3490+
34613491
void VulkanRenderer::buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size)
34623492
{
34633493
cemu_assert_debug(!m_useHostMemoryForCache);

src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,8 @@ class VulkanRenderer : public Renderer
342342
void bufferCache_copy(uint32 srcOffset, uint32 dstOffset, uint32 size) override;
343343

344344
void buffer_bindVertexBuffer(uint32 bufferIndex, uint32 buffer, uint32 size) override;
345+
void buffer_bindVertexStrideWorkaroundBuffer(VkBuffer fixedBuffer, uint32 offset, uint32 bufferIndex, uint32 size);
346+
std::pair<VkBuffer, uint32> buffer_genStrideWorkaroundVertexBuffer(MPTR buffer, uint32 size, uint32 oldStride);
345347
void buffer_bindUniformBuffer(LatteConst::ShaderType shaderType, uint32 bufferIndex, uint32 offset, uint32 size) override;
346348

347349
RendererShader* shader_create(RendererShader::ShaderType type, uint64 baseHash, uint64 auxHash, const std::string& source, bool isGameShader, bool isGfxPackShader) override;

0 commit comments

Comments
 (0)