Skip to content

Commit 5c246d5

Browse files
committed
implement transform feedback
1 parent e0791c3 commit 5c246d5

File tree

7 files changed

+65
-54
lines changed

7 files changed

+65
-54
lines changed

src/Cafe/HW/Latte/Core/LatteShader.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
1010
#include "Cafe/OS/libs/gx2/GX2.h" // todo - remove dependency
1111
#include "Cafe/GraphicPack/GraphicPack2.h"
12+
#include "HW/Latte/Renderer/Renderer.h"
1213
#include "util/helpers/StringParser.h"
1314
#include "config/ActiveSettings.h"
1415
#include "Cafe/GameProfile/GameProfile.h"
@@ -688,9 +689,9 @@ void LatteShader_GetDecompilerOptions(LatteDecompilerOptions& options, LatteCons
688689
{
689690
options.usesGeometryShader = geometryShaderEnabled;
690691
options.spirvInstrinsics.hasRoundingModeRTEFloat32 = false;
692+
options.useTFViaSSBO = g_renderer->UseTFViaSSBO();
691693
if (g_renderer->GetType() == RendererAPI::Vulkan)
692694
{
693-
options.useTFViaSSBO = VulkanRenderer::GetInstance()->UseTFViaSSBO();
694695
options.spirvInstrinsics.hasRoundingModeRTEFloat32 = VulkanRenderer::GetInstance()->HasSPRIVRoundingModeRTE32();
695696
}
696697
options.strictMul = g_current_game_profile->GetAccurateShaderMul() != AccurateShaderMulOption::False;

src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp

Lines changed: 12 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2752,9 +2752,9 @@ static void _emitTEXGetGradientsHV(LatteDecompilerShaderContext* shaderContext,
27522752

27532753
const char* funcName;
27542754
if (texInstruction->opcode == GPU7_TEX_INST_GET_GRADIENTS_H)
2755-
funcName = "dFdx";
2755+
funcName = "dfdx";
27562756
else
2757-
funcName = "dFdy";
2757+
funcName = "dfdy";
27582758

27592759
src->add(" = ");
27602760

@@ -3273,15 +3273,8 @@ static void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, La
32733273
if ((cfInstruction->memWriteCompMask&(1 << i)) == 0)
32743274
continue;
32753275

3276-
if (shaderContext->options->useTFViaSSBO)
3277-
{
3278-
uint32 u32Offset = streamWrite->exportArrayBase + i;
3279-
src->addFmt("sb_buffer[sbBase{} + {}]", streamWrite->bufferIndex, u32Offset);
3280-
}
3281-
else
3282-
{
3283-
src->addFmt("sb{}[{}]", streamWrite->bufferIndex, streamWrite->exportArrayBase + i);
3284-
}
3276+
uint32 u32Offset = streamWrite->exportArrayBase + i;
3277+
src->addFmt("sb[sbBase{} + {}]", streamWrite->bufferIndex, u32Offset);
32853278

32863279
src->add(" = ");
32873280

@@ -3393,15 +3386,8 @@ static void _emitStreamWriteCode(LatteDecompilerShaderContext* shaderContext, La
33933386
if ((cfInstruction->memWriteCompMask&(1 << i)) == 0)
33943387
continue;
33953388

3396-
if (shaderContext->options->useTFViaSSBO)
3397-
{
3398-
uint32 u32Offset = cfInstruction->exportArrayBase + i;
3399-
src->addFmt("sb_buffer[sbBase{} + {}]", streamoutBufferIndex, u32Offset);
3400-
}
3401-
else
3402-
{
3403-
src->addFmt("sb{}[{}]", streamoutBufferIndex, cfInstruction->exportArrayBase + i);
3404-
}
3389+
uint32 u32Offset = cfInstruction->exportArrayBase + i;
3390+
src->addFmt("sb[sbBase{} + {}]", streamoutBufferIndex, u32Offset);
34053391

34063392
src->add(" = ");
34073393

@@ -3595,15 +3581,12 @@ void LatteDecompiler_emitClauseCodeMSL(LatteDecompilerShaderContext* shaderConte
35953581
// emit vertex
35963582
src->add("EmitVertex();" _CRLF);
35973583
// increment transform feedback pointer
3598-
if (shaderContext->analyzer.useSSBOForStreamout)
3584+
for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
35993585
{
3600-
for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
3601-
{
3602-
if (!shaderContext->output->streamoutBufferWriteMask[i])
3603-
continue;
3604-
cemu_assert_debug((shaderContext->output->streamoutBufferStride[i] & 3) == 0);
3605-
src->addFmt("sbBase{} += {};" _CRLF, i, shaderContext->output->streamoutBufferStride[i] / 4);
3606-
}
3586+
if (!shaderContext->output->streamoutBufferWriteMask[i])
3587+
continue;
3588+
cemu_assert_debug((shaderContext->output->streamoutBufferStride[i] & 3) == 0);
3589+
src->addFmt("sbBase{} += {};" _CRLF, i, shaderContext->output->streamoutBufferStride[i] / 4);
36073590
}
36083591

36093592
if( shaderContext->analyzer.modifiesPixelActiveState )
@@ -3970,7 +3953,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext,
39703953
src->addFmt("float cubeMapArrayIndex{} = 0.0;" _CRLF, i);
39713954
}
39723955
// init base offset for streamout buffer writes
3973-
if (shaderContext->analyzer.useSSBOForStreamout && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry))
3956+
if (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry)
39743957
{
39753958
for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
39763959
{

src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,8 @@ namespace LatteDecompiler
9494
uniformCurrentOffset += 8;
9595
}
9696
// define verticesPerInstance + streamoutBufferBaseX
97-
if (decompilerContext->analyzer.useSSBOForStreamout &&
98-
(shader->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->options->usesGeometryShader == false) ||
99-
(shader->shaderType == LatteConst::ShaderType::Geometry) )
97+
if ((shader->shaderType == LatteConst::ShaderType::Vertex && decompilerContext->options->usesGeometryShader == false) ||
98+
(shader->shaderType == LatteConst::ShaderType::Geometry))
10099
{
101100
src->add("int verticesPerInstance;" _CRLF);
102101
uniformOffsets.offset_verticesPerInstance = uniformCurrentOffset;
@@ -251,8 +250,6 @@ namespace LatteDecompiler
251250
{
252251
_emitAttributes(decompilerContext);
253252
_emitVSOutputs(decompilerContext);
254-
255-
// TODO: transform feedback
256253
}
257254
else if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel)
258255
{
@@ -379,6 +376,13 @@ namespace LatteDecompiler
379376
case LatteConst::ShaderType::Vertex:
380377
src->add(", uint vid [[vertex_id]]");
381378
src->add(", uint iid [[instance_id]]");
379+
380+
// streamout buffer (transform feedback)
381+
if (decompilerContext->analyzer.hasStreamoutEnable && decompilerContext->analyzer.hasStreamoutWrite)
382+
{
383+
src->addFmt(", device int* sb [[buffer({})]]" _CRLF, decompilerContext->output->resourceMappingVK.getTFStorageBufferBindingPoint());
384+
}
385+
382386
break;
383387
case LatteConst::ShaderType::Pixel:
384388
src->add(", bool frontFacing [[front_facing]]");

src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "Cemu/Logging/CemuDebugLogging.h"
1717
#include "HW/Latte/Core/Latte.h"
1818
#include "HW/Latte/ISA/LatteReg.h"
19+
#include "Metal/MTLResource.hpp"
1920
#include "Metal/MTLTypes.hpp"
2021
#include "gui/guiWrapper.h"
2122

@@ -39,6 +40,9 @@ MetalRenderer::MetalRenderer()
3940
// Texture readback
4041
m_readbackBuffer = m_device->newBuffer(TEXTURE_READBACK_SIZE, MTL::StorageModeShared);
4142

43+
// Transform feedback
44+
m_xfbRingBuffer = m_device->newBuffer(LatteStreamout_GetRingBufferSize(), MTL::StorageModeShared);
45+
4246
// Initialize state
4347
for (uint32 i = 0; i < (uint32)LatteConst::ShaderType::TotalCount; i++)
4448
{
@@ -1185,7 +1189,21 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
11851189
// Storage buffer
11861190
if (shader->resourceMapping.tfStorageBindingPoint >= 0)
11871191
{
1188-
debug_printf("storage buffer not implemented, index: %i\n", shader->resourceMapping.tfStorageBindingPoint);
1192+
switch (shader->shaderType)
1193+
{
1194+
case LatteConst::ShaderType::Vertex:
1195+
{
1196+
renderCommandEncoder->setVertexBuffer(m_xfbRingBuffer, 0, shader->resourceMapping.tfStorageBindingPoint);
1197+
break;
1198+
}
1199+
case LatteConst::ShaderType::Pixel:
1200+
{
1201+
renderCommandEncoder->setFragmentBuffer(m_xfbRingBuffer, 0, shader->resourceMapping.tfStorageBindingPoint);
1202+
break;
1203+
}
1204+
default:
1205+
UNREACHABLE;
1206+
}
11891207
}
11901208
}
11911209

src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ class MetalRenderer : public Renderer
147147
cemuLog_log(LogType::MetalLogging, "Imgui is not yet supported on Metal");
148148
};
149149

150+
// Metal backend: always answers true, i.e. transform feedback is reported as
// being done via a storage buffer (TF via SSBO). Overrides the Renderer query.
bool UseTFViaSSBO() const override
{
	return true;
}
150151
void AppendOverlayDebugInfo() override;
151152

152153
// rendertarget
@@ -265,6 +266,9 @@ class MetalRenderer : public Renderer
265266
MTL::Buffer* m_readbackBuffer;
266267
uint32 m_readbackBufferWriteOffset = 0;
267268

269+
// Transform feedback
270+
MTL::Buffer* m_xfbRingBuffer;
271+
268272
// Active objects
269273
MTL::CommandBuffer* m_commandBuffer = nullptr;
270274
MetalEncoderType m_encoderType = MetalEncoderType::None;

src/Cafe/HW/Latte/Renderer/Renderer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ class Renderer
8585
virtual void DeleteFontTextures() = 0;
8686

8787
// Read-only accessor for the vendor value held in m_vendor.
GfxVendor GetVendor() const
{
	return m_vendor;
}
88+
// Query: does this renderer do transform feedback via SSBO?
// The base implementation answers false; it is virtual so that concrete
// renderer backends can supply their own answer.
virtual bool UseTFViaSSBO() const
{
	return false;
}
8889
virtual void AppendOverlayDebugInfo() = 0;
8990

9091
// rendertarget

src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,11 @@ class PipelineInfo
7373
return true;
7474
}
7575

76-
76+
7777
template<typename T>
7878
struct direct_hash
7979
{
80-
size_t operator()(const uint64& k) const noexcept
80+
size_t operator()(const uint64& k) const noexcept
8181
{
8282
return k;
8383
}
@@ -277,7 +277,6 @@ class VulkanRenderer : public Renderer
277277
// texture functions
278278
void* texture_acquireTextureUploadBuffer(uint32 size) override;
279279
void texture_releaseTextureUploadBuffer(uint8* mem) override;
280-
281280

282281
TextureDecoder* texture_chooseDecodedFormat(Latte::E_GX2SURFFMT format, bool isDepth, Latte::E_DIM dim, uint32 width, uint32 height) override;
283282

@@ -370,7 +369,7 @@ class VulkanRenderer : public Renderer
370369
VkRect2D currentScissorRect{};
371370

372371
// vertex bindings
373-
struct
372+
struct
374373
{
375374
uint32 offset;
376375
}currentVertexBinding[LATTE_MAX_VERTEX_BUFFERS]{};
@@ -457,17 +456,17 @@ class VulkanRenderer : public Renderer
457456
bool shaderRoundingModeRTEFloat32{ false };
458457
}shaderFloatControls; // from VK_KHR_shader_float_controls
459458

460-
struct
459+
struct
461460
{
462461
bool debug_utils = false; // VK_EXT_DEBUG_UTILS
463462
}instanceExtensions;
464463

465-
struct
464+
struct
466465
{
467466
bool useTFEmulationViaSSBO = true; // emulate transform feedback via shader writes to a storage buffer
468467
}mode;
469468

470-
struct
469+
struct
471470
{
472471
uint32 minUniformBufferOffsetAlignment = 256;
473472
uint32 nonCoherentAtomSize = 256;
@@ -497,7 +496,7 @@ class VulkanRenderer : public Renderer
497496
void CreateCommandBuffers();
498497

499498
void swapchain_createDescriptorSetLayout();
500-
499+
501500
// shader
502501

503502
bool IsAsyncPipelineAllowed(uint32 numIndices);
@@ -512,6 +511,8 @@ class VulkanRenderer : public Renderer
512511
void DeleteFontTextures() override;
513512
bool BeginFrame(bool mainWindow) override;
514513

514+
// Renderer override: returns the useTFEmulationViaSSBO switch stored under
// m_featureControl.mode.
bool UseTFViaSSBO() const override
{
	return m_featureControl.mode.useTFEmulationViaSSBO;
}
515+
515516
// drawcall emulation
516517
PipelineInfo* draw_createGraphicsPipeline(uint32 indexCount);
517518
PipelineInfo* draw_getOrCreateGraphicsPipeline(uint32 indexCount);
@@ -574,7 +575,7 @@ class VulkanRenderer : public Renderer
574575
VkDevice m_logicalDevice = VK_NULL_HANDLE;
575576
VkDebugUtilsMessengerEXT m_debugCallback = nullptr;
576577
volatile bool m_destructionRequested = false;
577-
578+
578579
QueueFamilyIndices m_indices{};
579580

580581
Semaphore m_pipeline_cache_semaphore;
@@ -583,7 +584,7 @@ class VulkanRenderer : public Renderer
583584
VkPipelineCache m_pipeline_cache{ nullptr };
584585
VkPipelineLayout m_pipelineLayout{nullptr};
585586
VkCommandPool m_commandPool{ nullptr };
586-
587+
587588
// buffer to cache uniform vars
588589
VkBuffer m_uniformVarBuffer = VK_NULL_HANDLE;
589590
VkDeviceMemory m_uniformVarBufferMemory = VK_NULL_HANDLE;
@@ -652,19 +653,19 @@ class VulkanRenderer : public Renderer
652653
bool m_submitOnIdle{}; // submit current buffer if Latte command processor goes into idle state (no more commands or waiting for externally signaled condition)
653654

654655
// tracking for dynamic offsets
655-
struct
656+
struct
656657
{
657658
uint32 uniformVarBufferOffset[VulkanRendererConst::SHADER_STAGE_INDEX_COUNT];
658-
struct
659+
struct
659660
{
660661
uint32 unformBufferOffset[LATTE_NUM_MAX_UNIFORM_BUFFERS];
661662
}shaderUB[VulkanRendererConst::SHADER_STAGE_INDEX_COUNT];
662663
}dynamicOffsetInfo{};
663664

664665
// streamout
665-
struct
666+
struct
666667
{
667-
struct
668+
struct
668669
{
669670
bool enabled;
670671
uint32 ringBufferOffset;
@@ -714,11 +715,11 @@ class VulkanRenderer : public Renderer
714715
accessFlags = 0;
715716
if constexpr ((TSyncOp & BUFFER_SHADER_READ) != 0)
716717
{
717-
// in theory: VK_ACCESS_INDEX_READ_BIT should be set here too but indices are currently separated
718+
// in theory: VK_ACCESS_INDEX_READ_BIT should be set here too but indices are currently separated
718719
stages |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
719720
accessFlags |= VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_SHADER_READ_BIT;
720721
}
721-
722+
722723
if constexpr ((TSyncOp & BUFFER_SHADER_WRITE) != 0)
723724
{
724725
stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
@@ -921,7 +922,6 @@ class VulkanRenderer : public Renderer
921922

922923
public:
923924
// Returns the disableMultithreadedCompilation flag from m_featureControl.
bool GetDisableMultithreadedCompilation() const
{
	return m_featureControl.disableMultithreadedCompilation;
}
924-
// Reads the useTFEmulationViaSSBO switch stored under m_featureControl.mode.
bool UseTFViaSSBO() const
{
	return m_featureControl.mode.useTFEmulationViaSSBO;
}
925925
// Returns the shaderRoundingModeRTEFloat32 flag stored under
// m_featureControl.shaderFloatControls.
bool HasSPRIVRoundingModeRTE32() const
{
	return m_featureControl.shaderFloatControls.shaderRoundingModeRTEFloat32;
}
926926
// True only when both m_featureControl.debugMarkersSupported and the
// instanceExtensions.debug_utils flag are set.
bool IsDebugUtilsEnabled() const
{
	// Same short-circuit as the && form: the extension flag is only consulted
	// when debug markers are supported.
	if (!m_featureControl.debugMarkersSupported)
		return false;
	return m_featureControl.instanceExtensions.debug_utils;
}
927927

@@ -931,7 +931,7 @@ class VulkanRenderer : public Renderer
931931
void debug_genericBarrier();
932932

933933
// shaders
934-
struct
934+
struct
935935
{
936936
RendererShaderVk* copySurface_vs{};
937937
RendererShaderVk* copySurface_psDepth2Color{};

0 commit comments

Comments
 (0)