Skip to content

Commit 8316cee

Browse files
committed
prepare for surface copy
1 parent eb573fc commit 8316cee

File tree

9 files changed

+124
-48
lines changed

9 files changed

+124
-48
lines changed

src/Cafe/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,9 @@ if(ENABLE_METAL)
560560
HW/Latte/Renderer/Metal/MetalPipelineCache.h
561561
HW/Latte/Renderer/Metal/MetalDepthStencilCache.cpp
562562
HW/Latte/Renderer/Metal/MetalDepthStencilCache.h
563-
HW/Latte/Renderer/Metal/ShaderSourcePresent.h
563+
HW/Latte/Renderer/Metal/MetalHybridComputePipeline.cpp
564+
HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h
565+
HW/Latte/Renderer/Metal/UtilityShaderSource.h
564566
)
565567

566568
#target_link_libraries(CemuCafe PRIVATE

src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3732,13 +3732,6 @@ void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderCon
37323732
"return round(x / 2.0) * 2.0;\r\n"
37333733
"}\r\n");
37343734

3735-
// unpackHalf2x16
3736-
fCStr_shaderSource->add(""
3737-
"template<typename T>\r\n"
3738-
"float2 unpackHalf2x16(T x) {\r\n"
3739-
"return float2(as_type<half2>(x));\r\n"
3740-
"}\r\n");
3741-
37423735
// Bit cast
37433736

37443737
// Scalar

src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLAttrDecoder.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
256256
// seen in Giana Sisters: Twisted Dreams
257257
_readLittleEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
258258
// TODO: uint4?
259-
src->add("attrDecoder.xyzw = as_type<uint4>(float4(unpackHalf2x16(attrDecoder.x|(attrDecoder.y<<16)),unpackHalf2x16(attrDecoder.z|(attrDecoder.w<<16))));" _CRLF);
259+
src->add("attrDecoder.xyzw = as_type<uint4>(float4(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))),float2(as_type<half2>(attrDecoder.z|(attrDecoder.w<<16)))));" _CRLF);
260260
}
261261
else if (attrib->format == FMT_16_16 && attrib->nfa == 0 && attrib->isSigned != 0)
262262
{
@@ -271,7 +271,7 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
271271
{
272272
// seen in Giana Sisters: Twisted Dreams
273273
_readLittleEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
274-
src->add("attrDecoder.xy = as_type<uint2>(unpackHalf2x16(attrDecoder.x|(attrDecoder.y<<16)));" _CRLF);
274+
src->add("attrDecoder.xy = as_type<uint2>(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))));" _CRLF);
275275
src->add("attrDecoder.zw = uint2(0);" _CRLF);
276276
}
277277
else if( attrib->format == FMT_8_8_8_8 && attrib->nfa == 0 && attrib->isSigned == 0 )
@@ -394,7 +394,7 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
394394
{
395395
_readBigEndianAttributeU16x4(shaderContext, src, attributeInputIndex);
396396
// TODO: uint4?
397-
src->add("attrDecoder.xyzw = as_type<uint4>(float4(unpackHalf2x16(attrDecoder.x|(attrDecoder.y<<16)),unpackHalf2x16(attrDecoder.z|(attrDecoder.w<<16))));" _CRLF);
397+
src->add("attrDecoder.xyzw = as_type<uint4>(float4(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))),float2(as_type<half2>(attrDecoder.z|(attrDecoder.w<<16)))));" _CRLF);
398398
}
399399
else if (attrib->format == FMT_16_16_16_16 && attrib->nfa == 0 && attrib->isSigned != 0)
400400
{
@@ -446,7 +446,7 @@ void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext
446446
else if( attrib->format == FMT_16_16_FLOAT && attrib->nfa == 2 )
447447
{
448448
_readBigEndianAttributeU16x2(shaderContext, src, attributeInputIndex);
449-
src->add("attrDecoder.xy = as_type<uint2>(unpackHalf2x16(attrDecoder.x|(attrDecoder.y<<16)));" _CRLF);
449+
src->add("attrDecoder.xy = as_type<uint2>(float2(as_type<half2>(attrDecoder.x|(attrDecoder.y<<16))));" _CRLF);
450450
src->add("attrDecoder.zw = uint2(0);" _CRLF);
451451
}
452452
else if( attrib->format == FMT_16_16 && attrib->nfa == 0 && attrib->isSigned == 0 )
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
2+
3+
/// Creates the pipeline state(s) for a "hybrid" compute pass that is executed
/// through the vertex stage with rasterization disabled.
///
/// @param mtlRenderer        Renderer whose Metal device creates the pipeline state.
/// @param library            Library that is searched for the entry points.
/// @param vertexFunctionName Name of the vertex entry point inside `library`.
/// @param kernelFunctionName Name of the kernel entry point; not used yet because the
///                           compute pipeline state is still a TODO.
///
/// On failure an error is logged and `GetRenderPipelineState()` returns nullptr.
MetalHybridComputePipeline::MetalHybridComputePipeline(class MetalRenderer* mtlRenderer, MTL::Library* library, const char* vertexFunctionName, const char* kernelFunctionName)
	: m_renderPipelineState(nullptr), m_computePipelineState(nullptr)
{
	// Both members start out as nullptr so that the getters and the destructor never
	// observe an indeterminate pointer, even while the compute path is unimplemented.

	// Render pipeline state
	MTL::Function* vertexFunction = library->newFunction(NS::String::string(vertexFunctionName, NS::ASCIIStringEncoding));
	if (!vertexFunction)
	{
		// The library has no function with this name; there is nothing to build a pipeline from.
		printf("error creating hybrid render pipeline state: vertex function \"%s\" not found\n", vertexFunctionName);
		return;
	}

	MTL::RenderPipelineDescriptor* renderPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
	renderPipelineDescriptor->setVertexFunction(vertexFunction);
	// Only the vertex stage does work; no fragments are produced.
	renderPipelineDescriptor->setRasterizationEnabled(false);

	NS::Error* error = nullptr;
	m_renderPipelineState = mtlRenderer->GetDevice()->newRenderPipelineState(renderPipelineDescriptor, &error);
	renderPipelineDescriptor->release();
	vertexFunction->release();
	if (error)
	{
		printf("error creating hybrid render pipeline state: %s\n", error->localizedDescription()->utf8String());
		// NOTE(review): NS::Error out-parameters are normally autoreleased; confirm that this
		// release() (kept to match the rest of the renderer) is not an over-release.
		error->release();
	}

	// Compute pipeline state
	// TODO
}
25+
26+
/// Releases the pipeline state(s) owned by this object.
MetalHybridComputePipeline::~MetalHybridComputePipeline()
{
	// The constructor only logs when pipeline creation fails, so the pointer may be
	// null here; do not call a member function through a null pointer.
	if (m_renderPipelineState)
		m_renderPipelineState->release();
	// TODO: uncomment
	//m_computePipelineState->release();
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
2+
#include "HW/Latte/Renderer/Metal/MetalRenderer.h"
3+
#include "Metal/MTLLibrary.hpp"
4+
#include "Metal/MTLRenderPipeline.hpp"
5+
6+
class MetalHybridComputePipeline
7+
{
8+
public:
9+
MetalHybridComputePipeline(class MetalRenderer* mtlRenderer, MTL::Library* library, const char* vertexFunctionName, const char* kernelFunctionName);
10+
~MetalHybridComputePipeline();
11+
12+
MTL::RenderPipelineState* GetRenderPipelineState() const { return m_renderPipelineState; }
13+
14+
MTL::RenderPipelineState* GetComputePipelineState() const { return m_computePipelineState; }
15+
16+
private:
17+
MTL::RenderPipelineState* m_renderPipelineState;
18+
MTL::RenderPipelineState* m_computePipelineState;
19+
};

src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,15 @@
77
#include "Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.h"
88
#include "Cafe/HW/Latte/Renderer/Metal/MetalDepthStencilCache.h"
99
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureReadbackMtl.h"
10+
#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
1011
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
1112

12-
#include "Cafe/HW/Latte/Renderer/Metal/ShaderSourcePresent.h"
13+
#include "Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h"
1314

1415
#include "Cafe/HW/Latte/Core/LatteShader.h"
1516
#include "Cafe/HW/Latte/Core/LatteIndices.h"
1617
#include "Cemu/Logging/CemuDebugLogging.h"
18+
#include "Foundation/NSError.hpp"
1719
#include "HW/Latte/Core/Latte.h"
1820
#include "HW/Latte/ISA/LatteReg.h"
1921
#include "Metal/MTLPixelFormat.hpp"
@@ -54,10 +56,31 @@ MetalRenderer::MetalRenderer()
5456
m_state.uniformBufferOffsets[i][j] = INVALID_OFFSET;
5557
}
5658
}
59+
60+
// Utility shader source
61+
NS::Error* error = nullptr;
62+
m_utilityLibrary = m_device->newLibrary(NS::String::string(utilityShaderSource, NS::ASCIIStringEncoding), nullptr, &error);
63+
if (error)
64+
{
65+
debug_printf("failed to create present library (error: %s)\n", error->localizedDescription()->utf8String());
66+
error->release();
67+
return;
68+
}
69+
70+
// Hybrid pipelines
71+
m_copyDepthToColorPipeline = new MetalHybridComputePipeline(this, m_utilityLibrary, "vertexCopyDepthToColor", "kernelCopyDepthToColor");
72+
m_copyColorToDepthPipeline = new MetalHybridComputePipeline(this, m_utilityLibrary, "vertexCopyColorToDepth", "kernelCopyColorToDepth");
5773
}
5874

5975
MetalRenderer::~MetalRenderer()
6076
{
77+
delete m_copyDepthToColorPipeline;
78+
delete m_copyColorToDepthPipeline;
79+
80+
m_presentPipeline->release();
81+
82+
m_utilityLibrary->release();
83+
6184
delete m_depthStencilCache;
6285
delete m_pipelineCache;
6386
delete m_memoryManager;
@@ -82,22 +105,15 @@ void MetalRenderer::InitializeLayer(const Vector2i& size, bool mainWindow)
82105
m_metalLayer->setPixelFormat(MTL::PixelFormatRGBA8Unorm/*_sRGB*/);
83106

84107
// Present pipeline
85-
NS::Error* error = nullptr;
86-
MTL::Library* presentLibrary = m_device->newLibrary(NS::String::string(presentLibrarySource, NS::ASCIIStringEncoding), nullptr, &error);
87-
if (error)
88-
{
89-
debug_printf("failed to create present library (error: %s)\n", error->localizedDescription()->utf8String());
90-
error->release();
91-
return;
92-
}
93-
MTL::Function* presentVertexFunction = presentLibrary->newFunction(NS::String::string("presentVertex", NS::ASCIIStringEncoding));
94-
MTL::Function* presentFragmentFunction = presentLibrary->newFunction(NS::String::string("presentFragment", NS::ASCIIStringEncoding));
95-
presentLibrary->release();
108+
MTL::Function* presentVertexFunction = m_utilityLibrary->newFunction(NS::String::string("vertexFullscreen", NS::ASCIIStringEncoding));
109+
MTL::Function* presentFragmentFunction = m_utilityLibrary->newFunction(NS::String::string("fragmentPresent", NS::ASCIIStringEncoding));
96110

97111
MTL::RenderPipelineDescriptor* renderPipelineDescriptor = MTL::RenderPipelineDescriptor::alloc()->init();
98112
renderPipelineDescriptor->setVertexFunction(presentVertexFunction);
99113
renderPipelineDescriptor->setFragmentFunction(presentFragmentFunction);
100114
renderPipelineDescriptor->colorAttachments()->object(0)->setPixelFormat(m_metalLayer->pixelFormat());
115+
116+
NS::Error* error = nullptr;
101117
m_presentPipeline = m_device->newRenderPipelineState(renderPipelineDescriptor, &error);
102118
renderPipelineDescriptor->release();
103119
presentVertexFunction->release();
@@ -106,7 +122,6 @@ void MetalRenderer::InitializeLayer(const Vector2i& size, bool mainWindow)
106122
{
107123
debug_printf("failed to create present pipeline (error: %s)\n", error->localizedDescription()->utf8String());
108124
error->release();
109-
return;
110125
}
111126
}
112127

src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,8 +257,13 @@ class MetalRenderer : public Renderer
257257
MTL::CommandQueue* m_commandQueue;
258258

259259
// Pipelines
260+
MTL::Library* m_utilityLibrary;
260261
MTL::RenderPipelineState* m_presentPipeline;
261262

263+
// Hybrid pipelines
264+
class MetalHybridComputePipeline* m_copyDepthToColorPipeline;
265+
class MetalHybridComputePipeline* m_copyColorToDepthPipeline;
266+
262267
// Basic
263268
MTL::SamplerState* m_nearestSampler;
264269

src/Cafe/HW/Latte/Renderer/Metal/ShaderSourcePresent.h

Lines changed: 0 additions & 23 deletions
This file was deleted.
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Metal Shading Language source for the renderer's utility library.
// Entry points defined here:
//   vertexFullscreen       - emits one oversized triangle covering the viewport
//   fragmentPresent        - samples texture(0) with sampler(0) at the interpolated texCoord
//   vertexCopyDepthToColor - per-vertex copy of one depth texel into a color texture
//   vertexCopyColorToDepth - per-vertex copy of one color texel's red channel
// NOTE(review): `copyWidth` in the two copy functions has no explicit [[buffer(n)]]
// attribute, so its binding index is presumably assigned implicitly - confirm against
// the host-side binding code.
// Adjacent string literals are concatenated by the compiler, so no line continuations
// are needed. `constexpr` keeps the pointer itself immutable across translation units.
inline constexpr const char* utilityShaderSource =
	"#include <metal_stdlib>\n"
	"using namespace metal;\n"
	"\n"
	"constant float2 positions[] = {float2(-1.0, -3.0), float2(-1.0, 1.0), float2(3.0, 1.0)};\n"
	"\n"
	"struct VertexOut {\n"
	" float4 position [[position]];\n"
	" float2 texCoord;\n"
	"};\n"
	"\n"
	"vertex VertexOut vertexFullscreen(ushort vid [[vertex_id]]) {\n"
	" VertexOut out;\n"
	" out.position = float4(positions[vid], 0.0, 1.0);\n"
	" out.texCoord = positions[vid] * 0.5 + 0.5;\n"
	" out.texCoord.y = 1.0 - out.texCoord.y;\n"
	"\n"
	" return out;\n"
	"}\n"
	"\n"
	"fragment float4 fragmentPresent(VertexOut in [[stage_in]], texture2d<float> tex [[texture(0)]], sampler samplr [[sampler(0)]]) {\n"
	" return tex.sample(samplr, in.texCoord);\n"
	"}\n"
	"\n"
	"vertex void vertexCopyDepthToColor(uint vid [[vertex_id]], depth2d<float, access::read> src [[texture(0)]], texture2d<float, access::write> dst [[texture(1)]], constant uint& copyWidth) {\n"
	" uint2 coord = uint2(vid % copyWidth, vid / copyWidth);\n"
	" return dst.write(float4(src.read(coord), 0.0, 0.0, 0.0), coord);\n"
	"}\n"
	"\n"
	"vertex void vertexCopyColorToDepth(uint vid [[vertex_id]], texture2d<float, access::read> src [[texture(0)]], texture2d<float, access::write> dst [[texture(1)]], constant uint& copyWidth) {\n"
	" uint2 coord = uint2(vid % copyWidth, vid / copyWidth);\n"
	" return dst.write(float4(src.read(coord).r), coord);\n"
	"}\n"
	"\n";

0 commit comments

Comments
 (0)