apple: small cleanups to the metal video driver. don't use it.

warmenhoven · warmenhoven · commit 63931092dd30 · 2026-01-10T23:31:03.000-05:00
fixes #8655 #8983 #15747 #17860 #18442 #18408
diff --git a/gfx/common/metal/menu_pipeline.metal b/gfx/common/metal/menu_pipeline.metal
@@ -111,9 +111,10 @@ fragment float4 ribbon_fragment(RibbonOutIn in [[ stage_in ]])
    const float3 up = float3(0.0, 0.0, 1.0);
    float3 x = dfdx(in.vEC);
    float3 y = dfdy(in.vEC);
+   y = -y; /* Flip Y derivative to match Vulkan's yflip */
    float3 normal = normalize(cross(x, y));
    float c = 1.0 - dot(normal, up);
-   c = (1.0 - cos(c * c)) / 13.0;
+   c = (1.0 - cos(c * c)) / 3.0;
    return float4(c, c, c, 1.0);
 }
 
@@ -195,6 +196,7 @@ fragment float4 bokeh_fragment(FontFragmentIn        in         [[ stage_in ]],
 {
     float speed = constants.time * 4.0;
     float2 uv = -1.0 + 2.0 * in.position.xy / constants.outputSize;
+    uv.y = -uv.y; /* Flip Y to match Vulkan's yflip */
     uv.x *= constants.outputSize.x / constants.outputSize.y;
     float3 color = float3(0.0);
 
@@ -270,7 +272,7 @@ fragment float4 snowflake_fragment(FontFragmentIn        in         [[ stage_in
    uv = uv * 2.0 - 1.0;
    float2 p = uv;
    p.x *= constants.outputSize.x / constants.outputSize.y;
-   //p.y  = -p.y;
+   p.y = -p.y; /* Flip Y so snowflakes fall down */
 
    float c = snowflake::col(p, constants);
    return float4(c,c,c,c);
diff --git a/gfx/common/metal/metal_common.h b/gfx/common/metal/metal_common.h
@@ -25,9 +25,8 @@
 
 #include "../../gfx_display.h"
 
-/* TODO/FIXME: implement triple buffering */
-/*! @brief maximum inflight frames */
-#define MAX_INFLIGHT 1
+/*! @brief maximum inflight frames for triple buffering */
+#define MAX_INFLIGHT 3
 #define CHAIN_LENGTH 3
 
 /* macOS requires constants in a buffer to have a 256 byte alignment. */
@@ -139,6 +138,10 @@ typedef NS_ENUM(NSUInteger, ViewportResetMode) {
 /*! @brief end commits the command buffer */
 - (void)end;
 
+/*! @brief swapBuffers acquires the next drawable, blocking if needed for vsync.
+ *  This should be called after end to match Vulkan's swap_buffers timing. */
+- (void)swapBuffers;
+
 - (void)setRotation:(unsigned)rotation;
 - (bool)readBackBuffer:(uint8_t *)buffer;
 
diff --git a/gfx/common/metal/metal_renderer.m b/gfx/common/metal/metal_renderer.m
@@ -178,6 +178,9 @@ - (instancetype)initWithDevice:(id<MTLDevice>)d
       _layer.framebufferOnly     = NO;
       _layer.displaySyncEnabled  = YES;
 #endif
+      /* Configure drawable pool for triple-buffering */
+      if (@available(iOS 13.0, macOS 10.15.4, tvOS 13.0, *))
+         _layer.maximumDrawableCount = MAX_INFLIGHT;
       _library                   = l;
       _commandQueue              = [_device newCommandQueue];
       _clearColor                = MTLClearColorMake(0, 0, 0, 1);
@@ -319,6 +322,12 @@ - (bool)_initClearState
    psd.vertexFunction   = [_library newFunctionWithName:@"stock_vertex"];
    psd.fragmentFunction = [_library newFunctionWithName:@"stock_fragment_color"];
 
+   if (!psd.vertexFunction || !psd.fragmentFunction)
+   {
+      RARCH_ERR("[Metal] Failed to load clear state shader functions.\n");
+      return NO;
+   }
+
    _clearState = [_device newRenderPipelineStateWithDescriptor:psd error:&err];
    if (err != nil)
    {
@@ -349,6 +358,12 @@ - (bool)_initMenuStates
    psd.vertexFunction   = [_library newFunctionWithName:@"stock_vertex"];
    psd.fragmentFunction = [_library newFunctionWithName:@"stock_fragment"];
 
+   if (!psd.vertexFunction || !psd.fragmentFunction)
+   {
+      RARCH_ERR("[Metal] Failed to load stock shader functions.\n");
+      return NO;
+   }
+
    _states[VIDEO_SHADER_STOCK_BLEND][0] = [_device newRenderPipelineStateWithDescriptor:psd error:&err];
    if (err != nil)
    {
@@ -572,10 +587,22 @@ - (Texture *)newTexture:(struct texture_image)image filter:(enum texture_filter_
 
 - (void)convertFormat:(RPixelFormat)fmt from:(id<MTLTexture>)src to:(id<MTLTexture>)dst
 {
-   assert(src.width == dst.width && src.height == dst.height);
-   assert(fmt >= 0 && fmt < RPixelFormatCount);
+   if (src.width != dst.width || src.height != dst.height) /* sizes must match */
+   {
+      RARCH_ERR("[Metal] convertFormat: texture dimensions mismatch\n");
+      return;
+   }
+   if (fmt < 0 || fmt >= RPixelFormatCount) /* guards the _filters index below */
+   {
+      RARCH_ERR("[Metal] convertFormat: invalid pixel format %u\n", (unsigned)fmt);
+      return;
+   }
    Filter *conv = _filters[fmt];
-   assert(conv != nil);
+   if (!conv) /* empty slot: log and skip the conversion instead of asserting */
+   {
+      RARCH_ERR("[Metal] convertFormat: no filter for format %u\n", (unsigned)fmt);
+      return;
+   }
    [conv apply:self.blitCommandBuffer in:src out:dst];
 }
 
@@ -656,24 +683,46 @@ - (bool)readBackBuffer:(uint8_t *)buffer
 
 - (void)begin
 {
-   assert(_commandBuffer == nil);
-   dispatch_semaphore_wait(_inflightSemaphore, DISPATCH_TIME_FOREVER);
+   if (_commandBuffer != nil)
+   {
+      RARCH_WARN("[Metal] begin called with active command buffer - resetting\n");
+      _commandBuffer = nil;
+   }
+
+   /* Frame pacing is left to drawable acquisition instead of waiting on
+    * _inflightSemaphore here. CAMetalLayer.nextDrawable blocks while no
+    * drawable is available, which naturally paces us to the display
+    * refresh rate. Layering the semaphore on top caused timing mismatches:
+    * it was signaled from the command buffer's completed handler, but the
+    * drawable isn't released until the NEXT vsync. */
+
    _commandBuffer = [_commandQueue commandBuffer];
    _commandBuffer.label = @"Frame command buffer";
    _backBuffer = nil;
 }
 
 - (id<MTLRenderCommandEncoder>)rce
 {
-   assert(_commandBuffer != nil);
+   if (_commandBuffer == nil)
+   {
+      RARCH_ERR("[Metal] rce called without active command buffer\n");
+      return nil;
+   }
    if (_rce == nil)
    {
+      id<CAMetalDrawable> drawable = self.nextDrawable;
+      if (!drawable || !drawable.texture)
+      {
+         RARCH_WARN("[Metal] Failed to acquire drawable - frame dropped\n");
+         return nil;
+      }
+
       MTLRenderPassDescriptor *rpd = [MTLRenderPassDescriptor new];
       rpd.colorAttachments[0].clearColor = _clearColor;
       rpd.colorAttachments[0].loadAction = MTLLoadActionClear;
-      rpd.colorAttachments[0].texture = self.nextDrawable.texture;
+      rpd.colorAttachments[0].texture = drawable.texture;
       if (_captureEnabled)
-         _backBuffer = self.nextDrawable.texture;
+         _backBuffer = drawable.texture;
       _rce       = [_commandBuffer renderCommandEncoderWithDescriptor:rpd];
       _rce.label = @"Frame command encoder";
    }
@@ -729,7 +778,11 @@ - (void)drawQuadX:(float)x y:(float)y w:(float)w h:(float)h
 
 - (void)end
 {
-   assert(_commandBuffer != nil);
+   if (_commandBuffer == nil)
+   {
+      RARCH_WARN("[Metal] end called without active command buffer\n");
+      return;
+   }
 
    [_chain[_currentChain] commitRanges];
 
@@ -743,9 +796,10 @@ - (void)end
          [bce endEncoding];
       }
 #endif
-      /* Pending blits for mipmaps or render passes for slang shaders */
+      /* Pending blits for mipmaps or render passes for slang shaders.
+       * Metal command queues guarantee commit-order execution, so we don't
+       * need to block the CPU waiting for completion. */
       [_blitCommandBuffer commit];
-      [_blitCommandBuffer waitUntilCompleted];
       _blitCommandBuffer = nil;
    }
 
@@ -755,23 +809,40 @@ - (void)end
       _rce = nil;
    }
 
-   __block dispatch_semaphore_t inflight = _inflightSemaphore;
-   [_commandBuffer addCompletedHandler:^(id<MTLCommandBuffer> _) {
-      dispatch_semaphore_signal(inflight);
-   }];
+   id<CAMetalDrawable> drawable = self.nextDrawable;
 
-   if (self.nextDrawable)
+   if (drawable)
    {
-      [_commandBuffer presentDrawable:self.nextDrawable];
+      /* Present from a scheduled handler rather than via
+       * [commandBuffer presentDrawable:]. MoltenVK does the same, citing Apple
+       * guidance that calling [drawable present] from a scheduled handler
+       * performs better. Either way the present is issued once the command
+       * buffer has been scheduled (queued for the GPU); it does not wait for
+       * the command buffer to complete. */
+      [_commandBuffer addScheduledHandler:^(id<MTLCommandBuffer> _Nonnull buffer) {
+         [drawable present];
+      }];
    }
 
    [_commandBuffer commit];
 
    _commandBuffer = nil;
-   _drawable = nil;
    [self _nextChain];
 }
 
+- (void)swapBuffers
+{
+   /* Acquire the next drawable after presentation, matching Vulkan's
+    * swap_buffers timing where acquisition happens AFTER presenting.
+    *
+    * _drawable is cleared first so the old reference is dropped before a
+    * fresh drawable is requested. nextDrawable blocks while every drawable
+    * in the layer's pool is in flight; that blocking is intentional and
+    * paces us to the display refresh rate. */
+   _drawable = nil;
+   _drawable = _layer.nextDrawable;
+}
+
 - (bool)allocRange:(BufferRange *)range length:(NSUInteger)length
 {
    return [_chain[_currentChain] allocRange:range length:length];
@@ -981,7 +1052,7 @@ - (void)apply:(id<MTLCommandBuffer>)cb inBuf:(id<MTLBuffer>)tin outTex:(id<MTLTe
    [self.delegate configure:ce];
 
    MTLSize size  = MTLSizeMake(32, 1, 1);
-   MTLSize count = MTLSizeMake((tin.length + 00) / 32, 1, 1);
+   MTLSize count = MTLSizeMake((tin.length + 31) / 32, 1, 1);
 
    [ce dispatchThreadgroups:count threadsPerThreadgroup:size];
    [ce endEncoding];
diff --git a/gfx/common/metal_common.h b/gfx/common/metal_common.h
@@ -59,6 +59,7 @@ extern MTLPixelFormat SelectOptimalPixelFormat(MTLPixelFormat fmt);
 
 - (void)setFilteringIndex:(int)index smooth:(bool)smooth;
 - (BOOL)setShaderFromPath:(NSString *)path;
+- (void)clearShader;
 - (void)updateFrame:(void const *)src pitch:(NSUInteger)pitch;
 - (bool)readViewport:(uint8_t *)buffer isIdle:(bool)isIdle;
 
diff --git a/gfx/drivers/metal.m b/gfx/drivers/metal.m