metal : simplify

ggerganov · ggerganov · commit 50cc09de2216 · 2025-01-26T15:58:31.000+02:00
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
@@ -1044,6 +1044,37 @@ static void ggml_metal_free(struct ggml_backend_metal_context * ctx) {
     id<MTLResidencySet> rset;
 };
 
+// create a residency set with all Metal buffers of ctx and request residency for it; returns false on failure
+static bool ggml_backend_metal_buffer_rset_init(struct ggml_backend_metal_buffer_context * ctx, id<MTLDevice> device) {
+    MTLResidencySetDescriptor * desc = [[MTLResidencySetDescriptor alloc] init];
+    desc.label = @"ggml_backend_metal";
+    desc.initialCapacity = ctx->n_buffers;
+
+    NSError * error = nil; // explicit nil: without ARC a local is not zero-initialized
+    ctx->rset = [device newResidencySetWithDescriptor:desc error:&error];
+    [desc release]; // we own the descriptor (alloc/init) and it is not used past this point
+    if (ctx->rset == nil) {
+        GGML_LOG_ERROR("%s: error: %s\n", __func__, error ? [[error description] UTF8String] : "unknown");
+        return false;
+    }
+
+    for (int i = 0; i < ctx->n_buffers; i++) {
+        [ctx->rset addAllocation:ctx->buffers[i].metal];
+    }
+
+    // commit the allocations added above, then request residency for the whole set
+    [ctx->rset commit];
+    [ctx->rset requestResidency];
+    return true;
+}
+
+// tear down the residency set of ctx: end residency, remove all allocations, then release the set object
+static void ggml_backend_metal_buffer_rset_free(struct ggml_backend_metal_buffer_context * ctx) {
+    [ctx->rset endResidency];
+    [ctx->rset removeAllAllocations];
+    [ctx->rset release];
+}
+
 // finds the Metal buffer that contains the tensor data on the GPU device
 // the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the
 // Metal buffer based on the host memory pointer
@@ -4046,8 +4077,6 @@ static enum ggml_status ggml_metal_graph_compute(
     struct ggml_backend_metal_context        * ctx     = backend->context;
     struct ggml_backend_metal_device_context * ctx_dev = backend->device->context;
 
-    int64_t t_start_us = ggml_time_us();
-
     // number of nodes encoded by the main thread (empirically determined)
     const int n_main = 128;
 
@@ -4172,10 +4201,6 @@ static enum ggml_status ggml_metal_graph_compute(
         }
     }
 
-    int64_t t_end_us = ggml_time_us();
-
-    GGML_LOG_DEBUG("%s: compute graph took %8.2f ms\n", __func__, (t_end_us - t_start_us) / 1000.0);
-
     return GGML_STATUS_SUCCESS;
 }
 
@@ -4190,10 +4215,7 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer)
         [ctx->buffers[i].metal release];
     }
 
-    [ctx->rset endResidency];
-    [ctx->rset removeAllAllocations];
-    [ctx->rset release];
-
+    ggml_backend_metal_buffer_rset_free(ctx);
     ggml_backend_metal_device_rel(buffer->buft->device->context);
 
     if (ctx->owned) {
@@ -4330,25 +4352,11 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
         return NULL;
     }
 
-    {
-        MTLResidencySetDescriptor * desc;
-        desc = [[MTLResidencySetDescriptor alloc] init];
-        desc.label = @"ggml_backend_metal";
-        desc.initialCapacity = ctx->n_buffers;
-
-        NSError * error;
-        ctx->rset = [device newResidencySetWithDescriptor:desc error:&error];
-        if (error) {
-            GGML_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
-            return NULL;
-        }
-
-        for (int i = 0; i < ctx->n_buffers; i++) {
-            [ctx->rset addAllocation:ctx->buffers[i].metal];
-        }
-
-        [ctx->rset commit];
-        [ctx->rset requestResidency];
+    if (!ggml_backend_metal_buffer_rset_init(ctx, device)) {
+        GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
+        free(ctx);
+        ggml_backend_metal_device_rel(ctx_dev);
+        return NULL;
     }
 
     //ggml_backend_metal_log_allocated_size(device, size_aligned);
@@ -4494,25 +4502,11 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
         }
     }
 
-    {
-        MTLResidencySetDescriptor * desc;
-        desc = [[MTLResidencySetDescriptor alloc] init];
-        desc.label = @"ggml_backend_metal";
-        desc.initialCapacity = ctx->n_buffers;
-
-        NSError * error;
-        ctx->rset = [device newResidencySetWithDescriptor:desc error:&error];
-        if (error) {
-            GGML_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
-            return NULL;
-        }
-
-        for (int i = 0; i < ctx->n_buffers; i++) {
-            [ctx->rset addAllocation:ctx->buffers[i].metal];
-        }
-
-        [ctx->rset commit];
-        [ctx->rset requestResidency];
+    if (!ggml_backend_metal_buffer_rset_init(ctx, device)) {
+        GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
+        free(ctx);
+        ggml_backend_metal_device_rel(ctx_dev);
+        return NULL;
     }
 
     return ggml_backend_buffer_init(ggml_backend_metal_buffer_from_ptr_type(), ggml_backend_metal_buffer_i, ctx, size);
@@ -4828,25 +4822,11 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_from_ptr(ggml_back
         }
     }
 
-    {
-        MTLResidencySetDescriptor * desc;
-        desc = [[MTLResidencySetDescriptor alloc] init];
-        desc.label = @"ggml_backend_metal";
-        desc.initialCapacity = ctx->n_buffers;
-
-        NSError * error;
-        ctx->rset = [device newResidencySetWithDescriptor:desc error:&error];
-        if (error) {
-            GGML_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
-            return NULL;
-        }
-
-        for (int i = 0; i < ctx->n_buffers; i++) {
-            [ctx->rset addAllocation:ctx->buffers[i].metal];
-        }
-
-        [ctx->rset commit];
-        [ctx->rset requestResidency];
+    if (!ggml_backend_metal_buffer_rset_init(ctx, device)) {
+        GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__);
+        free(ctx);
+        ggml_backend_metal_device_rel(ctx_dev);
+        return NULL;
     }
 
     return ggml_backend_buffer_init(ggml_backend_metal_buffer_from_ptr_type(), ggml_backend_metal_buffer_i, ctx, size);