1919// max number of MTLCommandBuffer used to submit a graph for processing
2020#define GGML_METAL_MAX_COMMAND_BUFFERS 8
2121
22- #define UNUSED (x ) (void )(x)
22+ // create residency sets only on macOS >= 15.0
23+ #if TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED >= 150000
24+ #define GGML_METAL_HAS_RESIDENCY_SETS 1
25+ #endif
2326
2427// globals
2528
3942
4043 bool has_simdgroup_reduction;
4144 bool has_simdgroup_mm;
45+ bool has_residency_sets;
4246 bool has_bfloat;
4347 bool use_bfloat;
4448
4852 /* .mtl_device_ref_count =*/ 0 ,
4953 /* .has_simdgroup_reduction =*/ false ,
5054 /* .has_simdgroup_mm =*/ false ,
55+ /* .has_residency_sets =*/ false ,
5156 /* .has_bfloat =*/ false ,
5257 /* .use_bfloat =*/ false ,
5358 /* .name =*/ " " ,
6469 ctx->has_simdgroup_reduction |= [ctx->mtl_device supportsFamily: MTLGPUFamilyMetal3_GGML];
6570
6671 ctx->has_simdgroup_mm = [ctx->mtl_device supportsFamily: MTLGPUFamilyApple7];
67-
72+ #if defined(GGML_METAL_HAS_RESIDENCY_SETS)
73+ ctx->has_residency_sets = getenv (" GGML_METAL_NO_RESIDENCY" ) == NULL ;
74+ #endif
6875 ctx->has_bfloat = [ctx->mtl_device supportsFamily: MTLGPUFamilyMetal3_GGML];
6976 ctx->has_bfloat |= [ctx->mtl_device supportsFamily: MTLGPUFamilyApple6];
7077
@@ -483,6 +490,10 @@ @implementation GGMLMetalClass
483490 GGML_LOG_INFO (" %s : picking default device: %s \n " , __func__, [[device name ] UTF8String ]);
484491
485492 ctx->queue = [device newCommandQueue ];
493+ if (ctx->queue == nil ) {
494+ GGML_LOG_ERROR (" %s : error: failed to create command queue\n " , __func__);
495+ return NULL ;
496+ }
486497 ctx->d_queue = dispatch_queue_create (" ggml-metal" , DISPATCH_QUEUE_CONCURRENT);
487498
488499 id <MTLLibrary > metal_library;
@@ -649,6 +660,7 @@ @implementation GGMLMetalClass
649660
650661 GGML_LOG_INFO (" %s : simdgroup reduction = %s \n " , __func__, ctx_dev->has_simdgroup_reduction ? " true" : " false" );
651662 GGML_LOG_INFO (" %s : simdgroup matrix mul. = %s \n " , __func__, ctx_dev->has_simdgroup_mm ? " true" : " false" );
663+ GGML_LOG_INFO (" %s : has residency sets = %s \n " , __func__, ctx_dev->has_residency_sets ? " true" : " false" );
652664 GGML_LOG_INFO (" %s : has bfloat = %s \n " , __func__, ctx_dev->has_bfloat ? " true" : " false" );
653665 GGML_LOG_INFO (" %s : use bfloat = %s \n " , __func__, ctx_dev->use_bfloat ? " true" : " false" );
654666 GGML_LOG_INFO (" %s : hasUnifiedMemory = %s \n " , __func__, ctx_dev->mtl_device .hasUnifiedMemory ? " true" : " false" );
@@ -1035,8 +1047,70 @@ static void ggml_metal_free(struct ggml_backend_metal_context * ctx) {
10351047 // multiple buffers are used only to avoid the maximum buffer size limitation when using mmap
10361048 int n_buffers;
10371049 struct ggml_backend_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
1050+
1051+ // optional MTLResidencySet
1052+ id rset;
10381053};
10391054
// Creates the optional residency set for a buffer context and requests residency for its Metal buffers.
//
// ctx     - buffer context; ctx->rset is reset to nil first, then ctx->buffers[0 .. n_buffers) are added
//           to the newly created set
// ctx_dev - device context; when has_residency_sets is false, no set is created
// device  - Metal device used to create the residency set
//
// Returns true on success, including the cases where residency sets are disabled, not compiled in, or not
// available at runtime (ctx->rset stays nil). Returns false only when the device fails to create the set,
// in which case ctx->rset is nil.
static bool ggml_backend_metal_buffer_rset_init(
        struct ggml_backend_metal_buffer_context * ctx,
        struct ggml_backend_metal_device_context * ctx_dev,
        id<MTLDevice> device) {
    ctx->rset = nil;

    if (!ctx_dev->has_residency_sets) {
        return true;
    }

#if defined(GGML_METAL_HAS_RESIDENCY_SETS)
    if (@available(macOS 15.0, *)) {
        MTLResidencySetDescriptor * desc = [[MTLResidencySetDescriptor alloc] init];
        desc.label = @"ggml_backend_metal";
        desc.initialCapacity = ctx->n_buffers;

        // objects are released explicitly in this code, so the local must be set to nil by hand;
        // otherwise it holds garbage whenever the call succeeds without touching it
        NSError * error = nil;
        ctx->rset = [device newResidencySetWithDescriptor:desc error:&error];

        // the descriptor is no longer needed, whatever the outcome
        [desc release];

        // failure is signalled by the nil return value; the error object only carries the details
        if (ctx->rset == nil) {
            GGML_LOG_ERROR("%s: error: %s\n", __func__, error ? [[error description] UTF8String] : "unknown");
            return false;
        }

        for (int i = 0; i < ctx->n_buffers; i++) {
            [ctx->rset addAllocation:ctx->buffers[i].metal];
        }

        // pending additions only take effect after commit
        [ctx->rset commit];
        [ctx->rset requestResidency];

        return true;
    }
#else
    GGML_UNUSED(ctx_dev);
    GGML_UNUSED(device);
#endif

    return true;
}
1098+
// Tears down the residency set of a buffer context, if one exists.
//
// When ctx->rset is non-nil: ends residency, removes all allocations from the set and releases it.
// Does nothing when ctx->rset is nil, or when residency sets are not compiled in / not available at runtime.
// Note that ctx->rset itself is left untouched (it is not reset to nil after the release).
static void ggml_backend_metal_buffer_rset_free(struct ggml_backend_metal_buffer_context * ctx) {
#if defined(GGML_METAL_HAS_RESIDENCY_SETS)
    if (@available(macOS 15.0, *)) {
        id rset = ctx->rset;
        if (rset == nil) {
            return;
        }

        [rset endResidency];
        [rset removeAllAllocations];
        [rset release];
    }
#else
    GGML_UNUSED(ctx);
#endif
}
1113+
10401114// finds the Metal buffer that contains the tensor data on the GPU device
10411115// the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the
10421116// Metal buffer based on the host memory pointer
@@ -4164,6 +4238,8 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer)
41644238 for (int i = 0 ; i < ctx->n_buffers ; i++) {
41654239 [ctx->buffers[i].metal release ];
41664240 }
4241+
4242+ ggml_backend_metal_buffer_rset_free (ctx);
41674243 ggml_backend_metal_device_rel (buffer->buft ->device ->context );
41684244
41694245 if (ctx->owned ) {
@@ -4186,19 +4262,19 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer)
// Sets `size` bytes of the tensor's data to `value`, starting `offset` bytes past tensor->data.
// The write goes directly through tensor->data; `buffer` is not used.
static void ggml_backend_metal_buffer_memset_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
    GGML_UNUSED(buffer);

    char * dst = (char *) tensor->data + offset;
    memset(dst, value, size);
}
41914267
// Copies `size` bytes from `data` into the tensor's data, starting `offset` bytes past tensor->data.
// The write goes directly through tensor->data; `buffer` is not used.
static void ggml_backend_metal_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
    GGML_UNUSED(buffer);

    char * dst = (char *) tensor->data + offset;
    memcpy(dst, data, size);
}
41974273
// Copies `size` bytes out of the tensor's data, starting `offset` bytes past tensor->data, into `data`.
// The read goes directly through tensor->data; `buffer` is not used.
static void ggml_backend_metal_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
    GGML_UNUSED(buffer);

    const char * src = (const char *) tensor->data + offset;
    memcpy(data, src, size);
}
42034279
42044280static bool ggml_backend_metal_buffer_cpy_tensor (ggml_backend_buffer_t buffer, const struct ggml_tensor * src, struct ggml_tensor * dst) {
@@ -4208,7 +4284,7 @@ static bool ggml_backend_metal_buffer_cpy_tensor(ggml_backend_buffer_t buffer, c
42084284 }
42094285 return false ;
42104286
4211- UNUSED (buffer);
4287+ GGML_UNUSED (buffer);
42124288}
42134289
42144290static void ggml_backend_metal_buffer_clear (ggml_backend_buffer_t buffer, uint8_t value) {
@@ -4234,7 +4310,7 @@ static void ggml_backend_metal_buffer_clear(ggml_backend_buffer_t buffer, uint8_
42344310static const char * ggml_backend_metal_buffer_type_get_name (ggml_backend_buffer_type_t buft) {
42354311 return " Metal" ;
42364312
4237- UNUSED (buft);
4313+ GGML_UNUSED (buft);
42384314}
42394315
42404316static void ggml_backend_metal_log_allocated_size (id <MTLDevice > device, size_t size_aligned) {
@@ -4258,8 +4334,8 @@ static void ggml_backend_metal_log_allocated_size(id<MTLDevice> device, size_t s
42584334 }
42594335#endif
42604336#endif
4261- UNUSED (device);
4262- UNUSED (size_aligned);
4337+ GGML_UNUSED (device);
4338+ GGML_UNUSED (size_aligned);
42634339}
42644340
42654341static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size) {
@@ -4272,7 +4348,8 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
42724348 size_aligned += (size_page - (size_aligned % size_page));
42734349 }
42744350
4275- id <MTLDevice > device = ggml_backend_metal_device_acq (buft->device ->context );
4351+ struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)buft->device ->context ;
4352+ id <MTLDevice > device = ggml_backend_metal_device_acq (ctx_dev);
42764353
42774354 ctx->all_data = ggml_metal_host_malloc (size_aligned);
42784355 ctx->all_size = size_aligned;
@@ -4295,7 +4372,14 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
42954372 if (size_aligned > 0 && (ctx->all_data == NULL || ctx->buffers [0 ].metal == nil )) {
42964373 GGML_LOG_ERROR (" %s : error: failed to allocate buffer, size = %8.2f MiB\n " , __func__, size_aligned / 1024.0 / 1024.0 );
42974374 free (ctx);
4298- ggml_backend_metal_device_rel (buft->device ->context );
4375+ ggml_backend_metal_device_rel (ctx_dev);
4376+ return NULL ;
4377+ }
4378+
4379+ if (!ggml_backend_metal_buffer_rset_init (ctx, ctx_dev, device)) {
4380+ GGML_LOG_ERROR (" %s : error: failed to initialize residency set\n " , __func__);
4381+ free (ctx);
4382+ ggml_backend_metal_device_rel (ctx_dev);
42994383 return NULL ;
43004384 }
43014385
@@ -4306,7 +4390,7 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
43064390
43074391static size_t ggml_backend_metal_buffer_type_get_alignment (ggml_backend_buffer_type_t buft) {
43084392 return 32 ;
4309- UNUSED (buft);
4393+ GGML_UNUSED (buft);
43104394}
43114395
43124396static size_t ggml_backend_metal_buffer_type_get_max_size (ggml_backend_buffer_type_t buft) {
@@ -4316,13 +4400,13 @@ static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_ty
43164400
43174401 return max_size;
43184402
4319- UNUSED (buft);
4403+ GGML_UNUSED (buft);
43204404}
43214405
43224406static bool ggml_backend_metal_buffer_type_is_host (ggml_backend_buffer_type_t buft) {
43234407 return true ;
43244408
4325- UNUSED (buft);
4409+ GGML_UNUSED (buft);
43264410}
43274411
43284412ggml_backend_buffer_type_t ggml_backend_metal_buffer_type (void ) {
@@ -4345,7 +4429,7 @@ ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void) {
43454429static const char * ggml_backend_metal_buffer_from_ptr_type_get_name (ggml_backend_buffer_type_t buft) {
43464430 return " Metal_Mapped" ;
43474431
4348- UNUSED (buft);
4432+ GGML_UNUSED (buft);
43494433}
43504434
43514435static ggml_backend_buffer_type_t ggml_backend_metal_buffer_from_ptr_type (void ) {
@@ -4388,7 +4472,8 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
43884472 size_aligned += (size_page - (size_aligned % size_page));
43894473 }
43904474
4391- id <MTLDevice > device = ggml_backend_metal_device_acq (&g_ggml_ctx_dev_main);
4475+ struct ggml_backend_metal_device_context * ctx_dev = &g_ggml_ctx_dev_main;
4476+ id <MTLDevice > device = ggml_backend_metal_device_acq (ctx_dev);
43924477
43934478 // the buffer fits into the max buffer size allowed by the device
43944479 if (size_aligned <= device.maxBufferLength ) {
@@ -4441,6 +4526,13 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
44414526 }
44424527 }
44434528
4529+ if (!ggml_backend_metal_buffer_rset_init (ctx, ctx_dev, device)) {
4530+ GGML_LOG_ERROR (" %s : error: failed to initialize residency set\n " , __func__);
4531+ free (ctx);
4532+ ggml_backend_metal_device_rel (ctx_dev);
4533+ return NULL ;
4534+ }
4535+
44444536 return ggml_backend_buffer_init (ggml_backend_metal_buffer_from_ptr_type (), ggml_backend_metal_buffer_i, ctx, size);
44454537}
44464538
@@ -4449,7 +4541,7 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
44494541static const char * ggml_backend_metal_name (ggml_backend_t backend) {
44504542 return " Metal" ;
44514543
4452- UNUSED (backend);
4544+ GGML_UNUSED (backend);
44534545}
44544546
44554547static void ggml_backend_metal_free (ggml_backend_t backend) {
@@ -4754,6 +4846,13 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_from_ptr(ggml_back
47544846 }
47554847 }
47564848
4849+ if (!ggml_backend_metal_buffer_rset_init (ctx, ctx_dev, device)) {
4850+ GGML_LOG_ERROR (" %s : error: failed to initialize residency set\n " , __func__);
4851+ free (ctx);
4852+ ggml_backend_metal_device_rel (ctx_dev);
4853+ return NULL ;
4854+ }
4855+
47574856 return ggml_backend_buffer_init (ggml_backend_metal_buffer_from_ptr_type (), ggml_backend_metal_buffer_i, ctx, size);
47584857}
47594858
@@ -4767,7 +4866,7 @@ static bool ggml_backend_metal_device_supports_buft(ggml_backend_dev_t dev, ggml
47674866 return buft->iface .get_name == ggml_backend_metal_buffer_type_get_name ||
47684867 buft->iface .get_name == ggml_backend_metal_buffer_from_ptr_type_get_name;
47694868
4770- UNUSED (dev);
4869+ GGML_UNUSED (dev);
47714870}
47724871
47734872static bool ggml_backend_metal_device_offload_op (ggml_backend_dev_t dev, const struct ggml_tensor * op) {
0 commit comments