1919// max number of MTLCommandBuffer used to submit a graph for processing
2020#define GGML_METAL_MAX_COMMAND_BUFFERS 8
2121
22- #define GGML_METAL_MAX_RESIDENCY_SETS 128
23-
2422#define UNUSED (x ) (void )(x)
2523
2624// globals
3937 id <MTLDevice > mtl_device;
4038 int mtl_device_ref_count;
4139
42- id <MTLResidencySet> mtl_residency_set[GGML_METAL_MAX_RESIDENCY_SETS];
43- int mtl_residency_set_n;
44-
4540 bool has_simdgroup_reduction;
4641 bool has_simdgroup_mm;
4742 bool has_bfloat;
5146} g_ggml_ctx_dev_main = {
5247 /* .mtl_device =*/ nil ,
5348 /* .mtl_device_ref_count =*/ 0 ,
54- /* .mtl_residency_set =*/ { nil },
55- /* .mtl_residency_set_n =*/ 0 ,
5649 /* .has_simdgroup_reduction =*/ false ,
5750 /* .has_simdgroup_mm =*/ false ,
5851 /* .has_bfloat =*/ false ,
@@ -102,41 +95,6 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
10295 }
10396}
10497
// Register a residency set in the device context's tracking array.
//
// The set pointer is appended at index `mtl_residency_set_n`, which is then
// incremented. Only the pointer is stored here; no retain/release is done by
// this function.
//
// ctx           - device context that owns the tracking array (must not be NULL)
// residency_set - residency set to track (must not be nil)
//
// Returns true when the set was appended, false when the array already holds
// GGML_METAL_MAX_RESIDENCY_SETS entries (an error is logged in that case).
static bool ggml_backend_metal_device_add_residency_set(struct ggml_backend_metal_device_context * ctx, id<MTLResidencySet> residency_set) {
    assert(ctx != NULL);
    // validate the argument that is actually stored below; this mirrors the
    // precondition used by ggml_backend_metal_device_remove_residency_set
    assert(residency_set != nil);

    // the tracking array has a fixed capacity - refuse instead of overflowing it
    if (ctx->mtl_residency_set_n >= GGML_METAL_MAX_RESIDENCY_SETS) {
        GGML_LOG_ERROR("%s: warning: maximum number of residency sets reached\n", __func__);
        return false;
    }

    ctx->mtl_residency_set[ctx->mtl_residency_set_n++] = residency_set;

    return true;
}
119-
// Stop tracking a residency set in the device context.
//
// Finds the first slot of the tracking array that holds `residency_set`
// (pointer comparison), shifts every later entry down by one position and
// decrements `mtl_residency_set_n`.
//
// ctx           - device context that owns the tracking array (must not be NULL)
// residency_set - residency set to stop tracking (must not be nil)
//
// Returns true when the set was found and removed, false when it is not
// present in the array.
static bool ggml_backend_metal_device_remove_residency_set(struct ggml_backend_metal_device_context * ctx, id<MTLResidencySet> residency_set) {
    assert(ctx != NULL);
    assert(residency_set != nil);

    // locate the first occurrence of the set
    int pos = 0;
    while (pos < ctx->mtl_residency_set_n && ctx->mtl_residency_set[pos] != residency_set) {
        pos++;
    }

    if (pos >= ctx->mtl_residency_set_n) {
        // not tracked by this device context
        return false;
    }

    // close the gap by moving the tail of the array one slot to the left;
    // the vacated last slot keeps its previous value but falls outside the
    // live range once the count is decremented
    const int last = ctx->mtl_residency_set_n - 1;
    for (int k = pos; k < last; k++) {
        ctx->mtl_residency_set[k] = ctx->mtl_residency_set[k + 1];
    }

    ctx->mtl_residency_set_n = last;

    return true;
}
139-
14098// kernels
14199
142100struct ggml_metal_kernel {
@@ -1083,7 +1041,7 @@ static void ggml_metal_free(struct ggml_backend_metal_context * ctx) {
10831041 int n_buffers;
10841042 struct ggml_backend_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
10851043
1086- id <MTLResidencySet> residency_set ;
1044+ id <MTLResidencySet> rset ;
10871045};
10881046
10891047// finds the Metal buffer that contains the tensor data on the GPU device
@@ -4088,21 +4046,6 @@ static enum ggml_status ggml_metal_graph_compute(
40884046 struct ggml_backend_metal_context * ctx = backend->context ;
40894047 struct ggml_backend_metal_device_context * ctx_dev = backend->device ->context ;
40904048
4091- // attached residency sets to the queue on the first run
4092- // also tested to attached them on each run, but it does not make a difference
4093- static bool is_first = true ;
4094- if (is_first) {
4095- is_first = false ;
4096- GGML_LOG_INFO (" %s : adding %d residency sets\n " , __func__, ctx_dev->mtl_residency_set_n );
4097- [ctx->queue addResidencySets: ctx_dev->mtl_residency_set count: ctx_dev->mtl_residency_set_n];
4098- }
4099-
4100- // this does not make a difference
4101- // for (int i = 0; i < ctx_dev->mtl_residency_set_n; ++i) {
4102- // GGML_LOG_INFO("%s: residency set %d allocations size = %zu\n", __func__, i, [ctx_dev->mtl_residency_set[i] allocatedSize]);
4103- // [ctx_dev->mtl_residency_set[i] requestResidency];
4104- // }
4105-
41064049 int64_t t_start_us = ggml_time_us ();
41074050
41084051 // number of nodes encoded by the main thread (empirically determined)
@@ -4155,9 +4098,6 @@ static enum ggml_status ggml_metal_graph_compute(
41554098 id <MTLCommandBuffer > command_buffer = [ctx->queue commandBuffer ];
41564099 ctx->command_buffers [n_cb] = command_buffer;
41574100
4158- // does not make a difference
4159- [command_buffer useResidencySets: ctx_dev->mtl_residency_set count: ctx_dev->mtl_residency_set_n];
4160-
41614101 [command_buffer enqueue ];
41624102 ctx->encode_async (n_cb);
41634103 }
@@ -4168,9 +4108,6 @@ static enum ggml_status ggml_metal_graph_compute(
41684108 id <MTLCommandBuffer > command_buffer = [ctx->queue commandBuffer ];
41694109 ctx->command_buffers [cb_idx] = command_buffer;
41704110
4171- // does not make a difference
4172- [command_buffer useResidencySets: ctx_dev->mtl_residency_set count: ctx_dev->mtl_residency_set_n];
4173-
41744111 // always enqueue the first two command buffers
41754112 // enqueue all of the command buffers if we don't need to abort
41764113 if (cb_idx < 2 || ctx->abort_callback == NULL ) {
@@ -4253,11 +4190,9 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer)
42534190 [ctx->buffers[i].metal release ];
42544191 }
42554192
4256- ggml_backend_metal_device_remove_residency_set (buffer->buft ->device ->context , ctx->residency_set );
4257-
4258- [ctx->residency_set endResidency ];
4259- [ctx->residency_set removeAllAllocations ];
4260- [ctx->residency_set release ];
4193+ [ctx->rset endResidency ];
4194+ [ctx->rset removeAllAllocations ];
4195+ [ctx->rset release ];
42614196
42624197 ggml_backend_metal_device_rel (buffer->buft ->device ->context );
42634198
@@ -4398,25 +4333,22 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
43984333 {
43994334 MTLResidencySetDescriptor * desc;
44004335 desc = [[MTLResidencySetDescriptor alloc ] init ];
4401- desc.label = @" Primary residency set " ;
4336+ desc.label = @" ggml_backend_metal " ;
44024337 desc.initialCapacity = ctx->n_buffers ;
44034338
4404- NSError *error;
4405- ctx->residency_set = [device newResidencySetWithDescriptor: desc error: &error];
4339+ NSError * error;
4340+ ctx->rset = [device newResidencySetWithDescriptor: desc error: &error];
44064341 if (error) {
44074342 GGML_LOG_ERROR (" %s : error: %s \n " , __func__, [[error description ] UTF8String ]);
44084343 return NULL ;
44094344 }
44104345
44114346 for (int i = 0 ; i < ctx->n_buffers ; i++) {
4412- [ctx->residency_set addAllocation: ctx->buffers[i].metal];
4347+ [ctx->rset addAllocation: ctx->buffers[i].metal];
44134348 }
44144349
4415- [ctx->residency_set commit ];
4416- [ctx->residency_set requestResidency ];
4417-
4418- // track the residency set in the device context
4419- ggml_backend_metal_device_add_residency_set (ctx_dev, ctx->residency_set );
4350+ [ctx->rset commit ];
4351+ [ctx->rset requestResidency ];
44204352 }
44214353
44224354 // ggml_backend_metal_log_allocated_size(device, size_aligned);
@@ -4565,25 +4497,22 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
45654497 {
45664498 MTLResidencySetDescriptor * desc;
45674499 desc = [[MTLResidencySetDescriptor alloc ] init ];
4568- desc.label = @" Primary residency set " ;
4500+ desc.label = @" ggml_backend_metal " ;
45694501 desc.initialCapacity = ctx->n_buffers ;
45704502
4571- NSError *error;
4572- ctx->residency_set = [device newResidencySetWithDescriptor: desc error: &error];
4503+ NSError * error;
4504+ ctx->rset = [device newResidencySetWithDescriptor: desc error: &error];
45734505 if (error) {
45744506 GGML_LOG_ERROR (" %s : error: %s \n " , __func__, [[error description ] UTF8String ]);
45754507 return NULL ;
45764508 }
45774509
45784510 for (int i = 0 ; i < ctx->n_buffers ; i++) {
4579- [ctx->residency_set addAllocation: ctx->buffers[i].metal];
4511+ [ctx->rset addAllocation: ctx->buffers[i].metal];
45804512 }
45814513
4582- [ctx->residency_set commit ];
4583- [ctx->residency_set requestResidency ];
4584-
4585- // track the residency set in the device context
4586- ggml_backend_metal_device_add_residency_set (ctx_dev, ctx->residency_set );
4514+ [ctx->rset commit ];
4515+ [ctx->rset requestResidency ];
45874516 }
45884517
45894518 return ggml_backend_buffer_init (ggml_backend_metal_buffer_from_ptr_type (), ggml_backend_metal_buffer_i, ctx, size);
@@ -4902,25 +4831,22 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_from_ptr(ggml_back
49024831 {
49034832 MTLResidencySetDescriptor * desc;
49044833 desc = [[MTLResidencySetDescriptor alloc ] init ];
4905- desc.label = @" Primary residency set " ;
4834+ desc.label = @" ggml_backend_metal " ;
49064835 desc.initialCapacity = ctx->n_buffers ;
49074836
4908- NSError *error;
4909- ctx->residency_set = [device newResidencySetWithDescriptor: desc error: &error];
4837+ NSError * error;
4838+ ctx->rset = [device newResidencySetWithDescriptor: desc error: &error];
49104839 if (error) {
49114840 GGML_LOG_ERROR (" %s : error: %s \n " , __func__, [[error description ] UTF8String ]);
49124841 return NULL ;
49134842 }
49144843
49154844 for (int i = 0 ; i < ctx->n_buffers ; i++) {
4916- [ctx->residency_set addAllocation: ctx->buffers[i].metal];
4845+ [ctx->rset addAllocation: ctx->buffers[i].metal];
49174846 }
49184847
4919- [ctx->residency_set commit ];
4920- [ctx->residency_set requestResidency ];
4921-
4922- // track the residency set in the device context
4923- ggml_backend_metal_device_add_residency_set (ctx_dev, ctx->residency_set );
4848+ [ctx->rset commit ];
4849+ [ctx->rset requestResidency ];
49244850 }
49254851
49264852 return ggml_backend_buffer_init (ggml_backend_metal_buffer_from_ptr_type (), ggml_backend_metal_buffer_i, ctx, size);
0 commit comments