@@ -1044,6 +1044,37 @@ static void ggml_metal_free(struct ggml_backend_metal_context * ctx) {
10441044 id <MTLResidencySet> rset;
10451045};
10461046
1047+ // rset init
// Creates the Metal residency set for a buffer context, adds every Metal buffer
// in ctx->buffers[0 .. ctx->n_buffers) to it, then commits the set and requests
// residency.
//
//   ctx    - buffer context; ctx->rset is overwritten with the new residency set
//   device - Metal device used to create the residency set
//
// Returns true on success. On failure the error is logged, ctx->rset is nil and
// false is returned; cleaning up the rest of ctx is left to the caller.
static bool ggml_backend_metal_buffer_rset_init(struct ggml_backend_metal_buffer_context * ctx, id<MTLDevice> device) {
    MTLResidencySetDescriptor * desc = [[MTLResidencySetDescriptor alloc] init];
    desc.label           = @"ggml_backend_metal";
    desc.initialCapacity = ctx->n_buffers;

    // start from nil: by Cocoa convention the out-parameter is only guaranteed to
    // be written on failure, so an uninitialized pointer must never be inspected
    NSError * error = nil;
    ctx->rset = [device newResidencySetWithDescriptor:desc error:&error];

    // this file uses manual retain/release, so drop our reference to the
    // descriptor on every path (success and failure alike)
    [desc release];

    // judge success by the returned object, not by the error pointer
    if (ctx->rset == nil) {
        GGML_LOG_ERROR("%s: error: %s\n", __func__, error ? [[error description] UTF8String] : "unknown error");
        return false;
    }

    for (int i = 0; i < ctx->n_buffers; i++) {
        [ctx->rset addAllocation:ctx->buffers[i].metal];
    }

    [ctx->rset commit];
    [ctx->rset requestResidency];

    return true;
}
1070+
1071+ // rset free
// Tears down the residency set of a buffer context: ends residency, removes all
// allocations from the set and releases it.
//
//   ctx - buffer context whose ctx->rset is torn down
//
// ctx->rset is reset to nil afterwards, so a repeated call (or any later message
// sent to ctx->rset) is a no-op instead of touching a released object. Calling
// this with a nil ctx->rset is likewise a no-op, since messaging nil does nothing.
static void ggml_backend_metal_buffer_rset_free(struct ggml_backend_metal_buffer_context * ctx) {
    [ctx->rset endResidency];
    [ctx->rset removeAllAllocations];
    [ctx->rset release];

    ctx->rset = nil;
}
1077+
10471078// finds the Metal buffer that contains the tensor data on the GPU device
10481079// the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the
10491080// Metal buffer based on the host memory pointer
@@ -4046,8 +4077,6 @@ static enum ggml_status ggml_metal_graph_compute(
40464077 struct ggml_backend_metal_context * ctx = backend->context ;
40474078 struct ggml_backend_metal_device_context * ctx_dev = backend->device ->context ;
40484079
4049- int64_t t_start_us = ggml_time_us ();
4050-
40514080 // number of nodes encoded by the main thread (empirically determined)
40524081 const int n_main = 128 ;
40534082
@@ -4172,10 +4201,6 @@ static enum ggml_status ggml_metal_graph_compute(
41724201 }
41734202 }
41744203
4175- int64_t t_end_us = ggml_time_us ();
4176-
4177- GGML_LOG_DEBUG (" %s : compute graph took %8.2f ms\n " , __func__, (t_end_us - t_start_us) / 1000.0 );
4178-
41794204 return GGML_STATUS_SUCCESS;
41804205}
41814206
@@ -4190,10 +4215,7 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer)
41904215 [ctx->buffers[i].metal release ];
41914216 }
41924217
4193- [ctx->rset endResidency ];
4194- [ctx->rset removeAllAllocations ];
4195- [ctx->rset release ];
4196-
4218+ ggml_backend_metal_buffer_rset_free (ctx);
41974219 ggml_backend_metal_device_rel (buffer->buft ->device ->context );
41984220
41994221 if (ctx->owned ) {
@@ -4330,25 +4352,11 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
43304352 return NULL ;
43314353 }
43324354
4333- {
4334- MTLResidencySetDescriptor * desc;
4335- desc = [[MTLResidencySetDescriptor alloc ] init ];
4336- desc.label = @" ggml_backend_metal" ;
4337- desc.initialCapacity = ctx->n_buffers ;
4338-
4339- NSError * error;
4340- ctx->rset = [device newResidencySetWithDescriptor: desc error: &error];
4341- if (error) {
4342- GGML_LOG_ERROR (" %s : error: %s \n " , __func__, [[error description ] UTF8String ]);
4343- return NULL ;
4344- }
4345-
4346- for (int i = 0 ; i < ctx->n_buffers ; i++) {
4347- [ctx->rset addAllocation: ctx->buffers[i].metal];
4348- }
4349-
4350- [ctx->rset commit ];
4351- [ctx->rset requestResidency ];
4355+ if (!ggml_backend_metal_buffer_rset_init (ctx, device)) {
4356+ GGML_LOG_ERROR (" %s : error: failed to initialize residency set\n " , __func__);
4357+ free (ctx);
4358+ ggml_backend_metal_device_rel (ctx_dev);
4359+ return NULL ;
43524360 }
43534361
43544362 // ggml_backend_metal_log_allocated_size(device, size_aligned);
@@ -4494,25 +4502,11 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
44944502 }
44954503 }
44964504
4497- {
4498- MTLResidencySetDescriptor * desc;
4499- desc = [[MTLResidencySetDescriptor alloc ] init ];
4500- desc.label = @" ggml_backend_metal" ;
4501- desc.initialCapacity = ctx->n_buffers ;
4502-
4503- NSError * error;
4504- ctx->rset = [device newResidencySetWithDescriptor: desc error: &error];
4505- if (error) {
4506- GGML_LOG_ERROR (" %s : error: %s \n " , __func__, [[error description ] UTF8String ]);
4507- return NULL ;
4508- }
4509-
4510- for (int i = 0 ; i < ctx->n_buffers ; i++) {
4511- [ctx->rset addAllocation: ctx->buffers[i].metal];
4512- }
4513-
4514- [ctx->rset commit ];
4515- [ctx->rset requestResidency ];
4505+ if (!ggml_backend_metal_buffer_rset_init (ctx, device)) {
4506+ GGML_LOG_ERROR (" %s : error: failed to initialize residency set\n " , __func__);
4507+ free (ctx);
4508+ ggml_backend_metal_device_rel (ctx_dev);
4509+ return NULL ;
45164510 }
45174511
45184512 return ggml_backend_buffer_init (ggml_backend_metal_buffer_from_ptr_type (), ggml_backend_metal_buffer_i, ctx, size);
@@ -4828,25 +4822,11 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_from_ptr(ggml_back
48284822 }
48294823 }
48304824
4831- {
4832- MTLResidencySetDescriptor * desc;
4833- desc = [[MTLResidencySetDescriptor alloc ] init ];
4834- desc.label = @" ggml_backend_metal" ;
4835- desc.initialCapacity = ctx->n_buffers ;
4836-
4837- NSError * error;
4838- ctx->rset = [device newResidencySetWithDescriptor: desc error: &error];
4839- if (error) {
4840- GGML_LOG_ERROR (" %s : error: %s \n " , __func__, [[error description ] UTF8String ]);
4841- return NULL ;
4842- }
4843-
4844- for (int i = 0 ; i < ctx->n_buffers ; i++) {
4845- [ctx->rset addAllocation: ctx->buffers[i].metal];
4846- }
4847-
4848- [ctx->rset commit ];
4849- [ctx->rset requestResidency ];
4825+ if (!ggml_backend_metal_buffer_rset_init (ctx, device)) {
4826+ GGML_LOG_ERROR (" %s : error: failed to initialize residency set\n " , __func__);
4827+ free (ctx);
4828+ ggml_backend_metal_device_rel (ctx_dev);
4829+ return NULL ;
48504830 }
48514831
48524832 return ggml_backend_buffer_init (ggml_backend_metal_buffer_from_ptr_type (), ggml_backend_metal_buffer_i, ctx, size);
0 commit comments