Skip to content

Commit 898c6c8

Browse files
committed
metal : simplify
1 parent 4dad9fa commit 898c6c8

File tree

1 file changed

+22
-96
lines changed

1 file changed

+22
-96
lines changed

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 22 additions & 96 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,6 @@
1919
// max number of MTLCommandBuffer used to submit a graph for processing
2020
#define GGML_METAL_MAX_COMMAND_BUFFERS 8
2121

22-
#define GGML_METAL_MAX_RESIDENCY_SETS 128
23-
2422
#define UNUSED(x) (void)(x)
2523

2624
// globals
@@ -39,9 +37,6 @@
3937
id<MTLDevice> mtl_device;
4038
int mtl_device_ref_count;
4139

42-
id<MTLResidencySet> mtl_residency_set[GGML_METAL_MAX_RESIDENCY_SETS];
43-
int mtl_residency_set_n;
44-
4540
bool has_simdgroup_reduction;
4641
bool has_simdgroup_mm;
4742
bool has_bfloat;
@@ -51,8 +46,6 @@
5146
} g_ggml_ctx_dev_main = {
5247
/*.mtl_device =*/ nil,
5348
/*.mtl_device_ref_count =*/ 0,
54-
/*.mtl_residency_set =*/ { nil },
55-
/*.mtl_residency_set_n =*/ 0,
5649
/*.has_simdgroup_reduction =*/ false,
5750
/*.has_simdgroup_mm =*/ false,
5851
/*.has_bfloat =*/ false,
@@ -102,41 +95,6 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
10295
}
10396
}
10497

105-
// add residency set
106-
static bool ggml_backend_metal_device_add_residency_set(struct ggml_backend_metal_device_context * ctx, id<MTLResidencySet> residency_set) {
107-
assert(ctx != NULL);
108-
assert(queue != nil);
109-
110-
if (ctx->mtl_residency_set_n >= GGML_METAL_MAX_RESIDENCY_SETS) {
111-
GGML_LOG_ERROR("%s: warning: maximum number of residency sets reached\n", __func__);
112-
return false;
113-
}
114-
115-
ctx->mtl_residency_set[ctx->mtl_residency_set_n++] = residency_set;
116-
117-
return true;
118-
}
119-
120-
// remove residency set
121-
static bool ggml_backend_metal_device_remove_residency_set(struct ggml_backend_metal_device_context * ctx, id<MTLResidencySet> residency_set) {
122-
assert(ctx != NULL);
123-
assert(residency_set != nil);
124-
125-
for (int i = 0; i < ctx->mtl_residency_set_n; ++i) {
126-
if (ctx->mtl_residency_set[i] == residency_set) {
127-
for (int j = i; j < ctx->mtl_residency_set_n - 1; ++j) {
128-
ctx->mtl_residency_set[j] = ctx->mtl_residency_set[j + 1];
129-
}
130-
131-
ctx->mtl_residency_set_n--;
132-
133-
return true;
134-
}
135-
}
136-
137-
return false;
138-
}
139-
14098
// kernels
14199

142100
struct ggml_metal_kernel {
@@ -1083,7 +1041,7 @@ static void ggml_metal_free(struct ggml_backend_metal_context * ctx) {
10831041
int n_buffers;
10841042
struct ggml_backend_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
10851043

1086-
id<MTLResidencySet> residency_set;
1044+
id<MTLResidencySet> rset;
10871045
};
10881046

10891047
// finds the Metal buffer that contains the tensor data on the GPU device
@@ -4088,21 +4046,6 @@ static enum ggml_status ggml_metal_graph_compute(
40884046
struct ggml_backend_metal_context * ctx = backend->context;
40894047
struct ggml_backend_metal_device_context * ctx_dev = backend->device->context;
40904048

4091-
// attach residency sets to the queue on the first run
4092-
// also tested attaching them on each run, but it does not make a difference
4093-
static bool is_first = true;
4094-
if (is_first) {
4095-
is_first = false;
4096-
GGML_LOG_INFO("%s: adding %d residency sets\n", __func__, ctx_dev->mtl_residency_set_n);
4097-
[ctx->queue addResidencySets:ctx_dev->mtl_residency_set count:ctx_dev->mtl_residency_set_n];
4098-
}
4099-
4100-
// this does not make a difference
4101-
//for (int i = 0; i < ctx_dev->mtl_residency_set_n; ++i) {
4102-
// GGML_LOG_INFO("%s: residency set %d allocations size = %zu\n", __func__, i, [ctx_dev->mtl_residency_set[i] allocatedSize]);
4103-
// [ctx_dev->mtl_residency_set[i] requestResidency];
4104-
//}
4105-
41064049
int64_t t_start_us = ggml_time_us();
41074050

41084051
// number of nodes encoded by the main thread (empirically determined)
@@ -4155,9 +4098,6 @@ static enum ggml_status ggml_metal_graph_compute(
41554098
id<MTLCommandBuffer> command_buffer = [ctx->queue commandBuffer];
41564099
ctx->command_buffers[n_cb] = command_buffer;
41574100

4158-
// does not make a difference
4159-
[command_buffer useResidencySets:ctx_dev->mtl_residency_set count:ctx_dev->mtl_residency_set_n];
4160-
41614101
[command_buffer enqueue];
41624102
ctx->encode_async(n_cb);
41634103
}
@@ -4168,9 +4108,6 @@ static enum ggml_status ggml_metal_graph_compute(
41684108
id<MTLCommandBuffer> command_buffer = [ctx->queue commandBuffer];
41694109
ctx->command_buffers[cb_idx] = command_buffer;
41704110

4171-
// does not make a difference
4172-
[command_buffer useResidencySets:ctx_dev->mtl_residency_set count:ctx_dev->mtl_residency_set_n];
4173-
41744111
// always enqueue the first two command buffers
41754112
// enqueue all of the command buffers if we don't need to abort
41764113
if (cb_idx < 2 || ctx->abort_callback == NULL) {
@@ -4253,11 +4190,9 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer)
42534190
[ctx->buffers[i].metal release];
42544191
}
42554192

4256-
ggml_backend_metal_device_remove_residency_set(buffer->buft->device->context, ctx->residency_set);
4257-
4258-
[ctx->residency_set endResidency];
4259-
[ctx->residency_set removeAllAllocations];
4260-
[ctx->residency_set release];
4193+
[ctx->rset endResidency];
4194+
[ctx->rset removeAllAllocations];
4195+
[ctx->rset release];
42614196

42624197
ggml_backend_metal_device_rel(buffer->buft->device->context);
42634198

@@ -4398,25 +4333,22 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba
43984333
{
43994334
MTLResidencySetDescriptor * desc;
44004335
desc = [[MTLResidencySetDescriptor alloc] init];
4401-
desc.label = @"Primary residency set";
4336+
desc.label = @"ggml_backend_metal";
44024337
desc.initialCapacity = ctx->n_buffers;
44034338

4404-
NSError *error;
4405-
ctx->residency_set = [device newResidencySetWithDescriptor:desc error:&error];
4339+
NSError * error;
4340+
ctx->rset = [device newResidencySetWithDescriptor:desc error:&error];
44064341
if (error) {
44074342
GGML_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
44084343
return NULL;
44094344
}
44104345

44114346
for (int i = 0; i < ctx->n_buffers; i++) {
4412-
[ctx->residency_set addAllocation:ctx->buffers[i].metal];
4347+
[ctx->rset addAllocation:ctx->buffers[i].metal];
44134348
}
44144349

4415-
[ctx->residency_set commit];
4416-
[ctx->residency_set requestResidency];
4417-
4418-
// track the residency set in the device context
4419-
ggml_backend_metal_device_add_residency_set(ctx_dev, ctx->residency_set);
4350+
[ctx->rset commit];
4351+
[ctx->rset requestResidency];
44204352
}
44214353

44224354
//ggml_backend_metal_log_allocated_size(device, size_aligned);
@@ -4565,25 +4497,22 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz
45654497
{
45664498
MTLResidencySetDescriptor * desc;
45674499
desc = [[MTLResidencySetDescriptor alloc] init];
4568-
desc.label = @"Primary residency set";
4500+
desc.label = @"ggml_backend_metal";
45694501
desc.initialCapacity = ctx->n_buffers;
45704502

4571-
NSError *error;
4572-
ctx->residency_set = [device newResidencySetWithDescriptor:desc error:&error];
4503+
NSError * error;
4504+
ctx->rset = [device newResidencySetWithDescriptor:desc error:&error];
45734505
if (error) {
45744506
GGML_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
45754507
return NULL;
45764508
}
45774509

45784510
for (int i = 0; i < ctx->n_buffers; i++) {
4579-
[ctx->residency_set addAllocation:ctx->buffers[i].metal];
4511+
[ctx->rset addAllocation:ctx->buffers[i].metal];
45804512
}
45814513

4582-
[ctx->residency_set commit];
4583-
[ctx->residency_set requestResidency];
4584-
4585-
// track the residency set in the device context
4586-
ggml_backend_metal_device_add_residency_set(ctx_dev, ctx->residency_set);
4514+
[ctx->rset commit];
4515+
[ctx->rset requestResidency];
45874516
}
45884517

45894518
return ggml_backend_buffer_init(ggml_backend_metal_buffer_from_ptr_type(), ggml_backend_metal_buffer_i, ctx, size);
@@ -4902,25 +4831,22 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_from_ptr(ggml_back
49024831
{
49034832
MTLResidencySetDescriptor * desc;
49044833
desc = [[MTLResidencySetDescriptor alloc] init];
4905-
desc.label = @"Primary residency set";
4834+
desc.label = @"ggml_backend_metal";
49064835
desc.initialCapacity = ctx->n_buffers;
49074836

4908-
NSError *error;
4909-
ctx->residency_set = [device newResidencySetWithDescriptor:desc error:&error];
4837+
NSError * error;
4838+
ctx->rset = [device newResidencySetWithDescriptor:desc error:&error];
49104839
if (error) {
49114840
GGML_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
49124841
return NULL;
49134842
}
49144843

49154844
for (int i = 0; i < ctx->n_buffers; i++) {
4916-
[ctx->residency_set addAllocation:ctx->buffers[i].metal];
4845+
[ctx->rset addAllocation:ctx->buffers[i].metal];
49174846
}
49184847

4919-
[ctx->residency_set commit];
4920-
[ctx->residency_set requestResidency];
4921-
4922-
// track the residency set in the device context
4923-
ggml_backend_metal_device_add_residency_set(ctx_dev, ctx->residency_set);
4848+
[ctx->rset commit];
4849+
[ctx->rset requestResidency];
49244850
}
49254851

49264852
return ggml_backend_buffer_init(ggml_backend_metal_buffer_from_ptr_type(), ggml_backend_metal_buffer_i, ctx, size);

0 commit comments

Comments
 (0)