Skip to content

Commit 5948346

Browse files
committed
metal : remove mem pool implementation
ggml-ci
1 parent 29f5903 commit 5948346

File tree

1 file changed

+2
-185
lines changed

1 file changed

+2
-185
lines changed

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 2 additions & 185 deletions
Original file line numberDiff line numberDiff line change
@@ -622,171 +622,9 @@ @interface ggml_metal_heap_ptr : NSObject
622622
@implementation ggml_metal_heap_ptr
623623
@end
624624

625-
//
626-
// ggml_metal_mem_pool [TAG_MEM_POOL_REMOVE]
627-
//
628-
629-
struct ggml_metal_mem_pool {
630-
id<MTLDevice> device;
631-
632-
int n_heaps; // total number of heaps ever created (including those that were removed)
633-
634-
NSMutableArray * heaps;
635-
NSMutableArray * heaps_to_remove;
636-
};
637-
638-
static struct ggml_metal_mem_pool * ggml_metal_mem_pool_init(void) {
639-
struct ggml_metal_mem_pool * mem_pool = calloc(1, sizeof(struct ggml_metal_mem_pool));
640-
641-
mem_pool->n_heaps = 0;
642-
643-
mem_pool->heaps = [[NSMutableArray alloc] init];
644-
mem_pool->heaps_to_remove = [[NSMutableArray alloc] init];
645-
646-
return mem_pool;
647-
}
648-
649-
static void ggml_metal_mem_pool_free(struct ggml_metal_mem_pool * mem_pool) {
650-
GGML_LOG_DEBUG("%s: freeing memory pool, num heaps = %zu (total = %d)\n", __func__, [mem_pool->heaps count], mem_pool->n_heaps);
651-
652-
size_t size_all = 0;
653-
size_t size_cur = 0;
654-
655-
for (ggml_metal_heap_ptr * ptr in mem_pool->heaps) {
656-
GGML_LOG_DEBUG("%s: heap: %p\n", __func__, (void *) ptr.data);
657-
GGML_LOG_DEBUG("%s: n_alloc: %" PRId64 "\n", __func__, ptr.data->n_alloc);
658-
GGML_LOG_DEBUG("%s: n_unused: %d\n", __func__, ptr.data->n_unused);
659-
GGML_LOG_DEBUG("%s: size: %.2f MiB\n", __func__, [ptr.data->obj size] / 1024.0 / 1024.0);
660-
GGML_LOG_DEBUG("%s: bufs: %zu\n", __func__, [ptr.data->bufs count]);
661-
662-
if ([ptr.data->bufs count] > 0) {
663-
size_cur += [ptr.data->obj size];
664-
}
665-
size_all += [ptr.data->obj size];
666-
667-
ggml_metal_heap_free(ptr.data);
668-
[ptr release];
669-
}
670-
[mem_pool->heaps release];
671-
[mem_pool->heaps_to_remove release];
672-
673-
if (size_all > 0) {
674-
GGML_LOG_DEBUG("%s: size_all: %.2f MiB\n", __func__, size_all / 1024.0 / 1024.0);
675-
GGML_LOG_DEBUG("%s: size_cur: %.2f MiB\n", __func__, size_cur / 1024.0 / 1024.0);
676-
}
677-
678-
free(mem_pool);
679-
}
680-
681-
static void ggml_metal_mem_pool_reset(struct ggml_metal_mem_pool * mem_pool) {
682-
for (NSUInteger i = 0; i < [mem_pool->heaps count]; i++) {
683-
ggml_metal_heap_ptr * ptr = [mem_pool->heaps objectAtIndex:i];
684-
685-
struct ggml_metal_heap * heap = ptr.data;
686-
ggml_metal_heap_reset(heap);
687-
688-
// if the heap hasn't been used for a while, remove it
689-
if (heap->n_unused >= 128) {
690-
[mem_pool->heaps_to_remove addObject:@(i)];
691-
}
692-
}
693-
694-
if (mem_pool->heaps_to_remove.count > 0) {
695-
// remove in reverse order
696-
for (NSUInteger i = [mem_pool->heaps_to_remove count] - 1; ; --i) {
697-
NSUInteger index = [[mem_pool->heaps_to_remove objectAtIndex:i] intValue];
698-
ggml_metal_heap_ptr * ptr = [mem_pool->heaps objectAtIndex:index];
699-
700-
struct ggml_metal_heap * heap = ptr.data;
701-
ggml_metal_heap_free(heap);
702-
703-
[mem_pool->heaps removeObjectAtIndex:index];
704-
[ptr release];
705-
706-
if (i == 0) {
707-
break;
708-
}
709-
}
710-
711-
[mem_pool->heaps_to_remove removeAllObjects];
712-
}
713-
}
714-
715-
static void ggml_metal_mem_pool_clear(struct ggml_metal_mem_pool * mem_pool) {
716-
for (ggml_metal_heap_ptr * ptr in mem_pool->heaps) {
717-
ptr.data->offs = 0;
718-
}
719-
}
720-
721-
static id<MTLBuffer> ggml_metal_mem_pool_alloc(struct ggml_metal_mem_pool * mem_pool, size_t size) {
722-
const size_t alignment = 256;
723-
724-
const size_t size_aligned = GGML_PAD(size, alignment);
725-
726-
// try one of the existing heaps
727-
for (ggml_metal_heap_ptr * ptr in mem_pool->heaps) {
728-
struct ggml_metal_heap * heap = ptr.data;
729-
if (heap->offs + size_aligned <= [heap->obj size]) {
730-
// if this is the first buffer in the heap for the current command buffer, tell the OS that
731-
// it cannot free the memory used by the heap
732-
// ref: https://developer.apple.com/documentation/metal/mtlpurgeablestate?language=objc
733-
if ([heap->bufs count] == 0) {
734-
[heap->obj setPurgeableState:MTLPurgeableStateNonVolatile];
735-
}
736-
737-
id<MTLBuffer> buf = [heap->obj newBufferWithLength:size_aligned options:MTLResourceStorageModePrivate offset:heap->offs];
738-
if (buf == nil) {
739-
GGML_LOG_ERROR("%s: error: failed to create MTLBuffer with size %zu\n", __func__, size_aligned);
740-
return nil;
741-
}
742-
743-
heap->n_alloc++;
744-
heap->offs += size_aligned;
745-
746-
[heap->bufs addObject:buf];
747-
748-
return buf;
749-
}
750-
}
751-
752-
// create a new heap that can fit this buffer
753-
ggml_metal_heap_ptr * heap_ptr = [ggml_metal_heap_ptr new];
754-
755-
struct ggml_metal_heap * heap = ggml_metal_heap_init(mem_pool->device, size_aligned);
756-
if (heap == NULL) {
757-
GGML_LOG_ERROR("%s: error: failed to create heap of size %zu\n", __func__, size_aligned);
758-
return NULL;
759-
}
760-
761-
//GGML_LOG_DEBUG("%s: creating new heap of size %zu, got %zu\n", __func__, size_aligned, [heap->obj size]);
762-
763-
heap_ptr.data = heap;
764-
ggml_metal_heap_reset(heap);
765-
766-
[heap->obj setPurgeableState:MTLPurgeableStateNonVolatile];
767-
id<MTLBuffer> buf = [heap->obj newBufferWithLength:size_aligned options:MTLResourceStorageModePrivate offset:heap->offs];
768-
if (buf == nil) {
769-
GGML_LOG_ERROR("%s: error: failed to create MTLBuffer with size %zu\n", __func__, size_aligned);
770-
return NULL;
771-
}
772-
773-
heap->n_alloc++;
774-
heap->offs += size_aligned;
775-
776-
[heap->bufs addObject:buf];
777-
778-
[mem_pool->heaps addObject:heap_ptr];
779-
mem_pool->n_heaps++;
780-
781-
return buf;
782-
}
783-
784625
struct ggml_metal_command_buffer {
785626
id<MTLCommandBuffer> obj;
786627

787-
// each command buffer has a memory pool from which it can allocate temporary buffers during the compute
788-
struct ggml_metal_mem_pool * mem_pool;
789-
790628
// used to enable concurrent execution of ops in the command buffers
791629
struct ggml_mem_ranges * mem_ranges;
792630
};
@@ -1103,9 +941,6 @@ @implementation GGMLMetalClass
1103941
for (int i = 0; i < GGML_METAL_MAX_COMMAND_BUFFERS; ++i) {
1104942
ctx->cmd_bufs[i].obj = nil;
1105943

1106-
ctx->cmd_bufs[i].mem_pool = ggml_metal_mem_pool_init();
1107-
ctx->cmd_bufs[i].mem_pool->device = device;
1108-
1109944
if (ctx_dev->use_concurrency) {
1110945
ctx->cmd_bufs[i].mem_ranges = ggml_mem_ranges_init(ctx_dev->debug_graph);
1111946
}
@@ -1760,8 +1595,6 @@ static void ggml_metal_free(struct ggml_backend_metal_context * ctx) {
17601595
[ctx->cmd_bufs[i].obj release];
17611596
}
17621597

1763-
ggml_metal_mem_pool_free(ctx->cmd_bufs[i].mem_pool);
1764-
17651598
if (ctx->cmd_bufs[i].mem_ranges) {
17661599
ggml_mem_ranges_free(ctx->cmd_bufs[i].mem_ranges);
17671600
}
@@ -2126,8 +1959,6 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
21261959

21271960
id<MTLComputeCommandEncoder> encoder;
21281961

2129-
struct ggml_metal_mem_pool * mem_pool;
2130-
21311962
struct ggml_mem_ranges * mem_ranges;
21321963
};
21331964

@@ -2164,8 +1995,6 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
21641995

21651996
id<MTLComputeCommandEncoder> encoder = ctx_enc->encoder;
21661997

2167-
struct ggml_metal_mem_pool * mem_pool = ctx_enc->mem_pool;
2168-
21691998
struct ggml_backend_metal_context * ctx = backend->context;
21701999
struct ggml_backend_metal_device_context * ctx_dev = backend->device->context;
21712000

@@ -2206,8 +2035,6 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
22062035
GGML_ABORT("unsupported op");
22072036
}
22082037

2209-
ggml_metal_mem_pool_clear(mem_pool);
2210-
22112038
const int64_t ne00 = src0 ? src0->ne[0] : 0;
22122039
const int64_t ne01 = src0 ? src0->ne[1] : 0;
22132040
const int64_t ne02 = src0 ? src0->ne[2] : 0;
@@ -5819,12 +5646,7 @@ static enum ggml_status ggml_metal_graph_compute(
58195646
// the main thread commits the first few commands immediately
58205647
// cmd_buf[n_cb]
58215648
{
5822-
// cannot use commandBufferWithUnretainedReferences because the buffers from the memory pool can get destroyed
5823-
// TODO: when the memory pools are removed, we can again use commandBufferWithUnretainedReferences
5824-
// https://github.com/ggml-org/llama.cpp/pull/15832#discussion_r2334215009
5825-
// [TAG_MEM_POOL_REMOVE]
5826-
//id<MTLCommandBuffer> cmd_buf = [ctx->queue commandBufferWithUnretainedReferences];
5827-
id<MTLCommandBuffer> cmd_buf = [ctx->queue commandBuffer];
5649+
id<MTLCommandBuffer> cmd_buf = [ctx->queue commandBufferWithUnretainedReferences];
58285650
[cmd_buf retain];
58295651

58305652
if (ctx->cmd_bufs[n_cb].obj) {
@@ -5843,8 +5665,7 @@ static enum ggml_status ggml_metal_graph_compute(
58435665
// prepare the rest of the command buffers asynchronously (optional)
58445666
// cmd_buf[0.. n_cb)
58455667
for (int cb_idx = 0; cb_idx < n_cb; ++cb_idx) {
5846-
//id<MTLCommandBuffer> cmd_buf = [ctx->queue commandBufferWithUnretainedReferences];
5847-
id<MTLCommandBuffer> cmd_buf = [ctx->queue commandBuffer];
5668+
id<MTLCommandBuffer> cmd_buf = [ctx->queue commandBufferWithUnretainedReferences];
58485669
[cmd_buf retain];
58495670

58505671
if (ctx->cmd_bufs[cb_idx].obj) {
@@ -6674,11 +6495,8 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
66746495
const int n_nodes_per_cb = ctx->n_nodes_per_cb;
66756496

66766497
id<MTLCommandBuffer> cmd_buf = ctx->cmd_bufs[cb_idx].obj;
6677-
struct ggml_metal_mem_pool * mem_pool = ctx->cmd_bufs[cb_idx].mem_pool;
66786498
struct ggml_mem_ranges * mem_ranges = ctx->cmd_bufs[cb_idx].mem_ranges;
66796499

6680-
ggml_metal_mem_pool_reset(mem_pool);
6681-
66826500
if (mem_ranges) {
66836501
ggml_mem_ranges_reset(mem_ranges);
66846502
}
@@ -6706,7 +6524,6 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
67066524
struct ggml_metal_encode_context ctx_enc = {
67076525
/*.backend =*/ backend,
67086526
/*.encoder =*/ encoder,
6709-
/*.mem_pool =*/ mem_pool,
67106527
/*.mem_ranges =*/ mem_ranges,
67116528
};
67126529

0 commit comments

Comments
 (0)