@@ -622,171 +622,9 @@ @interface ggml_metal_heap_ptr : NSObject
622622@implementation ggml_metal_heap_ptr
623623@end
624624
625- //
626- // ggml_metal_mem_pool [TAG_MEM_POOL_REMOVE]
627- //
628-
629- struct ggml_metal_mem_pool {
630- id <MTLDevice > device;
631-
632- int n_heaps; // total number of heaps ever created (including those that were removed)
633-
634- NSMutableArray * heaps;
635- NSMutableArray * heaps_to_remove;
636- };
637-
638- static struct ggml_metal_mem_pool * ggml_metal_mem_pool_init (void ) {
639- struct ggml_metal_mem_pool * mem_pool = calloc (1 , sizeof (struct ggml_metal_mem_pool));
640-
641- mem_pool->n_heaps = 0 ;
642-
643- mem_pool->heaps = [[NSMutableArray alloc ] init ];
644- mem_pool->heaps_to_remove = [[NSMutableArray alloc ] init ];
645-
646- return mem_pool;
647- }
648-
649- static void ggml_metal_mem_pool_free (struct ggml_metal_mem_pool * mem_pool) {
650- GGML_LOG_DEBUG (" %s : freeing memory pool, num heaps = %zu (total = %d )\n " , __func__, [mem_pool->heaps count ], mem_pool->n_heaps );
651-
652- size_t size_all = 0 ;
653- size_t size_cur = 0 ;
654-
655- for (ggml_metal_heap_ptr * ptr in mem_pool->heaps ) {
656- GGML_LOG_DEBUG (" %s : heap: %p \n " , __func__, (void *) ptr.data );
657- GGML_LOG_DEBUG (" %s : n_alloc: %" PRId64 " \n " , __func__, ptr.data ->n_alloc );
658- GGML_LOG_DEBUG (" %s : n_unused: %d \n " , __func__, ptr.data ->n_unused );
659- GGML_LOG_DEBUG (" %s : size: %.2f MiB\n " , __func__, [ptr.data->obj size ] / 1024.0 / 1024.0 );
660- GGML_LOG_DEBUG (" %s : bufs: %zu \n " , __func__, [ptr.data->bufs count ]);
661-
662- if ([ptr.data->bufs count ] > 0 ) {
663- size_cur += [ptr.data->obj size ];
664- }
665- size_all += [ptr.data->obj size ];
666-
667- ggml_metal_heap_free (ptr.data );
668- [ptr release ];
669- }
670- [mem_pool->heaps release ];
671- [mem_pool->heaps_to_remove release ];
672-
673- if (size_all > 0 ) {
674- GGML_LOG_DEBUG (" %s : size_all: %.2f MiB\n " , __func__, size_all / 1024.0 / 1024.0 );
675- GGML_LOG_DEBUG (" %s : size_cur: %.2f MiB\n " , __func__, size_cur / 1024.0 / 1024.0 );
676- }
677-
678- free (mem_pool);
679- }
680-
681- static void ggml_metal_mem_pool_reset (struct ggml_metal_mem_pool * mem_pool) {
682- for (NSUInteger i = 0 ; i < [mem_pool->heaps count ]; i++) {
683- ggml_metal_heap_ptr * ptr = [mem_pool->heaps objectAtIndex: i];
684-
685- struct ggml_metal_heap * heap = ptr.data ;
686- ggml_metal_heap_reset (heap);
687-
688- // if the heap hasn't been used for a while, remove it
689- if (heap->n_unused >= 128 ) {
690- [mem_pool->heaps_to_remove addObject: @(i)];
691- }
692- }
693-
694- if (mem_pool->heaps_to_remove .count > 0 ) {
695- // remove in reverse order
696- for (NSUInteger i = [mem_pool->heaps_to_remove count ] - 1 ; ; --i) {
697- NSUInteger index = [[mem_pool->heaps_to_remove objectAtIndex: i] intValue ];
698- ggml_metal_heap_ptr * ptr = [mem_pool->heaps objectAtIndex: index];
699-
700- struct ggml_metal_heap * heap = ptr.data ;
701- ggml_metal_heap_free (heap);
702-
703- [mem_pool->heaps removeObjectAtIndex: index];
704- [ptr release ];
705-
706- if (i == 0 ) {
707- break ;
708- }
709- }
710-
711- [mem_pool->heaps_to_remove removeAllObjects ];
712- }
713- }
714-
715- static void ggml_metal_mem_pool_clear (struct ggml_metal_mem_pool * mem_pool) {
716- for (ggml_metal_heap_ptr * ptr in mem_pool->heaps ) {
717- ptr.data ->offs = 0 ;
718- }
719- }
720-
721- static id <MTLBuffer > ggml_metal_mem_pool_alloc (struct ggml_metal_mem_pool * mem_pool, size_t size) {
722- const size_t alignment = 256 ;
723-
724- const size_t size_aligned = GGML_PAD (size, alignment);
725-
726- // try one of the existing heaps
727- for (ggml_metal_heap_ptr * ptr in mem_pool->heaps ) {
728- struct ggml_metal_heap * heap = ptr.data ;
729- if (heap->offs + size_aligned <= [heap->obj size ]) {
730- // if this is the first buffer in the heap for the current command buffer, tell the OS that
731- // it cannot free the memory used by the heap
732- // ref: https://developer.apple.com/documentation/metal/mtlpurgeablestate?language=objc
733- if ([heap->bufs count ] == 0 ) {
734- [heap->obj setPurgeableState: MTLPurgeableStateNonVolatile ];
735- }
736-
737- id <MTLBuffer > buf = [heap->obj newBufferWithLength: size_aligned options: MTLResourceStorageModePrivate offset: heap->offs];
738- if (buf == nil ) {
739- GGML_LOG_ERROR (" %s : error: failed to create MTLBuffer with size %zu \n " , __func__, size_aligned);
740- return nil ;
741- }
742-
743- heap->n_alloc ++;
744- heap->offs += size_aligned;
745-
746- [heap->bufs addObject: buf];
747-
748- return buf;
749- }
750- }
751-
752- // create a new heap that can fit this buffer
753- ggml_metal_heap_ptr * heap_ptr = [ggml_metal_heap_ptr new ];
754-
755- struct ggml_metal_heap * heap = ggml_metal_heap_init (mem_pool->device , size_aligned);
756- if (heap == NULL ) {
757- GGML_LOG_ERROR (" %s : error: failed to create heap of size %zu \n " , __func__, size_aligned);
758- return NULL ;
759- }
760-
761- // GGML_LOG_DEBUG("%s: creating new heap of size %zu, got %zu\n", __func__, size_aligned, [heap->obj size]);
762-
763- heap_ptr.data = heap;
764- ggml_metal_heap_reset (heap);
765-
766- [heap->obj setPurgeableState: MTLPurgeableStateNonVolatile ];
767- id <MTLBuffer > buf = [heap->obj newBufferWithLength: size_aligned options: MTLResourceStorageModePrivate offset: heap->offs];
768- if (buf == nil ) {
769- GGML_LOG_ERROR (" %s : error: failed to create MTLBuffer with size %zu \n " , __func__, size_aligned);
770- return NULL ;
771- }
772-
773- heap->n_alloc ++;
774- heap->offs += size_aligned;
775-
776- [heap->bufs addObject: buf];
777-
778- [mem_pool->heaps addObject: heap_ptr];
779- mem_pool->n_heaps ++;
780-
781- return buf;
782- }
783-
784625struct ggml_metal_command_buffer {
785626 id <MTLCommandBuffer > obj;
786627
787- // each command buffer has a memory pool from which it can allocate temporary buffers during the compute
788- struct ggml_metal_mem_pool * mem_pool;
789-
790628 // used to enable concurrent execution of ops in the command buffers
791629 struct ggml_mem_ranges * mem_ranges;
792630};
@@ -1103,9 +941,6 @@ @implementation GGMLMetalClass
1103941 for (int i = 0 ; i < GGML_METAL_MAX_COMMAND_BUFFERS; ++i) {
1104942 ctx->cmd_bufs [i].obj = nil ;
1105943
1106- ctx->cmd_bufs [i].mem_pool = ggml_metal_mem_pool_init ();
1107- ctx->cmd_bufs [i].mem_pool ->device = device;
1108-
1109944 if (ctx_dev->use_concurrency ) {
1110945 ctx->cmd_bufs [i].mem_ranges = ggml_mem_ranges_init (ctx_dev->debug_graph );
1111946 }
@@ -1760,8 +1595,6 @@ static void ggml_metal_free(struct ggml_backend_metal_context * ctx) {
17601595 [ctx->cmd_bufs[i].obj release ];
17611596 }
17621597
1763- ggml_metal_mem_pool_free (ctx->cmd_bufs [i].mem_pool );
1764-
17651598 if (ctx->cmd_bufs [i].mem_ranges ) {
17661599 ggml_mem_ranges_free (ctx->cmd_bufs [i].mem_ranges );
17671600 }
@@ -2126,8 +1959,6 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
21261959
21271960 id <MTLComputeCommandEncoder > encoder;
21281961
2129- struct ggml_metal_mem_pool * mem_pool;
2130-
21311962 struct ggml_mem_ranges * mem_ranges;
21321963};
21331964
@@ -2164,8 +1995,6 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
21641995
21651996 id <MTLComputeCommandEncoder > encoder = ctx_enc->encoder ;
21661997
2167- struct ggml_metal_mem_pool * mem_pool = ctx_enc->mem_pool ;
2168-
21691998 struct ggml_backend_metal_context * ctx = backend->context ;
21701999 struct ggml_backend_metal_device_context * ctx_dev = backend->device ->context ;
21712000
@@ -2206,8 +2035,6 @@ static int ggml_metal_encode_node(struct ggml_metal_encode_context * ctx_enc, in
22062035 GGML_ABORT (" unsupported op" );
22072036 }
22082037
2209- ggml_metal_mem_pool_clear (mem_pool);
2210-
22112038 const int64_t ne00 = src0 ? src0->ne [0 ] : 0 ;
22122039 const int64_t ne01 = src0 ? src0->ne [1 ] : 0 ;
22132040 const int64_t ne02 = src0 ? src0->ne [2 ] : 0 ;
@@ -5819,12 +5646,7 @@ static enum ggml_status ggml_metal_graph_compute(
58195646 // the main thread commits the first few commands immediately
58205647 // cmd_buf[n_cb]
58215648 {
5822- // cannot use commandBufferWithUnretainedReferences because the buffers from the memory pool can get destroyed
5823- // TODO: when the memory pools are removed, we can again use commandBufferWithUnretainedReferences
5824- // https://github.com/ggml-org/llama.cpp/pull/15832#discussion_r2334215009
5825- // [TAG_MEM_POOL_REMOVE]
5826- // id<MTLCommandBuffer> cmd_buf = [ctx->queue commandBufferWithUnretainedReferences];
5827- id <MTLCommandBuffer > cmd_buf = [ctx->queue commandBuffer ];
5649+ id <MTLCommandBuffer > cmd_buf = [ctx->queue commandBufferWithUnretainedReferences ];
58285650 [cmd_buf retain ];
58295651
58305652 if (ctx->cmd_bufs [n_cb].obj ) {
@@ -5843,8 +5665,7 @@ static enum ggml_status ggml_metal_graph_compute(
58435665 // prepare the rest of the command buffers asynchronously (optional)
58445666 // cmd_buf[0.. n_cb)
58455667 for (int cb_idx = 0 ; cb_idx < n_cb; ++cb_idx) {
5846- // id<MTLCommandBuffer> cmd_buf = [ctx->queue commandBufferWithUnretainedReferences];
5847- id <MTLCommandBuffer > cmd_buf = [ctx->queue commandBuffer ];
5668+ id <MTLCommandBuffer > cmd_buf = [ctx->queue commandBufferWithUnretainedReferences ];
58485669 [cmd_buf retain ];
58495670
58505671 if (ctx->cmd_bufs [cb_idx].obj ) {
@@ -6674,11 +6495,8 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
66746495 const int n_nodes_per_cb = ctx->n_nodes_per_cb ;
66756496
66766497 id <MTLCommandBuffer > cmd_buf = ctx->cmd_bufs [cb_idx].obj ;
6677- struct ggml_metal_mem_pool * mem_pool = ctx->cmd_bufs [cb_idx].mem_pool ;
66786498 struct ggml_mem_ranges * mem_ranges = ctx->cmd_bufs [cb_idx].mem_ranges ;
66796499
6680- ggml_metal_mem_pool_reset (mem_pool);
6681-
66826500 if (mem_ranges) {
66836501 ggml_mem_ranges_reset (mem_ranges);
66846502 }
@@ -6706,7 +6524,6 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
67066524 struct ggml_metal_encode_context ctx_enc = {
67076525 /* .backend =*/ backend,
67086526 /* .encoder =*/ encoder,
6709- /* .mem_pool =*/ mem_pool,
67106527 /* .mem_ranges =*/ mem_ranges,
67116528 };
67126529
0 commit comments