@@ -468,16 +468,21 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
468468 GGML_METAL_KERNEL_TYPE_COUNT
469469};
470470
471+ //
472+ // ggml_metal_heap
473+ //
474+
// A single Metal heap plus the bookkeeping used to place temporary buffers
// inside it at increasing offsets.
struct ggml_metal_heap {
    int n_unused; // number of times the heap was unused

    int64_t n_alloc; // total number of buffer allocations in this heap across all computes

    // current offset in the heap - we reset this after each node in order to reuse the memory
    size_t offs;

    id<MTLHeap> obj; // the underlying Metal heap object

    NSMutableArray * bufs; // the currently allocated MTLBuffer objects in this heap
};
482487
483488static struct ggml_metal_heap * ggml_metal_heap_init (id <MTLDevice > device, size_t size) {
@@ -511,6 +516,7 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
511516static void ggml_metal_heap_reset (struct ggml_metal_heap * heap) {
512517 heap->offs = 0 ;
513518
519+ // count how many graph computes the heap ended up being unused
514520 if ([heap->bufs count ] > 0 ) {
515521 heap->n_unused = 0 ;
516522 } else {
@@ -522,6 +528,8 @@ static void ggml_metal_heap_reset(struct ggml_metal_heap * heap) {
522528 }
523529 [heap->bufs removeAllObjects ];
524530
531+ // tell the OS that it can reuse this memory if needed
532+ // ref: https://developer.apple.com/documentation/metal/mtlpurgeablestate?language=objc
525533 [heap->obj setPurgeableState: MTLPurgeableStateVolatile ];
526534}
527535
@@ -538,73 +546,41 @@ static void ggml_metal_heap_free(struct ggml_metal_heap * heap) {
538546 free (heap);
539547}
540548
541- struct ggml_metal_command_buffer {
542- id <MTLCommandBuffer > obj;
543-
544- struct ggml_metal_mem_pool * mem_pool;
545- };
546-
547- struct ggml_backend_metal_context {
548- id <MTLDevice > device;
549- id <MTLCommandQueue > queue;
550-
551- dispatch_queue_t d_queue;
552-
553- struct ggml_metal_kernel kernels[GGML_METAL_KERNEL_TYPE_COUNT];
554-
555- // capture state
556- bool capture_next_compute;
557- bool capture_started;
558-
559- id <MTLCaptureScope > capture_scope;
560-
561- // command buffer state
562- int n_cb; // number of extra threads used to submit the command buffers
563- int n_nodes_0; // number of nodes submitted by the main thread
564- int n_nodes_1; // remaining number of nodes submitted by the n_cb threads
565- int n_nodes_per_cb;
566-
567- struct ggml_cgraph * gf;
568-
569- // the callback given to the thread pool
570- void (^encode_async)(size_t ith);
571-
572- // n_cb command buffers + 1 used by the main thread
573- struct ggml_metal_command_buffer cmd_bufs[GGML_METAL_MAX_COMMAND_BUFFERS + 1 ];
574-
575- // abort ggml_metal_graph_compute if callback returns true
576- ggml_abort_callback abort_callback;
577- void * abort_callback_data;
578- };
579-
// Objective-C wrapper around a raw `struct ggml_metal_heap *` so that heap
// pointers can be stored in Foundation collections (e.g. NSMutableArray).
@interface ggml_metal_heap_ptr : NSObject

// raw pointer stored with assign semantics - the wrapper has no dealloc and
// therefore does not free the heap it points to
@property (nonatomic, assign) struct ggml_metal_heap * data;

@end

@implementation ggml_metal_heap_ptr
@end
589557
558+ //
559+ // ggml_metal_mem_pool
560+ //
561+
// A pool of Metal heaps from which buffers are allocated.
struct ggml_metal_mem_pool {
    id<MTLDevice> device; // device used when a new heap has to be created

    int n_heaps; // total number of heaps ever created (including those that were removed)

    NSMutableArray * heaps;           // ggml_metal_heap_ptr wrappers for the heaps in the pool
    NSMutableArray * heaps_to_remove; // NOTE(review): presumably heaps scheduled for removal - confirm against the clear/free code
};
596570
// Allocate and initialize an empty memory pool.
//
// The pool is zero-initialized, so `device` is left unset here and has to be
// assigned by the caller before any heap can be created from the pool.
//
// Returns NULL (after logging an error) if the pool itself cannot be allocated.
static struct ggml_metal_mem_pool * ggml_metal_mem_pool_init(void) {
    struct ggml_metal_mem_pool * mem_pool = calloc(1, sizeof(struct ggml_metal_mem_pool));
    if (mem_pool == NULL) {
        // do not dereference a failed allocation - report it like the heap creation failure path
        GGML_LOG_ERROR("%s: error: failed to allocate memory pool\n", __func__);
        return NULL;
    }

    mem_pool->n_heaps = 0;

    mem_pool->heaps           = [[NSMutableArray alloc] init];
    mem_pool->heaps_to_remove = [[NSMutableArray alloc] init];

    return mem_pool;
}
605581
606582static void ggml_metal_mem_pool_free (struct ggml_metal_mem_pool * mem_pool) {
607- GGML_LOG_DEBUG (" %s : freeing memory pool, num heaps = %zu \n " , __func__, [mem_pool->heaps count ]);
583+ GGML_LOG_DEBUG (" %s : freeing memory pool, num heaps = %zu (total = %d ) \n " , __func__, [mem_pool->heaps count ], mem_pool-> n_heaps );
608584
609585 size_t size_all = 0 ;
610586 size_t size_cur = 0 ;
@@ -679,6 +655,9 @@ static void ggml_metal_mem_pool_clear(struct ggml_metal_mem_pool * mem_pool) {
679655 for (ggml_metal_heap_ptr * ptr in mem_pool->heaps ) {
680656 struct ggml_metal_heap * heap = ptr.data ;
681657 if (heap->offs + size_aligned <= [heap->obj size ]) {
658+ // if this is the first buffer in the heap for the current command buffer, tell the OS that
659+ // it cannot free the memory used by the heap
660+ // ref: https://developer.apple.com/documentation/metal/mtlpurgeablestate?language=objc
682661 if ([heap->bufs count ] == 0 ) {
683662 [heap->obj setPurgeableState: MTLPurgeableStateNonVolatile ];
684663 }
@@ -702,11 +681,15 @@ static void ggml_metal_mem_pool_clear(struct ggml_metal_mem_pool * mem_pool) {
702681 ggml_metal_heap_ptr * heap_ptr = [ggml_metal_heap_ptr new ];
703682
704683 struct ggml_metal_heap * heap = ggml_metal_heap_init (mem_pool->device , size_aligned);
705- heap_ptr.data = heap;
684+ if (heap == NULL ) {
685+ GGML_LOG_ERROR (" %s : error: failed to create heap of size %zu \n " , __func__, size_aligned);
686+ return NULL ;
687+ }
706688
707- GGML_LOG_DEBUG (" %s : creating new heap of size %zu , got %zu \n " , __func__, size_aligned, [heap->obj size ]);
689+ // GGML_LOG_DEBUG("%s: creating new heap of size %zu, got %zu\n", __func__, size_aligned, [heap->obj size]);
708690
709- ggml_metal_heap_reset (heap_ptr.data );
691+ heap_ptr.data = heap;
692+ ggml_metal_heap_reset (heap);
710693
711694 [heap->obj setPurgeableState: MTLPurgeableStateNonVolatile ];
712695 id <MTLBuffer > buf = [heap->obj newBufferWithLength: size_aligned options: MTLResourceStorageModePrivate offset: heap->offs];
@@ -721,10 +704,51 @@ static void ggml_metal_mem_pool_clear(struct ggml_metal_mem_pool * mem_pool) {
721704 [heap->bufs addObject: buf];
722705
723706 [mem_pool->heaps addObject: heap_ptr];
707+ mem_pool->n_heaps ++;
724708
725709 return buf;
726710}
727711
// A Metal command buffer paired with the memory pool used while encoding it.
struct ggml_metal_command_buffer {
    id<MTLCommandBuffer> obj; // the underlying Metal command buffer

    // each command buffer has a memory pool from which it can allocate temporary buffers during the compute
    struct ggml_metal_mem_pool * mem_pool;
};
718+
// Per-backend state of the Metal backend: device/queue handles, compiled
// kernels, capture state and the command buffers used for graph compute.
struct ggml_backend_metal_context {
    id<MTLDevice>       device;
    id<MTLCommandQueue> queue;

    dispatch_queue_t d_queue;

    struct ggml_metal_kernel kernels[GGML_METAL_KERNEL_TYPE_COUNT];

    // capture state
    bool capture_next_compute;
    bool capture_started;

    id<MTLCaptureScope> capture_scope;

    // command buffer state
    int n_cb;           // number of extra threads used to submit the command buffers
    int n_nodes_0;      // number of nodes submitted by the main thread
    int n_nodes_1;      // remaining number of nodes submitted by the n_cb threads
    int n_nodes_per_cb;

    struct ggml_cgraph * gf;

    // the callback given to the thread pool
    void (^encode_async)(size_t ith);

    // n_cb command buffers + 1 used by the main thread
    struct ggml_metal_command_buffer cmd_bufs[GGML_METAL_MAX_COMMAND_BUFFERS + 1];

    // abort ggml_metal_graph_compute if callback returns true
    ggml_abort_callback abort_callback;
    void *              abort_callback_data;
};
751+
728752// MSL code
729753// TODO: move the contents here when ready
730754// for now it is easier to work in a separate file
0 commit comments