Skip to content

Commit 69f7b09

Browse files
committed
metal : add comments
1 parent 6f41327 commit 69f7b09

File tree

1 file changed

+70
-46
lines changed

1 file changed

+70
-46
lines changed

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 70 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -468,16 +468,21 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
468468
GGML_METAL_KERNEL_TYPE_COUNT
469469
};
470470

471+
//
472+
// ggml_metal_heap
473+
//
474+
471475
struct ggml_metal_heap {
472476
int n_unused; // number of times the heap was unused
473477

474-
int64_t n_alloc;
478+
int64_t n_alloc; // total number of buffer allocations in this heap across all computes
475479

480+
// current offset in the heap - we reset this after each node in order to reuse the memory
476481
size_t offs; // passed as the offset at which the next buffer is created inside obj
477482

478483
id<MTLHeap> obj; // the Metal heap from which the buffers are created
479484

480-
NSMutableArray * bufs;
485+
NSMutableArray * bufs; // the currently allocated MTLBuffer objects in this heap
481486
};
482487

483488
static struct ggml_metal_heap * ggml_metal_heap_init(id<MTLDevice> device, size_t size) {
@@ -511,6 +516,7 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
511516
static void ggml_metal_heap_reset(struct ggml_metal_heap * heap) {
512517
heap->offs = 0;
513518

519+
// track the number of consecutive graph computes in which the heap went unused
514520
if ([heap->bufs count] > 0) {
515521
heap->n_unused = 0;
516522
} else {
@@ -522,6 +528,8 @@ static void ggml_metal_heap_reset(struct ggml_metal_heap * heap) {
522528
}
523529
[heap->bufs removeAllObjects];
524530

531+
// tell the OS that it can reuse this memory if needed
532+
// ref: https://developer.apple.com/documentation/metal/mtlpurgeablestate?language=objc
525533
[heap->obj setPurgeableState:MTLPurgeableStateVolatile];
526534
}
527535

@@ -538,73 +546,41 @@ static void ggml_metal_heap_free(struct ggml_metal_heap * heap) {
538546
free(heap);
539547
}
540548

541-
struct ggml_metal_command_buffer {
542-
id<MTLCommandBuffer> obj;
543-
544-
struct ggml_metal_mem_pool * mem_pool;
545-
};
546-
547-
struct ggml_backend_metal_context {
548-
id<MTLDevice> device;
549-
id<MTLCommandQueue> queue;
550-
551-
dispatch_queue_t d_queue;
552-
553-
struct ggml_metal_kernel kernels[GGML_METAL_KERNEL_TYPE_COUNT];
554-
555-
// capture state
556-
bool capture_next_compute;
557-
bool capture_started;
558-
559-
id<MTLCaptureScope> capture_scope;
560-
561-
// command buffer state
562-
int n_cb; // number of extra threads used to submit the command buffers
563-
int n_nodes_0; // number of nodes submitted by the main thread
564-
int n_nodes_1; // remaining number of nodes submitted by the n_cb threads
565-
int n_nodes_per_cb;
566-
567-
struct ggml_cgraph * gf;
568-
569-
// the callback given to the thread pool
570-
void (^encode_async)(size_t ith);
571-
572-
// n_cb command buffers + 1 used by the main thread
573-
struct ggml_metal_command_buffer cmd_bufs[GGML_METAL_MAX_COMMAND_BUFFERS + 1];
574-
575-
// abort ggml_metal_graph_compute if callback returns true
576-
ggml_abort_callback abort_callback;
577-
void * abort_callback_data;
578-
};
579-
580549
// Objective-C wrapper around a struct ggml_metal_heap pointer, so that heaps can be stored in an NSMutableArray
@interface ggml_metal_heap_ptr : NSObject
581550

582551
@property (nonatomic, assign) struct ggml_metal_heap * data; // plain pointer (assign) - the wrapper does not retain or free the heap
583552

584553
@end
585554

586555
@implementation ggml_metal_heap_ptr
587-
588556
@end

558+
//
559+
// ggml_metal_mem_pool
560+
//
561+
590562
struct ggml_metal_mem_pool {
591563
id<MTLDevice> device; // device passed to ggml_metal_heap_init when a new heap is created
592564

565+
int n_heaps; // total number of heaps ever created (including those that were removed)
566+
593567
NSMutableArray * heaps; // ggml_metal_heap_ptr wrappers for the heaps in this pool
594568
NSMutableArray * heaps_to_remove; // NOTE(review): usage not visible in this excerpt - presumably heaps queued for removal; confirm
595569
};
596570

597571
// Allocate a zero-initialized memory pool with empty heap arrays.
// The device field is not set here - it stays zeroed from calloc.
static struct ggml_metal_mem_pool * ggml_metal_mem_pool_init(void) {
598572
struct ggml_metal_mem_pool * mem_pool = calloc(1, sizeof(struct ggml_metal_mem_pool)); // NOTE(review): result is not NULL-checked before the writes below
599573

574+
mem_pool->n_heaps = 0; // already zero after calloc - kept explicit
575+
600576
mem_pool->heaps = [[NSMutableArray alloc] init];
601577
mem_pool->heaps_to_remove = [[NSMutableArray alloc] init];
602578

603579
return mem_pool;
604580
}
605581

606582
static void ggml_metal_mem_pool_free(struct ggml_metal_mem_pool * mem_pool) {
607-
GGML_LOG_DEBUG("%s: freeing memory pool, num heaps = %zu\n", __func__, [mem_pool->heaps count]);
583+
GGML_LOG_DEBUG("%s: freeing memory pool, num heaps = %zu (total = %d)\n", __func__, [mem_pool->heaps count], mem_pool->n_heaps);
608584

609585
size_t size_all = 0;
610586
size_t size_cur = 0;
@@ -679,6 +655,9 @@ static void ggml_metal_mem_pool_clear(struct ggml_metal_mem_pool * mem_pool) {
679655
for (ggml_metal_heap_ptr * ptr in mem_pool->heaps) {
680656
struct ggml_metal_heap * heap = ptr.data;
681657
if (heap->offs + size_aligned <= [heap->obj size]) {
658+
// if this is the first buffer in the heap for the current command buffer, tell the OS that
659+
// it cannot free the memory used by the heap
660+
// ref: https://developer.apple.com/documentation/metal/mtlpurgeablestate?language=objc
682661
if ([heap->bufs count] == 0) {
683662
[heap->obj setPurgeableState:MTLPurgeableStateNonVolatile];
684663
}
@@ -702,11 +681,15 @@ static void ggml_metal_mem_pool_clear(struct ggml_metal_mem_pool * mem_pool) {
702681
ggml_metal_heap_ptr * heap_ptr = [ggml_metal_heap_ptr new];
703682

704683
struct ggml_metal_heap * heap = ggml_metal_heap_init(mem_pool->device, size_aligned);
705-
heap_ptr.data = heap;
684+
if (heap == NULL) {
685+
GGML_LOG_ERROR("%s: error: failed to create heap of size %zu\n", __func__, size_aligned);
686+
return NULL;
687+
}
706688

707-
GGML_LOG_DEBUG("%s: creating new heap of size %zu, got %zu\n", __func__, size_aligned, [heap->obj size]);
689+
//GGML_LOG_DEBUG("%s: creating new heap of size %zu, got %zu\n", __func__, size_aligned, [heap->obj size]);
708690

709-
ggml_metal_heap_reset(heap_ptr.data);
691+
heap_ptr.data = heap;
692+
ggml_metal_heap_reset(heap);
710693

711694
[heap->obj setPurgeableState:MTLPurgeableStateNonVolatile];
712695
id<MTLBuffer> buf = [heap->obj newBufferWithLength:size_aligned options:MTLResourceStorageModePrivate offset:heap->offs];
@@ -721,10 +704,51 @@ static void ggml_metal_mem_pool_clear(struct ggml_metal_mem_pool * mem_pool) {
721704
[heap->bufs addObject:buf];
722705

723706
[mem_pool->heaps addObject:heap_ptr];
707+
mem_pool->n_heaps++;
724708

725709
return buf;
726710
}
727711

712+
struct ggml_metal_command_buffer {
713+
id<MTLCommandBuffer> obj; // the underlying Metal command buffer
714+
715+
// each command buffer has a memory pool from which it can allocate temporary buffers during the compute
716+
struct ggml_metal_mem_pool * mem_pool;
717+
};
718+
719+
struct ggml_backend_metal_context {
720+
id<MTLDevice> device;
721+
id<MTLCommandQueue> queue;
722+
723+
dispatch_queue_t d_queue; // NOTE(review): presumably the GCD queue used to run encode_async - confirm
724+
725+
struct ggml_metal_kernel kernels[GGML_METAL_KERNEL_TYPE_COUNT]; // one entry per kernel type
726+
727+
// capture state
728+
bool capture_next_compute;
729+
bool capture_started;
730+
731+
id<MTLCaptureScope> capture_scope;
732+
733+
// command buffer state
734+
int n_cb; // number of extra threads used to submit the command buffers
735+
int n_nodes_0; // number of nodes submitted by the main thread
736+
int n_nodes_1; // remaining number of nodes submitted by the n_cb threads
737+
int n_nodes_per_cb; // NOTE(review): presumably the number of nodes handled by each of the n_cb command buffers - confirm
738+
739+
struct ggml_cgraph * gf; // NOTE(review): presumably the graph currently being computed - confirm
740+
741+
// the callback given to the thread pool
742+
void (^encode_async)(size_t ith);
743+
744+
// n_cb command buffers + 1 used by the main thread
745+
struct ggml_metal_command_buffer cmd_bufs[GGML_METAL_MAX_COMMAND_BUFFERS + 1];
746+
747+
// abort ggml_metal_graph_compute if callback returns true
748+
ggml_abort_callback abort_callback;
749+
void * abort_callback_data; // NOTE(review): presumably the user data passed to abort_callback - confirm
750+
};
751+
728752
// MSL code
729753
// TODO: move the contents here when ready
730754
// for now it is easier to work in a separate file

0 commit comments

Comments
 (0)