1919//  max number of MTLCommandBuffer used to submit a graph for processing
2020#define  GGML_METAL_MAX_COMMAND_BUFFERS  8 
2121
22- //  max number of buffers that can be allocated on the heap per command buffer
23- #define  GGML_METAL_MAX_HEAP_BUFFERS  64 
24- 
2522#ifndef  TARGET_OS_VISION
2623#define  TARGET_OS_VISION  0 
2724#endif 
@@ -472,14 +469,15 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
472469};
473470
// Bookkeeping for one MTLHeap from which temporary buffers are placed at explicit offsets.
struct ggml_metal_heap {
    // 0 while allocation is healthy; set to a non-zero code by the allocator when a request
    // does not fit in the heap or when Metal fails to create the buffer
    int fail;

    // offset inside the heap used for placing buffers; cleared by ggml_metal_heap_reset
    size_t offs;
    // high-water mark of the space requested so far; cleared by ggml_metal_heap_reset
    size_t need;

    // device used to (re)create the heap object
    id<MTLDevice> device;
    // the underlying Metal heap
    id<MTLHeap>   obj;

    // buffers handed out from the heap since the last reset
    NSMutableArray<id<MTLBuffer>> * bufs;
};
484482
485483static  struct  ggml_metal_heap * ggml_metal_heap_init (id <MTLDevice > device, size_t  size) {
@@ -488,7 +486,7 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
488486    MTLHeapDescriptor  * desc = [[MTLHeapDescriptor  alloc ] init ];
489487    desc.storageMode   = MTLStorageModePrivate ;
490488    desc.cpuCacheMode  = MTLCPUCacheModeDefaultCache ;
491-     desc.type          = MTLHeapTypeAutomatic ;  //  TODO: use  MTLHeapTypePlacement
489+     desc.type          = MTLHeapTypePlacement ; 
492490    desc.size          = size;
493491
494492    heap->device  = device;
@@ -501,39 +499,35 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
501499        return  false ;
502500    }
503501
504-     for  (int  i = 0 ; i < GGML_METAL_MAX_HEAP_BUFFERS; ++i) {
505-         heap->bufs [i] = nil ;
506-     }
507- 
508502    [desc release ];
509503
504+     heap->bufs  = [[NSMutableArray  alloc ] init ];
505+ 
510506    return  heap;
511507}
512508
// Put the heap bookkeeping back into its initial state and let go of every buffer
// that is currently tracked in heap->bufs. The MTLHeap object itself is left untouched.
static void ggml_metal_heap_reset(struct ggml_metal_heap * heap) {
    // Each tracked buffer gets an explicit release here, on top of the one performed by
    // removeAllObjects below.
    // NOTE(review): presumably this balances the ownership returned by the
    // newBufferWithLength call that created the buffer - confirm against the allocator.
    const NSUInteger n_bufs = [heap->bufs count];
    for (NSUInteger i = 0; i < n_bufs; ++i) {
        [[heap->bufs objectAtIndex: i] release];
    }
    [heap->bufs removeAllObjects];

    // clear the allocation state
    heap->need = 0;
    heap->offs = 0;
    heap->fail = 0;
}
519+ 
// Destroy a heap wrapper: releases the tracked buffers, the MTLHeap object and the
// buffer array, then frees the struct itself. Passing NULL is a no-op.
static void ggml_metal_heap_free(struct ggml_metal_heap * heap) {
    // heap is a plain C pointer, so it is compared against NULL rather than the
    // Objective-C object constant nil
    if (heap == NULL) {
        return;
    }

    // release the buffers placed in the heap before releasing the heap that backs them
    ggml_metal_heap_reset(heap);

    [heap->obj  release];
    [heap->bufs release];

    // NOTE(review): assumes the struct came from a malloc-family allocation in
    // ggml_metal_heap_init - the allocation itself is not visible here
    free(heap);
}
538532
539533static  bool  ggml_metal_heap_resize (struct  ggml_metal_heap * heap, size_t  size) {
@@ -546,7 +540,7 @@ static bool ggml_metal_heap_resize(struct ggml_metal_heap * heap, size_t size) {
546540    MTLHeapDescriptor  * desc = [[MTLHeapDescriptor  alloc ] init ];
547541    desc.storageMode   = MTLStorageModePrivate ;
548542    desc.cpuCacheMode  = MTLCPUCacheModeDefaultCache ;
549-     desc.type          = MTLHeapTypeAutomatic ;  //  TODO: use  MTLHeapTypePlacement
543+     desc.type          = MTLHeapTypePlacement ; 
550544    desc.size          = size;
551545
552546    heap->obj  = [heap->device newHeapWithDescriptor: desc];
@@ -571,33 +565,32 @@ static bool ggml_metal_heap_resize(struct ggml_metal_heap * heap, size_t size) {
571565
572566    const  size_t  size_aligned = GGML_PAD (size, alignment);
573567
574-     // GGML_LOG_INFO("%s: size = %zu, size_aligned = %zu, need = %zu, fail = %d\n", __func__, size, size_aligned, heap->need, heap->fail);
568+     heap->offs  += size_aligned;
569+     heap->need  = MAX (heap->need , heap->offs  + size_aligned);
575570
576-     heap-> need  +=  size_aligned;
571+     // GGML_LOG_INFO("%s: size = %zu, size_aligned = %zu, offs = %zu,  need = %zu\n", __func__, size,  size_aligned, offs, heap->offs, heap->need) ;
577572
578573    if  (no_alloc) {
579574        return  nil ;
580575    }
581576
582-     if  (!heap->fail  && size_aligned > [heap->obj maxAvailableSizeWithAlignment: alignment ]) {
577+     if  (!heap->fail  && heap-> offs  +  size_aligned > [heap->obj size ]) {
583578        heap->fail  = 1 ;
584579    }
585580
586-     if  (!heap->fail  && heap->n  >= GGML_METAL_MAX_HEAP_BUFFERS) {
587-         heap->fail  = 2 ;
588-     }
589- 
590581    if  (heap->fail ) {
591582        return  nil ;
592583    }
593584
594-     id <MTLBuffer > buf = [heap->obj newBufferWithLength: size_aligned options: MTLResourceStorageModePrivate ];
585+     id <MTLBuffer > buf = [heap->obj newBufferWithLength: size_aligned options: MTLResourceStorageModePrivate   offset: heap->offs ];
595586    if  (!buf) {
596587        heap->fail  = 3 ;
597588        return  nil ;
598589    }
599590
600-     heap->bufs [heap->n++] = buf;
591+     [heap->bufs addObject: buf];
592+ 
593+     // GGML_LOG_INFO("%s: allocated buffer, size = %zu, offs = %zu, heap size = %zu, heap used = %zu\n", __func__, size_aligned, offs, [heap->obj size], [heap->obj usedSize]);
601594
602595    return  buf;
603596}
@@ -634,7 +627,6 @@ static bool ggml_metal_heap_resize(struct ggml_metal_heap * heap, size_t size) {
634627    void  (^encode_async)(size_t  ith);
635628
636629    //  n_cb command buffers + 1 used by the main thread
637-     // id<MTLCommandBuffer> command_buffers[GGML_METAL_MAX_COMMAND_BUFFERS + 1];
638630    struct  ggml_metal_command_buffer cmd_bufs[GGML_METAL_MAX_COMMAND_BUFFERS + 1 ];
639631
640632    //  abort ggml_metal_graph_compute if callback returns true
@@ -1638,13 +1630,16 @@ static bool ggml_metal_encode_node(
16381630    //  heap buffers for temporary data
16391631    id <MTLBuffer > h_src0 = nil ;
16401632
1633+     //  always allocate buffers from the start of the heap for the current node
1634+     heap->offs  = 0 ;
1635+ 
16411636    switch  (dst->op ) {
16421637        case  GGML_OP_SOFT_MAX:
16431638            {
16441639                h_src0 = ggml_metal_heap_alloc (heap, ggml_nbytes (src0), no_alloc);
16451640                if  (!no_alloc && !h_src0) {
1646-                     GGML_LOG_ERROR (" %s : failed to allocate buffer, idx = %4d , size = %8zu , need  = %8zu , max available = %9zu , heap size = %9zu , heap used = %zu , fail = %d \n "  ,
1647-                             __func__, idx, ggml_nbytes (src0), heap->need , [heap->obj maxAvailableSizeWithAlignment: 0 ], [heap->obj size ], [heap->obj usedSize ], heap->fail );
1641+                     GGML_LOG_ERROR (" %s : failed to allocate buffer, idx = %4d , size = %8zu , offs  = %8zu , max available = %9zu , heap size = %9zu , heap used = %zu , fail = %d \n "  ,
1642+                             __func__, idx, ggml_nbytes (src0), heap->offs , [heap->obj maxAvailableSizeWithAlignment: 0 ], [heap->obj size ], [heap->obj usedSize ], heap->fail );
16481643                    return  false ;
16491644                }
16501645            } break ;
@@ -2250,8 +2245,6 @@ static bool ggml_metal_encode_node(
22502245            {
22512246                GGML_ASSERT (!src1 || src1->type  == GGML_TYPE_F16 || src1->type  == GGML_TYPE_F32);
22522247
2253-                 GGML_ASSERT (ggml_is_contiguous (src0));
2254- 
22552248                int  nth = 32 ; //  SIMD width
22562249
22572250                id <MTLComputePipelineState > pipeline = nil ;
@@ -4836,6 +4829,12 @@ static enum ggml_status ggml_metal_graph_compute(
48364829            [next_buffer commit ];
48374830        }
48384831
4832+         for  (int  i = 0 ; i <= n_cb; ++i) {
4833+             struct  ggml_metal_heap * heap = ctx->cmd_bufs [i].heap ;
4834+ 
4835+             [heap->obj setPurgeableState: MTLPurgeableStateEmpty ];
4836+         }
4837+ 
48394838        if  (!should_capture && ctx->capture_started ) {
48404839            [ctx->capture_scope endScope ];
48414840            [[MTLCaptureManager  sharedCaptureManager ] stopCapture ];
@@ -5233,6 +5232,8 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
52335232            }
52345233        }
52355234
5235+         // GGML_LOG_INFO("XXXXXXXXXXXXXXXXXXXXXXXXX\n");
5236+ 
52365237        if  (can_compute) {
52375238            for  (int  idx = node_start; idx < node_end; ++idx) {
52385239                if  (should_capture) {
0 commit comments