@@ -568,7 +568,7 @@ static bool ggml_metal_heap_resize(struct ggml_metal_heap * heap, size_t size) {
568568
569569    heap->need  += size_aligned;
570570
571-     if  (!heap->fail  && heap-> need  > [heap->obj maxAvailableSizeWithAlignment: alignment]) {
571+     if  (!heap->fail  && size_aligned  > [heap->obj maxAvailableSizeWithAlignment: alignment]) {
572572        heap->fail  = 1 ;
573573    }
574574
@@ -2278,11 +2278,13 @@ static bool ggml_metal_encode_node(
22782278                    /* .nb3  =*/   nb03,
22792279                };
22802280
2281-                 id <MTLBuffer > id_src0h = ggml_metal_heap_alloc (heap, ggml_nbytes (src0), 32 );
2281+                 id <MTLBuffer > id_src0h = ggml_metal_heap_alloc (heap, ggml_nbytes (src0), 64 * 1024 );
22822282                if  (!id_src0h) {
2283-                     // GGML_LOG_ERROR("%s: failed to allocate buffer for cpy, size = %zu, need = %zu, max available = %zu\n",
2284-                     //         __func__, ggml_nbytes(src0), heap->need, [heap->obj maxAvailableSizeWithAlignment:32]);
2285-                     return  false ;
2283+                     // GGML_LOG_ERROR("%s: failed to allocate buffer, idx = %4d, size = %8zu, need = %8zu, max available = %9zu, heap size = %9zu, heap used = %zu\n",
2284+                     //         __func__, idx, ggml_nbytes(src0), heap->need, [heap->obj maxAvailableSizeWithAlignment:0], [heap->obj size], [heap->obj usedSize]);
2285+                     return  true ;
2286+                 } else  {
2287+                     // GGML_LOG_ERROR("%s: allocated %zu\n", __func__, ggml_nbytes(src0));
22862288                }
22872289
22882290                if  (src0->type  == GGML_TYPE_F16) {
@@ -4689,7 +4691,7 @@ static enum ggml_status ggml_metal_graph_compute(
46894691    //  number of threads in addition to the main thread
46904692    const  int  n_cb = ctx->n_cb ;
46914693
4692-     int  n_try = 64 ;
4694+     int  n_try = 2 ;
46934695
46944696    //  submit the ggml compute graph to the GPU by creating command buffers and encoding the ops in them
46954697    //  the first n_nodes_0 are encoded and submitted for processing directly by the calling thread
@@ -4816,7 +4818,7 @@ static enum ggml_status ggml_metal_graph_compute(
48164818    for  (int  i = 0 ; i <= n_cb; ++i) {
48174819        struct  ggml_metal_heap * heap = ctx->cmd_bufs [i].heap ;
48184820
4819-         const  size_t  need = 4 * heap->need ;
4821+         const  size_t  need = heap->need ;
48204822
48214823        // printf("\nXXXXXXXXXXXXXXXXX cb %d, need = %zu, fail = %d, size = %zu\n", i, need, heap->fail, [heap->obj currentAllocatedSize]);
48224824
0 commit comments