@@ -883,7 +883,7 @@ @implementation GGMLMetalClass
 
         // create 1MB heaps per command buffer
         // these can be resized during compute when necessary
-        ctx->cmd_bufs[i].heap = ggml_metal_heap_init(device, 1024*1024);
+        ctx->cmd_bufs[i].heap = ggml_metal_heap_init(device, 32);
     }
 
 #if TARGET_OS_OSX || (TARGET_OS_IOS && __clang_major__ >= 15)
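The helper itself is not part of this hunk; as a rough sketch only (the field names, descriptor settings, and error handling below are assumptions, not the PR's actual code), `ggml_metal_heap_init` could boil down to allocating a small placement `MTLHeap` that is later grown on demand:

```objc
#import <Metal/Metal.h>
#include <stdlib.h>

// Hypothetical layout -- the real struct in the PR may differ.
struct ggml_metal_heap {
    id<MTLHeap> obj;  // backing MTLHeap
    size_t      need; // bytes requested during the last encoding pass
    int         fail; // failure code recorded while encoding (0 = ok)
    int         n;    // number of buffers sub-allocated from the heap
};

static struct ggml_metal_heap * ggml_metal_heap_init(id<MTLDevice> device, size_t size) {
    struct ggml_metal_heap * heap = calloc(1, sizeof(struct ggml_metal_heap));

    MTLHeapDescriptor * desc = [[MTLHeapDescriptor alloc] init];
    desc.storageMode = MTLStorageModePrivate; // GPU-only scratch memory
    desc.type        = MTLHeapTypePlacement;  // sub-allocate buffers at explicit offsets
    desc.size        = size;

    heap->obj = [device newHeapWithDescriptor:desc];
    [desc release];

    if (heap->obj == nil) {
        free(heap);
        return NULL;
    }

    return heap;
}
```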
@@ -1274,9 +1274,9 @@ static void ggml_metal_free(struct ggml_backend_metal_context * ctx) {
 
     [ctx->queue release];
 
-    //ggml_metal_heap_free(ctx->heap);
     for (int i = 0; i < GGML_METAL_MAX_COMMAND_BUFFERS; ++i) {
-        [ctx->cmd_bufs[i].obj release];
+        // ctx->cmd_bufs[i].obj is auto released
+
         ggml_metal_heap_free(ctx->cmd_bufs[i].heap);
     }
 
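As the new comment notes, the command buffer objects are autoreleased (they come from plain `[ctx->queue commandBuffer]`-style calls rather than `alloc`/`new`), so only the heaps need explicit teardown here. A matching `ggml_metal_heap_free` could look roughly like the following, reusing the hypothetical struct from the sketch above, so again an assumption rather than the PR's actual code:

```objc
// Hypothetical counterpart to the ggml_metal_heap_init sketch above.
static void ggml_metal_heap_free(struct ggml_metal_heap * heap) {
    if (heap == NULL) {
        return;
    }

    [heap->obj release]; // the MTLHeap is owned by us, unlike the command buffer
    free(heap);
}
```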
@@ -5166,7 +5166,7 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
         id<MTLCommandBuffer> cmd_buf = ctx->cmd_bufs[cb_idx].obj;
         struct ggml_metal_heap * heap = ctx->cmd_bufs[cb_idx].heap;
 
-        int n_try = 3;
+        int n_try = 2;
 
         while (n_try-- > 0) {
            id<MTLComputeCommandEncoder> encoder = [cmd_buf computeCommandEncoder];
@@ -5199,6 +5199,21 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
                 break;
             }
 
+            if (heap->fail == 2) {
+                GGML_LOG_ERROR("%s: MTLHeap ran out of buffers, max = %d\n", __func__, heap->n);
+                break;
+            }
+
+            if (heap->fail == 3) {
+                GGML_LOG_ERROR("%s: MTLHeap failed to allocate buffer\n", __func__);
+                break;
+            }
+
+            if (n_try == 0) {
+                GGML_LOG_ERROR("%s: failed to allocate heap memory\n", __func__);
+                break;
+            }
+
             const size_t need = heap->need;
 
             GGML_LOG_INFO("%s: increasing heap size to %zu\n", __func__, need);
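Putting the loop together: with `n_try = 2`, the first encoding pass can come up short and record the shortfall in `heap->need`; the hard failures (`fail == 2`, `fail == 3`) and the exhausted-retries case now abort with an error instead of looping again, while the remaining path increases the heap size to `need` and retries. The grow step itself is not shown in this hunk; below is a minimal sketch of what it could look like, under the same assumptions as the earlier `ggml_metal_heap_init` sketch (the `ggml_metal_heap_resize` name and descriptor settings are hypothetical):

```objc
#include <stdbool.h>

// Hypothetical grow step implied by the retry loop -- the function name and
// descriptor settings are assumptions, not the PR's actual code.
static bool ggml_metal_heap_resize(struct ggml_metal_heap * heap, id<MTLDevice> device, size_t need) {
    [heap->obj release];

    MTLHeapDescriptor * desc = [[MTLHeapDescriptor alloc] init];
    desc.storageMode = MTLStorageModePrivate;
    desc.type        = MTLHeapTypePlacement;
    desc.size        = need;

    heap->obj = [device newHeapWithDescriptor:desc];
    [desc release];

    // reset per-pass bookkeeping before the next encoding attempt
    heap->need = 0;
    heap->fail = 0;
    heap->n    = 0;

    return heap->obj != nil;
}
```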