@@ -883,7 +883,7 @@ @implementation GGMLMetalClass
 
         // create 1MB heaps per command buffer
         // these can be resized during compute when necessary
-        ctx->cmd_bufs[i].heap = ggml_metal_heap_init(device, 1024*1024);
+        ctx->cmd_bufs[i].heap = ggml_metal_heap_init(device, 32);
     }
 
 #if TARGET_OS_OSX || (TARGET_OS_IOS && __clang_major__ >= 15)
@@ -1274,9 +1274,9 @@ static void ggml_metal_free(struct ggml_backend_metal_context * ctx) {
 
     [ctx->queue release];
 
-    // ggml_metal_heap_free(ctx->heap);
     for (int i = 0; i < GGML_METAL_MAX_COMMAND_BUFFERS; ++i) {
-        [ctx->cmd_bufs[i].obj release];
+        // ctx->cmd_bufs[i].obj is auto released
+
         ggml_metal_heap_free(ctx->cmd_bufs[i].heap);
     }
 
@@ -5167,7 +5167,7 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
         id<MTLCommandBuffer> cmd_buf = ctx->cmd_bufs[cb_idx].obj;
         struct ggml_metal_heap * heap = ctx->cmd_bufs[cb_idx].heap;
 
-        int n_try = 3;
+        int n_try = 2;
 
         while (n_try-- > 0) {
             id<MTLComputeCommandEncoder> encoder = [cmd_buf computeCommandEncoder];
@@ -5200,6 +5200,21 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
                 break;
             }
 
+            if (heap->fail == 2) {
+                GGML_LOG_ERROR("%s: MTLHeap ran out of buffers, max = %d\n", __func__, heap->n);
+                break;
+            }
+
+            if (heap->fail == 3) {
+                GGML_LOG_ERROR("%s: MTLHeap failed to allocate buffer\n", __func__);
+                break;
+            }
+
+            if (n_try == 0) {
+                GGML_LOG_ERROR("%s: failed to allocate heap memory\n", __func__);
+                break;
+            }
+
             const size_t need = heap->need;
 
             GGML_LOG_INFO("%s: increasing heap size to %zu\n", __func__, need);
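
For context, a minimal standalone sketch in plain C of the grow-on-failure pattern that the retry loop above implements. The retry count, the fail codes (2 = heap ran out of buffer slots, 3 = buffer allocation failed), the need/n fields, and the log messages are taken from the diff; heap_info, encode_graph_once, heap_grow, and the 1 KiB figure are hypothetical placeholders for the Metal-specific graph encoding and MTLHeap resize code that is not shown here.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

// Heap bookkeeping mirroring the fields visible in the diff:
//   fail == 2 : the heap ran out of buffer slots
//   fail == 3 : allocating a buffer from the heap failed
//   otherwise : the heap was simply too small; `need` holds the required size
struct heap_info {
    int    fail;
    size_t need; // bytes required by the last (failed) encoding attempt
    int    n;    // number of buffers the heap can hold
};

// Placeholder stand-ins (hypothetical, not from the diff): the real code
// encodes a ggml graph with Metal and resizes an MTLHeap.
static size_t g_heap_size = 32; // matches the small initial heap size in the first hunk

static bool encode_graph_once(struct heap_info * heap) {
    const size_t required = 1024; // pretend the graph needs 1 KiB of heap memory
    if (g_heap_size >= required) {
        return true;
    }
    heap->fail = 1;
    heap->need = required;
    return false;
}

static void heap_grow(struct heap_info * heap, size_t need) {
    (void) heap;
    g_heap_size = need;
}

static bool encode_with_retry(struct heap_info * heap) {
    bool ok = false;

    int n_try = 2; // one attempt plus at most one retry after growing the heap

    while (n_try-- > 0) {
        if (encode_graph_once(heap)) {
            ok = true; // everything fit into the heap
            break;
        }

        // unrecoverable failures: report and give up
        if (heap->fail == 2) {
            fprintf(stderr, "%s: MTLHeap ran out of buffers, max = %d\n", __func__, heap->n);
            break;
        }

        if (heap->fail == 3) {
            fprintf(stderr, "%s: MTLHeap failed to allocate buffer\n", __func__);
            break;
        }

        if (n_try == 0) {
            fprintf(stderr, "%s: failed to allocate heap memory\n", __func__);
            break;
        }

        // recoverable failure: the heap was too small, so grow it and retry
        fprintf(stderr, "%s: increasing heap size to %zu\n", __func__, heap->need);
        heap_grow(heap, heap->need);
    }

    return ok;
}

int main(void) {
    struct heap_info heap = { 0 };
    return encode_with_retry(&heap) ? 0 : 1;
}

The pattern reflected here: start with a deliberately small per-command-buffer heap (32 bytes in the first hunk, relying on the "can be resized during compute" behavior noted in the context lines), and with n_try = 2 allow exactly one resize-and-retry pass, reporting an error instead of looping further.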