@@ -308,6 +308,8 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
308308}
309309
310310#define GGML_DEBUG 0
311+ #define GGML_MAX_CONTEXTS 64 // pre-allocated contexts in static memory
312+
311313#define GGML_GELU_FP16
312314#define GGML_GELU_QUICK_FP16
313315
@@ -1985,7 +1987,7 @@ static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
19851987
19861988struct ggml_context {
19871989 size_t mem_size;
1988- void* mem_buffer;
1990+ void * mem_buffer;
19891991 bool mem_buffer_owned;
19901992 bool no_alloc;
19911993 bool no_alloc_save; // this is used to save the no_alloc state when using scratch buffers
@@ -3839,7 +3841,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
38393841 is_first_call = false;
38403842 }
38413843
3842- // find non-used context in g_state
3844+ // find non-used static context in g_state
38433845 struct ggml_context * ctx = NULL;
38443846
38453847 for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
@@ -3852,12 +3854,12 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
38523854 }
38533855 }
38543856
3855- if (ctx == NULL) {
3856- GGML_LOG_ERROR("%s: ran out of contexts (max = %d)\n", __func__, GGML_MAX_CONTEXTS);
3857+ ggml_critical_section_end();
38573858
3858- ggml_critical_section_end();
3859+ if (ctx == NULL) {
3860+ GGML_PRINT_DEBUG("%s: no static contexts available, allocating on the heap\n", __func__);
38593861
3860- return NULL ;
3862+ ctx = GGML_ALIGNED_MALLOC(sizeof(struct ggml_context)) ;
38613863 }
38623864
38633865 // allow calling ggml_init with a size of 0
@@ -3886,8 +3888,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
38863888
38873889 GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
38883890
3889- ggml_critical_section_end();
3890-
38913891 return ctx;
38923892}
38933893
@@ -3896,6 +3896,10 @@ void ggml_free(struct ggml_context * ctx) {
38963896 return;
38973897 }
38983898
3899+ if (ctx->mem_buffer_owned) {
3900+ GGML_ALIGNED_FREE(ctx->mem_buffer);
3901+ }
3902+
38993903 // make this function thread safe
39003904 ggml_critical_section_start();
39013905
@@ -3905,23 +3909,19 @@ void ggml_free(struct ggml_context * ctx) {
39053909 if (&g_state.contexts[i].context == ctx) {
39063910 g_state.contexts[i].used = false;
39073911
3908- GGML_PRINT_DEBUG("%s: context %d has been freed. memory used = %zu\n",
3909- __func__, i, ggml_used_mem(ctx));
3910-
3911- if (ctx->mem_buffer_owned) {
3912- GGML_ALIGNED_FREE(ctx->mem_buffer);
3913- }
3912+ GGML_PRINT_DEBUG("%s: context %d has been freed. memory used = %zu\n", __func__, i, ggml_used_mem(ctx));
39143913
39153914 found = true;
39163915 break;
39173916 }
39183917 }
39193918
3919+ ggml_critical_section_end();
3920+
39203921 if (!found) {
3921- GGML_PRINT_DEBUG("%s: context not found\n", __func__);
3922+ // not one of the static contexts in g_state, so this is a heap-allocated context
3923+ GGML_ALIGNED_FREE(ctx);
39223924 }
3923-
3924- ggml_critical_section_end();
39253925}
39263926
39273927size_t ggml_used_mem(const struct ggml_context * ctx) {
0 commit comments