@@ -306,6 +306,7 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
306306}
307307
308308#define GGML_DEBUG 0
309+
309310#define GGML_GELU_FP16
310311#define GGML_GELU_QUICK_FP16
311312
@@ -2014,7 +2015,7 @@ static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
20142015
20152016struct ggml_context {
20162017 size_t mem_size;
2017- void* mem_buffer;
2018+ void * mem_buffer;
20182019 bool mem_buffer_owned;
20192020 bool no_alloc;
20202021 bool no_alloc_save; // this is used to save the no_alloc state when using scratch buffers
@@ -3263,7 +3264,6 @@ struct ggml_numa_nodes {
32633264//
32643265
32653266struct ggml_state {
3266- struct ggml_context_container contexts[GGML_MAX_CONTEXTS];
32673267 struct ggml_numa_nodes numa;
32683268};
32693269
@@ -3845,7 +3845,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
38453845 const uint64_t t_start = ggml_time_us(); UNUSED(t_start);
38463846
38473847 g_state = (struct ggml_state) {
3848- /*.contexts =*/ { { 0 } },
38493848 /*.numa =*/ {
38503849 .n_nodes = 0,
38513850 .total_cpus = 0,
@@ -3864,26 +3863,9 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
38643863 is_first_call = false;
38653864 }
38663865
3867- // find non-used context in g_state
3868- struct ggml_context * ctx = NULL;
3869-
3870- for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
3871- if (!g_state.contexts[i].used) {
3872- g_state.contexts[i].used = true;
3873- ctx = &g_state.contexts[i].context;
3874-
3875- GGML_PRINT_DEBUG("%s: found unused context %d\n", __func__, i);
3876- break;
3877- }
3878- }
3879-
3880- if (ctx == NULL) {
3881- GGML_PRINT_DEBUG("%s: no unused context found\n", __func__);
3882-
3883- ggml_critical_section_end();
3866+ ggml_critical_section_end();
38843867
3885- return NULL;
3886- }
3868+ struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
38873869
38883870 // allow to call ggml_init with 0 size
38893871 if (params.mem_size == 0) {
@@ -3911,42 +3893,31 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
39113893
39123894 GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
39133895
3914- ggml_critical_section_end();
3915-
39163896 return ctx;
39173897}
39183898
3919- void ggml_free (struct ggml_context * ctx) {
3899+ void ggml_reset (struct ggml_context * ctx) {
39203900 if (ctx == NULL) {
39213901 return;
39223902 }
39233903
3924- // make this function thread safe
3925- ggml_critical_section_start();
3926-
3927- bool found = false;
3928-
3929- for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
3930- if (&g_state.contexts[i].context == ctx) {
3931- g_state.contexts[i].used = false;
3932-
3933- GGML_PRINT_DEBUG("%s: context %d has been freed. memory used = %zu\n",
3934- __func__, i, ggml_used_mem(ctx));
3935-
3936- if (ctx->mem_buffer_owned) {
3937- ggml_aligned_free(ctx->mem_buffer, ctx->mem_size);
3938- }
3904+ ctx->n_objects = 0;
3905+ ctx->objects_begin = NULL;
3906+ ctx->objects_end = NULL;
3907+ ctx->scratch = (struct ggml_scratch) { 0, 0, NULL, };
3908+ ctx->scratch_save = (struct ggml_scratch) { 0, 0, NULL, };
3909+ }
39393910
3940- found = true;
3941- break;
3942- }
3911+ void ggml_free(struct ggml_context * ctx) {
3912+ if (ctx == NULL) {
3913+ return;
39433914 }
39443915
3945- if (!found ) {
3946- GGML_PRINT_DEBUG("%s: context not found\n", __func__ );
3916+ if (ctx->mem_buffer_owned ) {
3917+ ggml_aligned_free(ctx->mem_buffer, ctx->mem_size );
39473918 }
39483919
3949- ggml_critical_section_end( );
3920+ GGML_FREE(ctx );
39503921}
39513922
39523923size_t ggml_used_mem(const struct ggml_context * ctx) {
@@ -7272,6 +7243,7 @@ struct ggml_tensor * ggml_ssm_conv(
72727243 const int64_t n_s = sx->ne[2];
72737244
72747245 // TODO: maybe support other strides than 1?
7246+ // FIXME: this is always true?
72757247 GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t);
72767248 GGML_ASSERT(sx->ne[1] == d_inner);
72777249 GGML_ASSERT(n_t >= 0);
@@ -22102,18 +22074,46 @@ static size_t gguf_type_size(enum gguf_type type) {
2210222074 return GGUF_TYPE_SIZE[type];
2210322075}
2210422076
22105- static void gguf_tensor_info_sanitize(struct gguf_tensor_info * info) {
22106- GGML_ASSERT(info->n_dims <= GGML_MAX_DIMS);
22107- GGML_ASSERT(0 <= info->type && info->type < GGML_TYPE_COUNT);
22077+ static bool gguf_tensor_info_sanitize(struct gguf_tensor_info * info) {
22078+ if (info->n_dims > GGML_MAX_DIMS) {
22079+ fprintf(stderr, "%s: invalid number of dimensions (%" PRIu32 ")\n", __func__, info->n_dims);
22080+ return false;
22081+ }
22082+
22083+ if (info->type < 0 || info->type >= GGML_TYPE_COUNT) {
22084+ fprintf(stderr, "%s: invalid type (%d)\n", __func__, info->type);
22085+ return false;
22086+ }
22087+
22088+ if (strlen(info->name.data) >= GGML_MAX_NAME) {
22089+ fprintf(stderr, "%s: tensor '%s' name is too long\n", __func__, info->name.data);
22090+ return false;
22091+ }
2210822092
2210922093 for (uint32_t i = 0; i < info->n_dims; ++i) {
22110- GGML_ASSERT(info->ne[i] > 0);
22094+ if (info->ne[i] <= 0) {
22095+ fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[i]);
22096+ return false;
22097+ }
2211122098 }
2211222099
2211322100 // prevent overflow for total number of elements
22114- GGML_ASSERT(INT64_MAX/info->ne[1] > info->ne[0]);
22115- GGML_ASSERT(INT64_MAX/info->ne[2] > info->ne[0]*info->ne[1]);
22116- GGML_ASSERT(INT64_MAX/info->ne[3] > info->ne[0]*info->ne[1]*info->ne[2]);
22101+ if (INT64_MAX/info->ne[1] <= info->ne[0]) {
22102+ fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[1]);
22103+ return false;
22104+ }
22105+
22106+ if (INT64_MAX/info->ne[2] <= info->ne[0]*info->ne[1]) {
22107+ fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[2]);
22108+ return false;
22109+ }
22110+
22111+ if (INT64_MAX/info->ne[3] <= info->ne[0]*info->ne[1]*info->ne[2]) {
22112+ fprintf(stderr, "%s: invalid number of elements (%" PRIu64 ")\n", __func__, info->ne[3]);
22113+ return false;
22114+ }
22115+
22116+ return true;
2211722117}
2211822118
2211922119static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) {
@@ -22414,8 +22414,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
2241422414 ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
2241522415 ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
2241622416
22417- // TODO: return an error instead of crashing with GGML_ASSERT
22418- gguf_tensor_info_sanitize(info);
22417+ ok = ok && gguf_tensor_info_sanitize(info);
2241922418
2242022419 // make sure there is no duplicated tensor names
2242122420 for (uint64_t j = 0; j < i && ok; ++j) {
0 commit comments