Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions ggml/src/ggml-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,22 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
#define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
#define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)

// expose GGUF internals for test code

GGML_API size_t gguf_type_size(enum gguf_type type);

GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);

struct gguf_buf {
void * data;
size_t size;
size_t offset;
};
GGML_API struct gguf_buf gguf_buf_init(size_t size);
GGML_API void gguf_buf_free(struct gguf_buf buf);

GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta);

#ifdef __cplusplus
}
#endif
65 changes: 24 additions & 41 deletions ggml/src/ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -6489,7 +6489,7 @@ struct gguf_context {
void * data;
};

static size_t gguf_type_size(enum gguf_type type) {
size_t gguf_type_size(enum gguf_type type) {
GGML_ASSERT(0 <= type && type < GGUF_TYPE_COUNT);
return GGUF_TYPE_SIZE[type];
}
Expand Down Expand Up @@ -6617,13 +6617,7 @@ struct gguf_context * gguf_init_empty(void) {
return ctx;
}

struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
FILE * file = ggml_fopen(fname, "rb");
if (!file) {
fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
return NULL;
}

struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) {
// offset from start of file
size_t offset = 0;

Expand All @@ -6636,7 +6630,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
for (uint32_t i = 0; i < sizeof(magic); i++) {
if (magic[i] != GGUF_MAGIC[i]) {
fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
fclose(file);
return NULL;
}
}
Expand All @@ -6647,7 +6640,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
struct gguf_context * ctx = calloc(1, sizeof(struct gguf_context));
if (!ctx) {
fprintf(stderr, "%s: failed to allocate memory for context\n", __func__);
fclose(file);
return NULL;
}

Expand All @@ -6665,7 +6657,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

if (ctx->header.version == 1) {
fprintf(stderr, "%s: GGUFv1 is no longer supported. please use a more up-to-date version\n", __func__);
fclose(file);
gguf_free(ctx);
return NULL;
}
Expand All @@ -6678,7 +6669,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

if (!ok) {
fprintf(stderr, "%s: failed to read header\n", __func__);
fclose(file);
gguf_free(ctx);
return NULL;
}
Expand All @@ -6688,12 +6678,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
{
const uint64_t n_kv = ctx->header.n_kv;

ctx->kv = calloc(n_kv, sizeof(struct gguf_kv));
if (!ctx->kv) {
fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__);
fclose(file);
gguf_free(ctx);
return NULL;
if (n_kv > 0) {
ctx->kv = calloc(n_kv, sizeof(struct gguf_kv));
if (!ctx->kv) {
fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__);
gguf_free(ctx);
return NULL;
}
}

for (uint64_t i = 0; i < n_kv; ++i) {
Expand Down Expand Up @@ -6740,15 +6731,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
// prevent from integer overflow in the malloc below
if (kv->value.arr.n >= SIZE_MAX/gguf_type_size(kv->value.arr.type)) {
fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
fclose(file);
gguf_free(ctx);
return NULL;
}

kv->value.arr.data = calloc(kv->value.arr.n, gguf_type_size(kv->value.arr.type));
if (!kv->value.arr.data) {
fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
fclose(file);
gguf_free(ctx);
return NULL;
}
Expand All @@ -6760,15 +6749,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
// prevent from integer overflow in the malloc below
if (kv->value.arr.n >= SIZE_MAX/sizeof(struct gguf_str)) {
fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
fclose(file);
gguf_free(ctx);
return NULL;
}

kv->value.arr.data = calloc(kv->value.arr.n, sizeof(struct gguf_str));
if (!kv->value.arr.data) {
fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
fclose(file);
gguf_free(ctx);
return NULL;
}
Expand Down Expand Up @@ -6799,7 +6786,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

if (!ok) {
fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
fclose(file);
gguf_free(ctx);
return NULL;
}
Expand All @@ -6810,7 +6796,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
ctx->infos = calloc(ctx->header.n_tensors, sizeof(struct gguf_tensor_info));
if (!ctx->infos) {
fprintf(stderr, "%s: failed to allocate memory for tensor infos\n", __func__);
fclose(file);
gguf_free(ctx);
return NULL;
}
Expand Down Expand Up @@ -6846,7 +6831,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

if (!ok) {
fprintf(stderr, "%s: failed to read tensor info\n", __func__);
fclose(file);
gguf_free(ctx);
return NULL;
}
Expand Down Expand Up @@ -6889,15 +6873,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
// this tensor type support have been removed:
fprintf(stderr, "%s: tensor '%s' of type %d: %s\n",
__func__, info->name.data, (int) info->type, ggml_type_name(info->type));
fclose(file);
gguf_free(ctx);
return NULL;
}

if (ne % ggml_blck_size(info->type) != 0) {
fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
__func__, info->name.data, (int) info->type, ggml_type_name(info->type), ne, ggml_blck_size(info->type));
fclose(file);
gguf_free(ctx);
return NULL;
}
Expand Down Expand Up @@ -6929,7 +6911,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
*params.ctx = ggml_init(pdata);
if (*params.ctx == NULL) {
fprintf(stderr, "%s: failed to initialize context\n", __func__);
fclose(file);
gguf_free(ctx);
return NULL;
}
Expand All @@ -6948,7 +6929,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

if (!ok) {
fprintf(stderr, "%s: failed to read tensor data\n", __func__);
fclose(file);
ggml_free(ctx_data);
gguf_free(ctx);
return NULL;
Expand Down Expand Up @@ -6987,7 +6967,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

if (!ok) {
fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
fclose(file);
ggml_free(ctx_data);
gguf_free(ctx);
return NULL;
Expand All @@ -6996,11 +6975,21 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
ggml_set_no_alloc(ctx_data, params.no_alloc);
}

fclose(file);

return ctx;
}

struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
FILE * file = ggml_fopen(fname, "rb");
if (!file) {
fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
return NULL;
}

struct gguf_context * result = gguf_init_from_file_impl(file, params);
fclose(file);
return result;
}

void gguf_free(struct gguf_context * ctx) {
if (ctx == NULL) {
return;
Expand Down Expand Up @@ -7460,13 +7449,7 @@ void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const vo
// fwrite(val, sizeof(char), size, file);
//}

struct gguf_buf {
void * data;
size_t size;
size_t offset;
};

static struct gguf_buf gguf_buf_init(size_t size) {
struct gguf_buf gguf_buf_init(size_t size) {
struct gguf_buf buf = {
/*buf.data =*/ size == 0 ? NULL : GGML_CALLOC(1, size),
/*buf.size =*/ size,
Expand All @@ -7476,7 +7459,7 @@ static struct gguf_buf gguf_buf_init(size_t size) {
return buf;
}

static void gguf_buf_free(struct gguf_buf buf) {
void gguf_buf_free(struct gguf_buf buf) {
if (buf.data) {
GGML_FREE(buf.data);
}
Expand Down Expand Up @@ -7514,7 +7497,7 @@ static void gguf_bwrite_el(struct gguf_buf * buf, const void * val, size_t el_si
buf->offset += el_size;
}

static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf * buf, bool only_meta) {
// write header
gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic));
gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version));
Expand Down
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ llama_target_and_test(test-arg-parser.cpp)
llama_target_and_test(test-chat-template.cpp)

# llama_target_and_test(test-opt.cpp) # SLOW
llama_target_and_test(test-gguf.cpp)
llama_target_and_test(test-backend-ops.cpp)

llama_target_and_test(test-model-load-cancel.cpp LABEL "model")
Expand Down
Loading
Loading