Skip to content

Commit b887270

Browse files
GGUF: backend support, fixed-width I/O, misc fixes
1 parent: cc98896 · commit: b887270

File tree

5 files changed

+327
-264
lines changed

5 files changed

+327
-264
lines changed

examples/gguf/gguf.cpp

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,8 @@
11
#include "ggml.h"
22

33
#include <cstdio>
4-
#include <cinttypes>
54
#include <string>
65
#include <sstream>
7-
#include <fstream>
86
#include <vector>
97

108
#undef MIN
@@ -135,9 +133,11 @@ static bool gguf_ex_read_0(const std::string & fname) {
135133

136134
for (int i = 0; i < n_tensors; ++i) {
137135
const char * name = gguf_get_tensor_name (ctx, i);
136+
const size_t size = gguf_get_tensor_size (ctx, i);
137+
// const size_t size = 0;
138138
const size_t offset = gguf_get_tensor_offset(ctx, i);
139139

140-
printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
140+
printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
141141
}
142142
}
143143

@@ -182,9 +182,11 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
182182

183183
for (int i = 0; i < n_tensors; ++i) {
184184
const char * name = gguf_get_tensor_name (ctx, i);
185+
const size_t size = gguf_get_tensor_size (ctx, i);
186+
// const size_t size = 0;
185187
const size_t offset = gguf_get_tensor_offset(ctx, i);
186188

187-
printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
189+
printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
188190
}
189191
}
190192

@@ -199,7 +201,8 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
199201

200202
struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
201203

202-
printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, ggml_n_dims(cur), cur->name, cur->data);
204+
printf("%s: tensor[%d]: n_dims = %d, ne = (%d, %d, %d, %d) name = %s, data = %p\n",
205+
__func__, i, ggml_n_dims(cur), int(cur->ne[0]), int(cur->ne[1]), int(cur->ne[2]), int(cur->ne[3]), cur->name, cur->data);
203206

204207
// print first 10 elements
205208
const float * data = (const float *) cur->data;
@@ -215,7 +218,7 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
215218
const float * data = (const float *) cur->data;
216219
for (int j = 0; j < ggml_nelements(cur); ++j) {
217220
if (data[j] != 100 + i) {
218-
fprintf(stderr, "%s: tensor[%d]: data[%d] = %f\n", __func__, i, j, data[j]);
221+
fprintf(stderr, "%s: tensor[%d], data[%d]: found %f, expected %f\n", __func__, i, j, data[j], float(100 + i));
219222
gguf_free(ctx);
220223
return false;
221224
}
@@ -245,6 +248,8 @@ int main(int argc, char ** argv) {
245248
check_data = false;
246249
}
247250

251+
srand(123456);
252+
248253
const std::string fname(argv[1]);
249254
const std::string mode (argv[2]);
250255

examples/llava/clip.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2566,7 +2566,8 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
25662566
total_size_org += orig_size;
25672567
total_size_new += new_size;
25682568
gguf_set_tensor_type(ctx_out, name.c_str(), new_type);
2569-
gguf_set_tensor_data(ctx_out, name.c_str(), new_data, new_size);
2569+
GGML_ASSERT(gguf_get_tensor_size(ctx_out, gguf_find_tensor(ctx_out, name.c_str())) == new_size);
2570+
gguf_set_tensor_data(ctx_out, name.c_str(), new_data);
25702571
fout.write((const char *)new_data, new_size);
25712572
size_t pad = GGML_PAD(new_size, gguf_get_alignment(ctx_out)) - new_size;
25722573
for (size_t j = 0; j < pad; ++j) {

ggml/include/ggml.h

Lines changed: 33 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -2072,9 +2072,10 @@ extern "C" {
20722072
const float * imatrix);
20732073

20742074
//
2075-
// gguf
2075+
// GGUF
20762076
//
20772077

2078+
// types that can be stored as GGUF KV data
20782079
enum gguf_type {
20792080
GGUF_TYPE_UINT8 = 0,
20802081
GGUF_TYPE_INT8 = 1,
@@ -2136,41 +2137,56 @@ extern "C" {
21362137
GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
21372138
GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
21382139
GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int key_id);
2140+
2141+
// get raw pointer to the first element of the array with the given key_id
2142+
// for bool arrays, note that they are always stored as int8 on all platforms (usually this makes no difference)
21392143
GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
2144+
2145+
// get ith C string from array with given key_id
21402146
GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
21412147

21422148
GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
21432149
GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
21442150
GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
2145-
GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
2151+
GGML_API const char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
21462152
GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
2153+
GGML_API size_t gguf_get_tensor_size (const struct gguf_context * ctx, int i);
21472154

21482155
// removes key if it exists
21492156
GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
21502157

21512158
// overrides existing values or adds a new one
2152-
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
2153-
GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
2154-
GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
2155-
GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
2156-
GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
2157-
GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
2158-
GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
2159-
GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
2160-
GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
2161-
GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
2162-
GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
2159+
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
2160+
GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);
2161+
GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
2162+
GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t val);
2163+
GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
2164+
GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t val);
2165+
GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float val);
2166+
GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
2167+
GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t val);
2168+
GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double val);
2169+
GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val);
21632170
GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
2171+
2172+
// creates a new array with n elements of the given type and copies the corresponding number of bytes from data
21642173
GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
2174+
2175+
// creates a new array with n strings and copies the corresponding strings from data
21652176
GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
21662177

21672178
// set or add KV pairs from another context
2168-
GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
2179+
GGML_API void gguf_set_kv(struct gguf_context * ctx, const struct gguf_context * src);
21692180

21702181
// manage tensor info
21712182
GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
2183+
2184+
// after changing a tensor's type, the offsets of all tensors with higher indices are recalculated
2185+
// in such a way that the tensor data remains as one contiguous block (except for padding)
21722186
GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
2173-
GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
2187+
2188+
// assumes that at least gguf_get_tensor_size bytes can be read from data
2189+
GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data);
21742190

21752191
// writing gguf files can be done in 2 ways:
21762192
//
@@ -2195,6 +2211,8 @@ extern "C" {
21952211

21962212
// get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
21972213
GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
2214+
2215+
// writes the meta data to pointer "data"
21982216
GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
21992217

22002218
#ifdef __cplusplus

0 commit comments

Comments
 (0)