Skip to content

Commit 53ff6b9

Browse files
GGUF: C++ refactor, backend support, misc fixes (#11030)
* GGUF: C++ refactor, backend support, misc fixes
* remove ggml_tensor.backend
* update CODEOWNERS [no ci]
* remove gguf_get_data from API
* revise GGUF API data types
1 parent 017cc5f commit 53ff6b9

File tree

21 files changed

+1801
-1633
lines changed

21 files changed

+1801
-1633
lines changed

CODEOWNERS

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,9 @@
33
/ci/ @ggerganov
44
/.devops/*.Dockerfile @ngxson
55
/examples/server/ @ngxson
6+
/ggml/src/ggml-cuda/fattn* @JohannesGaessler
7+
/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
8+
/ggml/src/ggml-cuda/mmv.* @JohannesGaessler
9+
/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
10+
/ggml/src/ggml-opt.cpp @JohannesGaessler
11+
/ggml/src/gguf.cpp @JohannesGaessler

common/common.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
33
#endif
44

5+
#include "ggml.h"
6+
#include "gguf.h"
7+
58
#include "common.h"
69
#include "log.h"
710
// Change JSON_ASSERT from assert() to GGML_ASSERT:

examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
#include "ggml.h"
2+
#include "gguf.h"
3+
24
#include "llama.h"
35
#include "common.h"
46
#include "log.h"

examples/cvector-generator/cvector-generator.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
#include "ggml.h"
2+
#include "gguf.h"
3+
14
#include "arg.h"
25
#include "common.h"
36
#include "llama.h"
4-
#include "ggml.h"
57
#include "pca.hpp"
68
#include "mean.hpp"
79

examples/export-lora/export-lora.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1-
#include "arg.h"
2-
#include "common.h"
31
#include "ggml.h"
42
#include "ggml-alloc.h"
3+
#include "gguf.h"
4+
5+
#include "arg.h"
6+
#include "common.h"
57

68
#include <map>
79
#include <vector>

examples/gguf-hash/gguf-hash.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "ggml.h"
2+
#include "gguf.h"
23

34
#include <cstdlib> /* abort() */
45
#include <cstddef>

examples/gguf-split/gguf-split.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
1+
#include "ggml.h"
2+
#include "gguf.h"
13
#include "llama.h"
24
#include "common.h"
35

46
#include <algorithm>
7+
#include <cinttypes>
8+
#include <climits>
9+
#include <cstdio>
510
#include <cstdlib>
11+
#include <stdexcept>
12+
#include <cstring>
613
#include <fstream>
714
#include <string>
815
#include <vector>
9-
#include <climits>
10-
11-
#include <cstdio>
12-
#include <cstring>
13-
#include <stdexcept>
1416

1517
#if defined(_WIN32)
1618
#include <windows.h>
@@ -296,7 +298,7 @@ struct split_strategy {
296298
total_size += ggml_nbytes(t);
297299
}
298300
total_size = total_size / 1000 / 1000; // convert to megabytes
299-
printf("split %05d: n_tensors = %d, total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
301+
printf("split %05d: n_tensors = %" PRIi64 ", total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
300302
i_split++;
301303
}
302304
}

examples/gguf/gguf.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
#include "ggml.h"
2+
#include "gguf.h"
23

34
#include <cstdio>
4-
#include <cinttypes>
55
#include <string>
66
#include <sstream>
7-
#include <fstream>
87
#include <vector>
98

109
#undef MIN
@@ -135,9 +134,10 @@ static bool gguf_ex_read_0(const std::string & fname) {
135134

136135
for (int i = 0; i < n_tensors; ++i) {
137136
const char * name = gguf_get_tensor_name (ctx, i);
137+
const size_t size = gguf_get_tensor_size (ctx, i);
138138
const size_t offset = gguf_get_tensor_offset(ctx, i);
139139

140-
printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
140+
printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
141141
}
142142
}
143143

@@ -182,9 +182,10 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
182182

183183
for (int i = 0; i < n_tensors; ++i) {
184184
const char * name = gguf_get_tensor_name (ctx, i);
185+
const size_t size = gguf_get_tensor_size (ctx, i);
185186
const size_t offset = gguf_get_tensor_offset(ctx, i);
186187

187-
printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
188+
printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
188189
}
189190
}
190191

@@ -199,7 +200,8 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
199200

200201
struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
201202

202-
printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, ggml_n_dims(cur), cur->name, cur->data);
203+
printf("%s: tensor[%d]: n_dims = %d, ne = (%d, %d, %d, %d), name = %s, data = %p\n",
204+
__func__, i, ggml_n_dims(cur), int(cur->ne[0]), int(cur->ne[1]), int(cur->ne[2]), int(cur->ne[3]), cur->name, cur->data);
203205

204206
// print first 10 elements
205207
const float * data = (const float *) cur->data;
@@ -215,7 +217,7 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
215217
const float * data = (const float *) cur->data;
216218
for (int j = 0; j < ggml_nelements(cur); ++j) {
217219
if (data[j] != 100 + i) {
218-
fprintf(stderr, "%s: tensor[%d]: data[%d] = %f\n", __func__, i, j, data[j]);
220+
fprintf(stderr, "%s: tensor[%d], data[%d]: found %f, expected %f\n", __func__, i, j, data[j], float(100 + i));
219221
gguf_free(ctx);
220222
return false;
221223
}
@@ -245,6 +247,8 @@ int main(int argc, char ** argv) {
245247
check_data = false;
246248
}
247249

250+
srand(123456);
251+
248252
const std::string fname(argv[1]);
249253
const std::string mode (argv[2]);
250254

examples/llava/clip.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "ggml-cpu.h"
88
#include "ggml-alloc.h"
99
#include "ggml-backend.h"
10+
#include "gguf.h"
1011

1112
//#ifdef GGML_USE_CUDA
1213
//#include "ggml-cuda.h"
@@ -262,7 +263,7 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
262263
{
263264
const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
264265
int arr_n = gguf_get_arr_n(ctx_gguf, i);
265-
const void * data = gguf_get_arr_data(ctx_gguf, i);
266+
const void * data = arr_type == GGUF_TYPE_STRING ? nullptr : gguf_get_arr_data(ctx_gguf, i);
266267
std::stringstream ss;
267268
ss << "[";
268269
for (int j = 0; j < arr_n; j++) {
@@ -2734,7 +2735,8 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
27342735
total_size_org += orig_size;
27352736
total_size_new += new_size;
27362737
gguf_set_tensor_type(ctx_out, name.c_str(), new_type);
2737-
gguf_set_tensor_data(ctx_out, name.c_str(), new_data, new_size);
2738+
GGML_ASSERT(gguf_get_tensor_size(ctx_out, gguf_find_tensor(ctx_out, name.c_str())) == new_size);
2739+
gguf_set_tensor_data(ctx_out, name.c_str(), new_data);
27382740
fout.write((const char *)new_data, new_size);
27392741
size_t pad = GGML_PAD(new_size, gguf_get_alignment(ctx_out)) - new_size;
27402742
for (size_t j = 0; j < pad; ++j) {

ggml/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,8 @@ set(GGML_PUBLIC_HEADERS
243243
include/ggml-metal.h
244244
include/ggml-rpc.h
245245
include/ggml-sycl.h
246-
include/ggml-vulkan.h)
246+
include/ggml-vulkan.h
247+
include/gguf.h)
247248

248249
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
249250
#if (GGML_METAL)

0 commit comments

Comments (0)