Commit e788b82
Merge commit, 2 parents: dcfa1ec + 53ff6b9

Commit message:
You'll never take us alive
We swore that death will do us part
They'll call our crimes a work of art

File tree

19 files changed: +1872, -1710 lines

Makefile

Lines changed: 7 additions & 9 deletions

@@ -90,10 +90,10 @@ endif
 CUBLASLD_FLAGS =
 CUBLAS_OBJS =
 
-OBJS_FULL += ggml-alloc.o ggml-cpu-traits.o ggml-quants.o ggml-cpu-quants.o ggml-cpu-aarch64.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm.o common.o sampling.o
-OBJS_SIMPLE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx2.o ggml-cpu-quants_noavx2.o ggml-cpu-aarch64_noavx2.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_noavx2.o common.o sampling.o
-OBJS_SIMPLER += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx1.o ggml-cpu-quants_noavx1.o ggml-cpu-aarch64_noavx1.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_noavx1.o common.o sampling.o
-OBJS_FAILSAFE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_failsafe.o ggml-cpu-quants_failsafe.o ggml-cpu-aarch64_failsafe.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_failsafe.o common.o sampling.o
+OBJS_FULL += ggml-alloc.o ggml-cpu-traits.o ggml-quants.o ggml-cpu-quants.o ggml-cpu-aarch64.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm.o common.o sampling.o
+OBJS_SIMPLE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx2.o ggml-cpu-quants_noavx2.o ggml-cpu-aarch64_noavx2.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_noavx2.o common.o sampling.o
+OBJS_SIMPLER += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx1.o ggml-cpu-quants_noavx1.o ggml-cpu-aarch64_noavx1.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_noavx1.o common.o sampling.o
+OBJS_FAILSAFE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_failsafe.o ggml-cpu-quants_failsafe.o ggml-cpu-aarch64_failsafe.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_failsafe.o common.o sampling.o
 
 # OS specific
 ifeq ($(UNAME_S),Linux)

@@ -537,6 +537,8 @@ ggml-threading.o: ggml/src/ggml-threading.cpp ggml/include/ggml.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 ggml-cpu-cpp.o: ggml/src/ggml-cpu/ggml-cpu.cpp ggml/include/ggml.h ggml/src/ggml-common.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
+gguf.o: ggml/src/gguf.cpp ggml/include/gguf.h
+	$(CXX) $(CXXFLAGS) -c $< -o $@
 
 #these have special gpu defines
 ggml-backend_default.o: ggml/src/ggml-backend.cpp ggml/src/ggml-backend-impl.h ggml/include/ggml.h ggml/include/ggml-backend.h

@@ -655,7 +657,7 @@ gpttype_adapter_vulkan_noavx2.o: $(GPTTYPE_ADAPTER)
 	$(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) $(VULKAN_FLAGS) -c $< -o $@
 
 clean:
-	rm -vf *.o main sdmain whispermain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf imatrix vulkan-shaders-gen gguf-split gguf-split.exe vulkan-shaders-gen.exe imatrix.exe gguf.exe main.exe sdmain.exe whispermain.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so
+	rm -vf *.o main sdmain whispermain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt vulkan-shaders-gen gguf-split gguf-split.exe vulkan-shaders-gen.exe main.exe sdmain.exe whispermain.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so
 	rm -vrf ggml/src/ggml-cuda/*.o
 	rm -vrf ggml/src/ggml-cuda/template-instances/*.o

@@ -667,10 +669,6 @@ sdmain: otherarch/sdcpp/util.cpp otherarch/sdcpp/main.cpp otherarch/sdcpp/stable
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 whispermain: otherarch/whispercpp/main.cpp otherarch/whispercpp/whisper.cpp build-info.h ggml.o ggml-cpu.o llama.o console.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
-imatrix: examples/imatrix/imatrix.cpp build-info.h ggml.o ggml-cpu.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
-gguf: examples/gguf/gguf.cpp build-info.h ggml.o ggml-cpu.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 gguf-split: examples/gguf-split/gguf-split.cpp ggml.o ggml-cpu.o llama.o build-info.h llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

common/common.cpp

Lines changed: 3 additions & 0 deletions

@@ -2,6 +2,9 @@
 #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
 #endif
 
+#include "ggml.h"
+#include "gguf.h"
+
 #include "common.h"
 #include "log.h"
 #include "build-info.h"

examples/gguf-split/gguf-split.cpp

Lines changed: 8 additions & 6 deletions

@@ -1,17 +1,19 @@
+#include "ggml.h"
+#include "gguf.h"
 #include "llama.h"
 #include "common.h"
 #include "build-info.h"
 
 #include <algorithm>
+#include <cinttypes>
+#include <climits>
+#include <cstdio>
 #include <cstdlib>
+#include <stdexcept>
+#include <cstring>
 #include <fstream>
 #include <string>
 #include <vector>
-#include <climits>
-
-#include <cstdio>
-#include <cstring>
-#include <stdexcept>
 
 #if defined(_WIN32)
 #include <windows.h>

@@ -297,7 +299,7 @@ struct split_strategy {
             total_size += ggml_nbytes(t);
         }
         total_size = total_size / 1000 / 1000; // convert to megabytes
-        printf("split %05d: n_tensors = %d, total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
+        printf("split %05d: n_tensors = %" PRIi64 ", total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
        i_split++;
    }
 }
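
A note on the printf change above: the switch from %d to %" PRIi64 " (together with the new <cinttypes> include) indicates that gguf_get_n_tensors() now returns int64_t rather than int; passing a 64-bit value to a plain %d is undefined behavior on platforms where int is 32 bits. PRIi64 is the portable format macro for int64_t. A minimal stand-alone sketch (the n_tensors value is a placeholder for the function's return):

#include <cinttypes> // defines the PRIi64 format macro
#include <cstdint>
#include <cstdio>

int main() {
    int64_t n_tensors = 291; // stand-in for gguf_get_n_tensors(ctx)
    // PRIi64 expands to the correct conversion specifier for int64_t
    // on every platform ("ld" on most 64-bit Unix, "lld" on Windows).
    printf("n_tensors = %" PRIi64 "\n", n_tensors);
    return 0;
}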

examples/gguf/gguf.cpp

Lines changed: 10 additions & 6 deletions

@@ -1,10 +1,9 @@
 #include "ggml.h"
+#include "gguf.h"
 
 #include <cstdio>
-#include <cinttypes>
 #include <string>
 #include <sstream>
-#include <fstream>
 #include <vector>
 
 #undef MIN

@@ -135,9 +134,10 @@ static bool gguf_ex_read_0(const std::string & fname) {
 
     for (int i = 0; i < n_tensors; ++i) {
         const char * name   = gguf_get_tensor_name  (ctx, i);
+        const size_t size   = gguf_get_tensor_size  (ctx, i);
         const size_t offset = gguf_get_tensor_offset(ctx, i);
 
-        printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
+        printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
     }
 }

@@ -182,9 +182,10 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
 
     for (int i = 0; i < n_tensors; ++i) {
         const char * name   = gguf_get_tensor_name  (ctx, i);
+        const size_t size   = gguf_get_tensor_size  (ctx, i);
         const size_t offset = gguf_get_tensor_offset(ctx, i);
 
-        printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
+        printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
     }
 }

@@ -199,7 +200,8 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
 
         struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
 
-        printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, ggml_n_dims(cur), cur->name, cur->data);
+        printf("%s: tensor[%d]: n_dims = %d, ne = (%d, %d, %d, %d), name = %s, data = %p\n",
+            __func__, i, ggml_n_dims(cur), int(cur->ne[0]), int(cur->ne[1]), int(cur->ne[2]), int(cur->ne[3]), cur->name, cur->data);
 
         // print first 10 elements
         const float * data = (const float *) cur->data;

@@ -215,7 +217,7 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
             const float * data = (const float *) cur->data;
             for (int j = 0; j < ggml_nelements(cur); ++j) {
                 if (data[j] != 100 + i) {
-                    fprintf(stderr, "%s: tensor[%d]: data[%d] = %f\n", __func__, i, j, data[j]);
+                    fprintf(stderr, "%s: tensor[%d], data[%d]: found %f, expected %f\n", __func__, i, j, data[j], float(100 + i));
                     gguf_free(ctx);
                     return false;
                 }

@@ -245,6 +247,8 @@ int main(int argc, char ** argv) {
         check_data = false;
     }
 
+    srand(123456);
+
     const std::string fname(argv[1]);
     const std::string mode (argv[2]);
 

examples/llava/clip.cpp

Lines changed: 4 additions & 2 deletions

@@ -7,6 +7,7 @@
 #include "ggml-cpu.h"
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
+#include "gguf.h"
 
 #ifdef GGML_USE_CUDA
 #include "ggml-cuda.h"

@@ -265,7 +266,7 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
         {
             const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
             int arr_n = gguf_get_arr_n(ctx_gguf, i);
-            const void * data = gguf_get_arr_data(ctx_gguf, i);
+            const void * data = arr_type == GGUF_TYPE_STRING ? nullptr : gguf_get_arr_data(ctx_gguf, i);
             std::stringstream ss;
             ss << "[";
             for (int j = 0; j < arr_n; j++) {

@@ -2844,7 +2845,8 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
         total_size_org += orig_size;
         total_size_new += new_size;
         gguf_set_tensor_type(ctx_out, name.c_str(), new_type);
-        gguf_set_tensor_data(ctx_out, name.c_str(), new_data, new_size);
+        GGML_ASSERT(gguf_get_tensor_size(ctx_out, gguf_find_tensor(ctx_out, name.c_str())) == new_size);
+        gguf_set_tensor_data(ctx_out, name.c_str(), new_data);
         fout.write((const char *)new_data, new_size);
         size_t pad = GGML_PAD(new_size, gguf_get_alignment(ctx_out)) - new_size;
         for (size_t j = 0; j < pad; ++j) {
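
Two API changes surface in this file. First, gguf_get_arr_data() is apparently no longer valid for string-typed arrays, so the caller now branches on GGUF_TYPE_STRING and relies on gguf_get_arr_str() per element instead. Second, gguf_set_tensor_data() lost its size parameter: the byte count is taken from the tensor info already registered in the context, and the caller asserts its buffer matches. A minimal sketch of the new calling convention, assuming this commit's gguf.h (set_tensor_bytes is a hypothetical helper name):

#include "ggml.h"
#include "gguf.h"

// Write a tensor's bytes into a gguf_context whose tensor info was already
// registered (e.g. via gguf_add_tensor + gguf_set_tensor_type).
static void set_tensor_bytes(struct gguf_context * ctx, const char * name,
                             const void * data, size_t size) {
    // The context already knows each tensor's size, so we only verify
    // that the buffer we hand over is exactly that large.
    GGML_ASSERT(gguf_get_tensor_size(ctx, gguf_find_tensor(ctx, name)) == size);
    gguf_set_tensor_data(ctx, name, data); // no size argument anymore
}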

ggml/include/ggml-cpp.h

Lines changed: 1 addition & 0 deletions

@@ -7,6 +7,7 @@
 #include "ggml.h"
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
+#include "gguf.h"
 #include <memory>
 
 // Smart pointers for ggml types

ggml/include/ggml.h

Lines changed: 7 additions & 140 deletions

@@ -241,12 +241,6 @@
 #define GGML_ROPE_TYPE_MROPE 8
 #define GGML_ROPE_TYPE_VISION 24
 
-#define GGUF_MAGIC "GGUF"
-
-#define GGUF_VERSION 3
-
-#define GGUF_DEFAULT_ALIGNMENT 32
-
 #define GGML_UNUSED(x) (void)(x)
 
 #define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))

@@ -409,12 +403,6 @@ extern "C" {
         GGML_PREC_F32,
     };
 
-    enum ggml_backend_type {
-        GGML_BACKEND_TYPE_CPU = 0,
-        GGML_BACKEND_TYPE_GPU = 10,
-        GGML_BACKEND_TYPE_GPU_SPLIT = 20,
-    };
-
     // model file types
     enum ggml_ftype {
         GGML_FTYPE_UNKNOWN = -1,

@@ -593,8 +581,6 @@ extern "C" {
     struct ggml_tensor {
         enum ggml_type type;
 
-        GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
-
         struct ggml_backend_buffer * buffer;
 
         int64_t ne[GGML_MAX_DIMS]; // number of elements

@@ -623,7 +609,14 @@ extern "C" {
 
         void * extra; // extra things e.g. for ggml-cuda.cu
 
+        union {
         char padding[8];
+        union {
+            char trimmed_pad_1[3];
+            char clblast_offload_gpu; //we sneak the flag for gpu offloading for clblast into the padding
+            char trimmed_pad_2[4];
+        };
+        };
     };
 
     static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
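
An aside on the union added in the hunk above: every inner member overlaps the first bytes of the old 8-byte padding, so sizeof(struct ggml_tensor) is unchanged and the fork can stash its CLBlast offload flag in space upstream treats as dead. A self-contained sketch of the same aliasing (demo_tensor is a hypothetical name; note that reading a union member other than the one last written is only implementation-defined in C++, which is exactly what this trick relies on):

#include <cstdio>

struct demo_tensor {
    union {
        char padding[8];
        union {
            char trimmed_pad_1[3];
            char clblast_offload_gpu; // overlaps padding[0]
            char trimmed_pad_2[4];
        };
    };
};

int main() {
    // the inner 4-byte union nests inside the 8-byte one, so the size is unchanged
    static_assert(sizeof(demo_tensor) == 8, "layout unchanged");
    demo_tensor t = {};
    t.clblast_offload_gpu = 1;                 // set the smuggled flag...
    printf("padding[0] = %d\n", t.padding[0]); // ...it aliases the first padding byte
    return 0;
}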
@@ -2117,132 +2110,6 @@ extern "C" {
         int64_t n_per_row,
         const float * imatrix);
 
-    //
-    // gguf
-    //
-
-    enum gguf_type {
-        GGUF_TYPE_UINT8   = 0,
-        GGUF_TYPE_INT8    = 1,
-        GGUF_TYPE_UINT16  = 2,
-        GGUF_TYPE_INT16   = 3,
-        GGUF_TYPE_UINT32  = 4,
-        GGUF_TYPE_INT32   = 5,
-        GGUF_TYPE_FLOAT32 = 6,
-        GGUF_TYPE_BOOL    = 7,
-        GGUF_TYPE_STRING  = 8,
-        GGUF_TYPE_ARRAY   = 9,
-        GGUF_TYPE_UINT64  = 10,
-        GGUF_TYPE_INT64   = 11,
-        GGUF_TYPE_FLOAT64 = 12,
-        GGUF_TYPE_COUNT, // marks the end of the enum
-    };
-
-    struct gguf_context;
-
-    struct gguf_init_params {
-        bool no_alloc;
-
-        // if not NULL, create a ggml_context and allocate the tensor data in it
-        struct ggml_context ** ctx;
-    };
-
-    GGML_API struct gguf_context * gguf_init_empty(void);
-    GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
-    //GGML_API struct gguf_context * gguf_init_from_buffer(..);
-
-    GGML_API void gguf_free(struct gguf_context * ctx);
-
-    GGML_API const char * gguf_type_name(enum gguf_type type);
-
-    GGML_API int    gguf_get_version    (const struct gguf_context * ctx);
-    GGML_API size_t gguf_get_alignment  (const struct gguf_context * ctx);
-    GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
-    GGML_API void * gguf_get_data       (const struct gguf_context * ctx);
-
-    GGML_API int          gguf_get_n_kv(const struct gguf_context * ctx);
-    GGML_API int          gguf_find_key(const struct gguf_context * ctx, const char * key);
-    GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id);
-
-    GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id);
-    GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id);
-
-    // will abort if the wrong type is used for the key
-    GGML_API uint8_t      gguf_get_val_u8  (const struct gguf_context * ctx, int key_id);
-    GGML_API int8_t       gguf_get_val_i8  (const struct gguf_context * ctx, int key_id);
-    GGML_API uint16_t     gguf_get_val_u16 (const struct gguf_context * ctx, int key_id);
-    GGML_API int16_t      gguf_get_val_i16 (const struct gguf_context * ctx, int key_id);
-    GGML_API uint32_t     gguf_get_val_u32 (const struct gguf_context * ctx, int key_id);
-    GGML_API int32_t      gguf_get_val_i32 (const struct gguf_context * ctx, int key_id);
-    GGML_API float        gguf_get_val_f32 (const struct gguf_context * ctx, int key_id);
-    GGML_API uint64_t     gguf_get_val_u64 (const struct gguf_context * ctx, int key_id);
-    GGML_API int64_t      gguf_get_val_i64 (const struct gguf_context * ctx, int key_id);
-    GGML_API double       gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
-    GGML_API bool         gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
-    GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
-    GGML_API const void * gguf_get_val_data(const struct gguf_context * ctx, int key_id);
-    GGML_API int          gguf_get_arr_n   (const struct gguf_context * ctx, int key_id);
-    GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
-    GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
-
-    GGML_API int            gguf_get_n_tensors    (const struct gguf_context * ctx);
-    GGML_API int            gguf_find_tensor      (const struct gguf_context * ctx, const char * name);
-    GGML_API size_t         gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
-    GGML_API char *         gguf_get_tensor_name  (const struct gguf_context * ctx, int i);
-    GGML_API enum ggml_type gguf_get_tensor_type  (const struct gguf_context * ctx, int i);
-
-    // removes key if it exists
-    GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
-
-    // overrides existing values or adds a new one
-    GGML_API void gguf_set_val_u8  (struct gguf_context * ctx, const char * key, uint8_t  val);
-    GGML_API void gguf_set_val_i8  (struct gguf_context * ctx, const char * key, int8_t   val);
-    GGML_API void gguf_set_val_u16 (struct gguf_context * ctx, const char * key, uint16_t val);
-    GGML_API void gguf_set_val_i16 (struct gguf_context * ctx, const char * key, int16_t  val);
-    GGML_API void gguf_set_val_u32 (struct gguf_context * ctx, const char * key, uint32_t val);
-    GGML_API void gguf_set_val_i32 (struct gguf_context * ctx, const char * key, int32_t  val);
-    GGML_API void gguf_set_val_f32 (struct gguf_context * ctx, const char * key, float    val);
-    GGML_API void gguf_set_val_u64 (struct gguf_context * ctx, const char * key, uint64_t val);
-    GGML_API void gguf_set_val_i64 (struct gguf_context * ctx, const char * key, int64_t  val);
-    GGML_API void gguf_set_val_f64 (struct gguf_context * ctx, const char * key, double   val);
-    GGML_API void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool     val);
-    GGML_API void gguf_set_val_str (struct gguf_context * ctx, const char * key, const char * val);
-    GGML_API void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n);
-    GGML_API void gguf_set_arr_str (struct gguf_context * ctx, const char * key, const char ** data, int n);
-
-    // set or add KV pairs from another context
-    GGML_API void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src);
-
-    // manage tensor info
-    GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
-    GGML_API void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type);
-    GGML_API void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size);
-
-    // writing gguf files can be done in 2 ways:
-    //
-    // - write the entire gguf_context to a binary file in a single pass:
-    //
-    //   gguf_write_to_file(ctx, fname);
-    //
-    // - first prepare a file with a placeholder for the meta data, write the tensor data, then write the meta data:
-    //
-    //   FILE * f = fopen(fname, "wb");
-    //   fseek(f, gguf_get_meta_size(ctx), SEEK_SET);
-    //   fwrite(f, ...);
-    //   void * data = gguf_meta_get_meta_data(ctx);
-    //   fseek(f, 0, SEEK_SET);
-    //   fwrite(f, data, gguf_get_meta_size(ctx));
-    //   free(data);
-    //   fclose(f);
-    //
-
-    // write the entire context to a binary file
-    GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
-
-    // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
-    GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
-    GGML_API void   gguf_get_meta_data(const struct gguf_context * ctx, void * data);
-
 #ifdef __cplusplus
 // restrict not standard in C++
 # if defined(__GNUC__)
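
The net effect of this hunk: the entire GGUF API (magic/version constants, gguf_type, the context struct, all getters and setters, and file writing) moved out of ggml.h into the new dedicated header ggml/include/gguf.h, backed by ggml/src/gguf.cpp. That is why every file in this commit that touches gguf_* now adds #include "gguf.h"; existing callers should mostly recompile unchanged once they add the include. A minimal sketch of a metadata-only load against the relocated API ("model.gguf" is a hypothetical path; the casts hedge against the return types that appear to have changed in the new header, e.g. gguf_get_n_tensors widening to int64_t per the PRIi64 change in gguf-split.cpp):

#include <stdio.h>
#include "ggml.h"
#include "gguf.h" // the GGUF declarations now live here, not in ggml.h

int main(void) {
    struct gguf_init_params params = {
        /*.no_alloc =*/ true, // read metadata only, do not allocate tensor data
        /*.ctx      =*/ NULL,
    };
    struct gguf_context * ctx = gguf_init_from_file("model.gguf", params);
    if (ctx == NULL) {
        return 1;
    }
    printf("gguf version: %u\n", (unsigned) gguf_get_version(ctx));
    printf("n_tensors: %lld\n", (long long) gguf_get_n_tensors(ctx));
    gguf_free(ctx);
    return 0;
}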

0 commit comments