Commit 16ff4b3

Merge branch 'concedo_experimental' into croco_nex_0
2 parents: 4e5aa48 + d3d7dae

40 files changed: +2800 -1823 lines

CMakeLists.txt (2 additions, 0 deletions)

@@ -793,6 +793,8 @@ else ()
     ggml/src/ggml-cpu/ggml-cpu-quants.c
     ggml/src/ggml-cpu/ggml-cpu-quants.h
     ggml/src/ggml-backend-reg.cpp
+    ggml/include/gguf.h
+    ggml/src/gguf.cpp
     ${GGML_SOURCES_CUDA})
 target_include_directories(ggml PUBLIC . ./ggml/include ./ggml/src ./ggml/src/ggml-cpu ./include ./otherarch ./otherarch/tools)
 target_compile_features(ggml PUBLIC c_std_11) # don't bump

Makefile (7 additions, 9 deletions)

@@ -96,10 +96,10 @@ endif
 CUBLASLD_FLAGS =
 CUBLAS_OBJS =

-OBJS_FULL += ggml-alloc.o ggml-cpu-traits.o ggml-quants.o ggml-cpu-quants.o ggml-cpu-aarch64.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm.o common.o sampling.o
-OBJS_SIMPLE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx2.o ggml-cpu-quants_noavx2.o ggml-cpu-aarch64_noavx2.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_noavx2.o common.o sampling.o
-OBJS_SIMPLER += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx1.o ggml-cpu-quants_noavx1.o ggml-cpu-aarch64_noavx1.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_noavx1.o common.o sampling.o
-OBJS_FAILSAFE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_failsafe.o ggml-cpu-quants_failsafe.o ggml-cpu-aarch64_failsafe.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o sgemm_failsafe.o common.o sampling.o
+OBJS_FULL += ggml-alloc.o ggml-cpu-traits.o ggml-quants.o ggml-cpu-quants.o ggml-cpu-aarch64.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm.o common.o sampling.o
+OBJS_SIMPLE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx2.o ggml-cpu-quants_noavx2.o ggml-cpu-aarch64_noavx2.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_noavx2.o common.o sampling.o
+OBJS_SIMPLER += ggml-alloc.o ggml-cpu-traits.o ggml-quants_noavx1.o ggml-cpu-quants_noavx1.o ggml-cpu-aarch64_noavx1.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_noavx1.o common.o sampling.o
+OBJS_FAILSAFE += ggml-alloc.o ggml-cpu-traits.o ggml-quants_failsafe.o ggml-cpu-quants_failsafe.o ggml-cpu-aarch64_failsafe.o unicode.o unicode-data.o ggml-threading.o ggml-cpu-cpp.o gguf.o sgemm_failsafe.o common.o sampling.o

 # OS specific
 ifeq ($(UNAME_S),Linux)

@@ -587,6 +587,8 @@ ggml-threading.o: ggml/src/ggml-threading.cpp ggml/include/ggml.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 ggml-cpu-cpp.o: ggml/src/ggml-cpu/ggml-cpu.cpp ggml/include/ggml.h ggml/src/ggml-common.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
+gguf.o: ggml/src/gguf.cpp ggml/include/gguf.h
+	$(CXX) $(CXXFLAGS) -c $< -o $@

 #these have special gpu defines
 ggml-backend_default.o: ggml/src/ggml-backend.cpp ggml/src/ggml-backend-impl.h ggml/include/ggml.h ggml/include/ggml-backend.h

@@ -705,7 +707,7 @@ gpttype_adapter_vulkan_noavx2.o: $(GPTTYPE_ADAPTER)
 	$(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) $(VULKAN_FLAGS) -c $< -o $@

 clean:
-	rm -vf *.o main sdmain whispermain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf imatrix vulkan-shaders-gen gguf-split gguf-split.exe vulkan-shaders-gen.exe imatrix.exe gguf.exe main.exe sdmain.exe whispermain.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so
+	rm -vf *.o main sdmain whispermain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt vulkan-shaders-gen gguf-split gguf-split.exe vulkan-shaders-gen.exe main.exe sdmain.exe whispermain.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so
 	rm -vrf ggml/src/ggml-cuda/*.o
 	rm -vrf ggml/src/ggml-cuda/template-instances/*.o

@@ -722,10 +724,6 @@ sdmain: otherarch/sdcpp/util.cpp otherarch/sdcpp/main.cpp otherarch/sdcpp/stable
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 whispermain: otherarch/whispercpp/main.cpp otherarch/whispercpp/whisper.cpp build-info.h ggml.o ggml-cpu.o llama.o console.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
-imatrix: examples/imatrix/imatrix.cpp build-info.h ggml.o ggml-cpu.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
-gguf: examples/gguf/gguf.cpp build-info.h ggml.o ggml-cpu.o llama.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 gguf-split: examples/gguf-split/gguf-split.cpp ggml.o ggml-cpu.o llama.o build-info.h llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

common/arg.cpp (13 additions, 4 deletions)

@@ -23,6 +23,11 @@ common_arg & common_arg::set_examples(std::initializer_list<enum llama_example>
     return *this;
 }

+common_arg & common_arg::set_excludes(std::initializer_list<enum llama_example> excludes) {
+    this->excludes = std::move(excludes);
+    return *this;
+}
+
 common_arg & common_arg::set_env(const char * env) {
     help = help + "\n(env: " + env + ")";
     this->env = env;

@@ -38,6 +43,10 @@ bool common_arg::in_example(enum llama_example ex) {
     return examples.find(ex) != examples.end();
 }

+bool common_arg::is_exclude(enum llama_example ex) {
+    return excludes.find(ex) != excludes.end();
+}
+
 bool common_arg::get_value_from_env(std::string & output) {
     if (env == nullptr) return false;
     char * value = std::getenv(env);

@@ -422,7 +431,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
  * - if both {LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_*,} are set, we will prioritize the LLAMA_EXAMPLE_* matching current example
  */
     auto add_opt = [&](common_arg arg) {
-        if (arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) {
+        if ((arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) && !arg.is_exclude(ex)) {
             ctx_arg.options.push_back(std::move(arg));
         }
     };

@@ -651,7 +660,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, const std::string & value) {
             params.prompt = value;
         }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"--no-perf"},
         string_format("disable internal libllama performance timings (default: %s)", params.no_perf ? "true" : "false"),

@@ -675,7 +684,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
                 params.prompt.pop_back();
             }
         }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"--in-file"}, "FNAME",
         "an input file (repeat to specify multiple files)",

@@ -702,7 +711,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.prompt = ss.str();
             fprintf(stderr, "Read %zu bytes from binary file %s\n", params.prompt.size(), value.c_str());
         }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"-e", "--escape"},
         string_format("process escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\) (default: %s)", params.escape ? "true" : "false"),

common/arg.h (3 additions, 0 deletions)

@@ -12,6 +12,7 @@

 struct common_arg {
     std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
+    std::set<enum llama_example> excludes = {};
     std::vector<const char *> args;
     const char * value_hint   = nullptr; // help text or example for arg value
     const char * value_hint_2 = nullptr; // for second arg value

@@ -53,9 +54,11 @@ struct common_arg {
     ) : args(args), value_hint(value_hint), value_hint_2(value_hint_2), help(help), handler_str_str(handler) {}

     common_arg & set_examples(std::initializer_list<enum llama_example> examples);
+    common_arg & set_excludes(std::initializer_list<enum llama_example> excludes);
     common_arg & set_env(const char * env);
     common_arg & set_sparam();
     bool in_example(enum llama_example ex);
+    bool is_exclude(enum llama_example ex);
     bool get_value_from_env(std::string & output);
     bool has_value_from_env();
     std::string to_string();
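
The new excludes set is a blacklist layered on top of the existing examples whitelist: add_opt() now registers an option for the current example only when the option lists that example (or LLAMA_EXAMPLE_COMMON) and does not exclude it. A minimal, self-contained sketch of the filter, using a stand-in enum and struct rather than the real common_arg:

// Hedged sketch of the include/exclude filter introduced in common/arg.{h,cpp}.
// The enum values and struct below are stand-ins, not the real llama.cpp types.
#include <cstdio>
#include <set>

enum llama_example { LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER };

struct arg_stub {
    std::set<llama_example> examples = {LLAMA_EXAMPLE_COMMON};
    std::set<llama_example> excludes = {};

    bool in_example(llama_example ex) const { return examples.count(ex) > 0; }
    bool is_exclude(llama_example ex) const { return excludes.count(ex) > 0; }
};

int main() {
    arg_stub prompt_arg;                            // a "common" option like -p/--prompt
    prompt_arg.excludes = {LLAMA_EXAMPLE_SERVER};   // hidden from the server example

    for (llama_example ex : {LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}) {
        // same condition as the patched add_opt lambda in common/arg.cpp
        const bool visible = (prompt_arg.in_example(ex) || prompt_arg.in_example(LLAMA_EXAMPLE_COMMON))
                             && !prompt_arg.is_exclude(ex);
        printf("example %d: visible = %d\n", (int) ex, (int) visible);
    }
    return 0;
}

This is the mechanism the .set_excludes({LLAMA_EXAMPLE_SERVER}) calls on the prompt-related options in common/arg.cpp rely on: those options remain available to every other example but are no longer offered by the server.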

common/common.cpp (3 additions, 0 deletions)

@@ -2,6 +2,9 @@
 #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
 #endif

+#include "ggml.h"
+#include "gguf.h"
+
 #include "common.h"
 #include "log.h"
 #include "build-info.h"

convert_lora_to_gguf.py (31 additions, 3 deletions)

@@ -226,6 +226,9 @@ def get_base_tensor_name(lora_tensor_name: str) -> str:
     base_name = lora_tensor_name.replace("base_model.model.", "")
     base_name = base_name.replace(".lora_A.weight", ".weight")
     base_name = base_name.replace(".lora_B.weight", ".weight")
+    # models produced by mergekit-extract-lora have token embeddings in the adapter
+    base_name = base_name.replace(".lora_embedding_A", ".weight")
+    base_name = base_name.replace(".lora_embedding_B", ".weight")
     return base_name


@@ -260,6 +263,10 @@ def parse_args() -> argparse.Namespace:
         "--base", type=Path,
         help="directory containing Hugging Face model config files (config.json, tokenizer.json) for the base model that the adapter is based on - only config is needed, actual model weights are not required. If base model is unspecified, it will be loaded from Hugging Face hub based on the adapter config",
     )
+    parser.add_argument(
+        "--base-model-id", type=str,
+        help="the model ID of the base model, if it is not available locally or in the adapter config. If specified, it will ignore --base and load the base model config from the Hugging Face hub (Example: 'meta-llama/Llama-3.2-1B-Instruct')",
+    )
     parser.add_argument(
         "lora_path", type=Path,
         help="directory containing Hugging Face PEFT LoRA config (adapter_model.json) and weights (adapter_model.safetensors or adapter_model.bin)",

@@ -290,6 +297,7 @@ def load_hparams_from_hf(hf_model_id: str) -> dict[str, Any]:

     dir_base_model: Path | None = args.base
     dir_lora: Path = args.lora_path
+    base_model_id: str | None = args.base_model_id
     lora_config = dir_lora / "adapter_config.json"
     input_model = dir_lora / "adapter_model.safetensors"

@@ -313,7 +321,10 @@ def load_hparams_from_hf(hf_model_id: str) -> dict[str, Any]:
         lparams: dict[str, Any] = json.load(f)

     # load base model
-    if dir_base_model is None:
+    if base_model_id is not None:
+        logger.info(f"Loading base model from Hugging Face: {base_model_id}")
+        hparams = load_hparams_from_hf(base_model_id)
+    elif dir_base_model is None:
         if "base_model_name_or_path" in lparams:
             model_id = lparams["base_model_name_or_path"]
             logger.info(f"Loading base model from Hugging Face: {model_id}")

@@ -371,11 +382,16 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
                 if self.lazy:
                     tensor = LazyTorchTensor.from_eager(tensor)
                 base_name = get_base_tensor_name(name)
-                is_lora_a = ".lora_A.weight" in name
-                is_lora_b = ".lora_B.weight" in name
+                # note: mergekit-extract-lora also adds token embeddings to the adapter
+                is_lora_a = ".lora_A.weight" in name or ".lora_embedding_A" in name
+                is_lora_b = ".lora_B.weight" in name or ".lora_embedding_B" in name
                 if not is_lora_a and not is_lora_b:
                     if ".base_layer.weight" in name:
                         continue
+                    # mergekit-extract-lora add these layernorm to the adapter, we need to keep them
+                    if "_layernorm" in name or ".norm" in name:
+                        yield (base_name, tensor)
+                        continue
                     logger.error(f"Unexpected name '{name}': Not a lora_A or lora_B tensor")
                     if ".embed_tokens.weight" in name or ".lm_head.weight" in name:
                         logger.error("Embeddings is present in the adapter. This can be due to new tokens added during fine tuning")

@@ -407,9 +423,21 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
             if name == "lm_head.weight" and len(dest) == 0:
                 raise ValueError("lm_head is present in adapter, but is ignored in base model")
             for dest_name, dest_data in dest:
+                # mergekit-extract-lora add these layernorm to the adapter
+                if "_norm" in dest_name:
+                    assert dest_data.dim() == 1
+                    yield (dest_name, dest_data)
+                    continue
+
+                # otherwise, we must get the lora_A and lora_B tensors
                 assert isinstance(dest_data, LoraTorchTensor)
                 lora_a, lora_b = dest_data.get_lora_A_B()

+                # note: mergekit-extract-lora flip and transpose A and B
+                # here we only need to transpose token_embd.lora_a, see llm_build_inp_embd()
+                if "token_embd.weight" in dest_name:
+                    lora_a = lora_a.T
+
                 yield (dest_name + ".lora_a", lora_a)
                 yield (dest_name + ".lora_b", lora_b)


examples/gguf-split/gguf-split.cpp (8 additions, 6 deletions)

@@ -1,17 +1,19 @@
+#include "ggml.h"
+#include "gguf.h"
 #include "llama.h"
 #include "common.h"
 #include "build-info.h"

 #include <algorithm>
+#include <cinttypes>
+#include <climits>
+#include <cstdio>
 #include <cstdlib>
+#include <stdexcept>
+#include <cstring>
 #include <fstream>
 #include <string>
 #include <vector>
-#include <climits>
-
-#include <cstdio>
-#include <cstring>
-#include <stdexcept>

 #if defined(_WIN32)
     #include <windows.h>

@@ -297,7 +299,7 @@ struct split_strategy {
                 total_size += ggml_nbytes(t);
             }
             total_size = total_size / 1000 / 1000; // convert to megabytes
-            printf("split %05d: n_tensors = %d, total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
+            printf("split %05d: n_tensors = %" PRIi64 ", total_size = %zuM\n", i_split + 1, gguf_get_n_tensors(ctx_out), total_size);
             i_split++;
         }
     }
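
The printf fix follows from the refactored gguf API, where gguf_get_n_tensors() returns an int64_t; printing it with %d is a format/type mismatch on platforms where int is 32 bits, hence the new <cinttypes> include and the PRIi64 macro. A tiny illustration of the pattern (the count value here is made up):

// Illustrative only: printing an int64_t portably, as the patched gguf-split.cpp does.
#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
    const int64_t n_tensors = 291;  // e.g. the value returned by gguf_get_n_tensors(ctx)
    printf("n_tensors = %" PRIi64 "\n", n_tensors);
    return 0;
}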

examples/gguf/gguf.cpp (10 additions, 6 deletions)

@@ -1,10 +1,9 @@
 #include "ggml.h"
+#include "gguf.h"

 #include <cstdio>
-#include <cinttypes>
 #include <string>
 #include <sstream>
-#include <fstream>
 #include <vector>

 #undef MIN

@@ -135,9 +134,10 @@ static bool gguf_ex_read_0(const std::string & fname) {

         for (int i = 0; i < n_tensors; ++i) {
             const char * name   = gguf_get_tensor_name  (ctx, i);
+            const size_t size   = gguf_get_tensor_size  (ctx, i);
             const size_t offset = gguf_get_tensor_offset(ctx, i);

-            printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
+            printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
         }
     }

@@ -182,9 +182,10 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {

         for (int i = 0; i < n_tensors; ++i) {
             const char * name   = gguf_get_tensor_name  (ctx, i);
+            const size_t size   = gguf_get_tensor_size  (ctx, i);
             const size_t offset = gguf_get_tensor_offset(ctx, i);

-            printf("%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
+            printf("%s: tensor[%d]: name = %s, size = %zu, offset = %zu\n", __func__, i, name, size, offset);
         }
     }

@@ -199,7 +200,8 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {

             struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);

-            printf("%s: tensor[%d]: n_dims = %d, name = %s, data = %p\n", __func__, i, ggml_n_dims(cur), cur->name, cur->data);
+            printf("%s: tensor[%d]: n_dims = %d, ne = (%d, %d, %d, %d), name = %s, data = %p\n",
+                __func__, i, ggml_n_dims(cur), int(cur->ne[0]), int(cur->ne[1]), int(cur->ne[2]), int(cur->ne[3]), cur->name, cur->data);

             // print first 10 elements
             const float * data = (const float *) cur->data;

@@ -215,7 +217,7 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
                 const float * data = (const float *) cur->data;
                 for (int j = 0; j < ggml_nelements(cur); ++j) {
                     if (data[j] != 100 + i) {
-                        fprintf(stderr, "%s: tensor[%d]: data[%d] = %f\n", __func__, i, j, data[j]);
+                        fprintf(stderr, "%s: tensor[%d], data[%d]: found %f, expected %f\n", __func__, i, j, data[j], float(100 + i));
                         gguf_free(ctx);
                         return false;
                     }

@@ -245,6 +247,8 @@ int main(int argc, char ** argv) {
         check_data = false;
     }

+    srand(123456);
+
     const std::string fname(argv[1]);
     const std::string mode (argv[2]);

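gguf_get_tensor_size() used above comes from the gguf API that is now exposed through the public header ggml/include/gguf.h. A hedged, self-contained sketch of a reader that mirrors gguf_ex_read_0() follows; the gguf_* calls match the API as used in the example above, while the function name and error handling are illustrative:

// Minimal sketch: list tensor names, sizes and offsets of a GGUF file.
#include "ggml.h"
#include "gguf.h"

#include <cinttypes>
#include <cstdio>

static bool dump_tensor_list(const char * fname) {
    struct gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ nullptr };
    struct gguf_context * ctx = gguf_init_from_file(fname, params);
    if (!ctx) {
        fprintf(stderr, "failed to open %s\n", fname);
        return false;
    }

    const int64_t n_tensors = gguf_get_n_tensors(ctx);
    for (int64_t i = 0; i < n_tensors; ++i) {
        const char * name   = gguf_get_tensor_name  (ctx, i);
        const size_t size   = gguf_get_tensor_size  (ctx, i);
        const size_t offset = gguf_get_tensor_offset(ctx, i);
        printf("tensor[%" PRIi64 "]: name = %s, size = %zu, offset = %zu\n", i, name, size, offset);
    }

    gguf_free(ctx);
    return true;
}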

examples/llava/clip.cpp (4 additions, 2 deletions)

@@ -7,6 +7,7 @@
 #include "ggml-cpu.h"
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
+#include "gguf.h"

 #ifdef GGML_USE_CUDA
 #include "ggml-cuda.h"

@@ -265,7 +266,7 @@ static std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
         {
             const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
             int arr_n = gguf_get_arr_n(ctx_gguf, i);
-            const void * data = gguf_get_arr_data(ctx_gguf, i);
+            const void * data = arr_type == GGUF_TYPE_STRING ? nullptr : gguf_get_arr_data(ctx_gguf, i);
             std::stringstream ss;
             ss << "[";
             for (int j = 0; j < arr_n; j++) {

@@ -2844,7 +2845,8 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
         total_size_org += orig_size;
         total_size_new += new_size;
         gguf_set_tensor_type(ctx_out, name.c_str(), new_type);
-        gguf_set_tensor_data(ctx_out, name.c_str(), new_data, new_size);
+        GGML_ASSERT(gguf_get_tensor_size(ctx_out, gguf_find_tensor(ctx_out, name.c_str())) == new_size);
+        gguf_set_tensor_data(ctx_out, name.c_str(), new_data);
         fout.write((const char *)new_data, new_size);
         size_t pad = GGML_PAD(new_size, gguf_get_alignment(ctx_out)) - new_size;
         for (size_t j = 0; j < pad; ++j) {
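
The last hunk reflects the refactored gguf API: gguf_set_tensor_data() no longer takes an explicit size, because the size is derived from the tensor type and shape already registered in the gguf context, and the quantizer now asserts that gguf_get_tensor_size() agrees with new_size before writing the bytes out. A self-contained sketch of that contract, assuming the post-refactor signatures (the tensor name and dimensions are illustrative):

// Hedged sketch: the gguf context knows each tensor's size from its metadata,
// so gguf_set_tensor_data() only needs the data pointer.
#include "ggml.h"
#include "gguf.h"

#include <vector>

int main() {
    // small ggml context that only holds tensor metadata (no data allocation)
    ggml_init_params ip = { /*mem_size =*/ 1024 * 1024, /*mem_buffer =*/ nullptr, /*no_alloc =*/ true };
    ggml_context * ctx = ggml_init(ip);

    ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);
    ggml_set_name(t, "demo.weight");

    gguf_context * ctx_out = gguf_init_empty();
    gguf_add_tensor(ctx_out, t);

    std::vector<float> data(16, 1.0f);
    // the size recorded in the gguf context matches the ggml tensor's byte size
    GGML_ASSERT(gguf_get_tensor_size(ctx_out, gguf_find_tensor(ctx_out, "demo.weight")) == ggml_nbytes(t));
    gguf_set_tensor_data(ctx_out, "demo.weight", data.data());  // no size argument anymore

    gguf_free(ctx_out);
    ggml_free(ctx);
    return 0;
}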

ggml/include/ggml-cpp.h (1 addition, 0 deletions)

@@ -7,6 +7,7 @@
 #include "ggml.h"
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
+#include "gguf.h"
 #include <memory>

 // Smart pointers for ggml types
