Commit 43544fb

Implement --no-byteswap argument to disable byteswapping on big-endian platforms
1 parent b03a984 commit 43544fb

19 files changed: +86 additions, -46 deletions

common/arg.cpp

Lines changed: 7 additions & 0 deletions
@@ -1390,6 +1390,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.use_mmap = false;
         }
     ).set_env("LLAMA_ARG_NO_MMAP"));
+    add_opt(common_arg(
+        {"--no-byteswap"},
+        "don't byteswap model data on big endian systems (use if model is byteswapped to big endian in advance)",
+        [](common_params & params) {
+            params.no_byteswap = true;
+        }
+    ).set_env("LLAMA_NO_BYTESWAP"));
     add_opt(common_arg(
         {"--numa"}, "TYPE",
         "attempt optimizations that help on some NUMA systems\n"

common/common.cpp

Lines changed: 8 additions & 5 deletions
@@ -925,7 +925,7 @@ struct common_init_result common_init_from_params(common_params & params) {
     // load and optionally apply lora adapters
     for (auto & la : params.lora_adapters) {
         llama_lora_adapter_ptr lora;
-        lora.reset(llama_lora_adapter_init(model, la.path.c_str()));
+        lora.reset(llama_lora_adapter_init(model, la.path.c_str(), mparams.no_byteswap));
         if (lora == nullptr) {
             LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
             llama_free(lctx);
@@ -1030,6 +1030,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
     mparams.use_mmap = params.use_mmap;
     mparams.use_mlock = params.use_mlock;
     mparams.check_tensors = params.check_tensors;
+    mparams.no_byteswap = params.no_byteswap;
     if (params.kv_overrides.empty()) {
         mparams.kv_overrides = NULL;
     } else {
@@ -1357,8 +1358,9 @@ struct llama_model * common_load_model_from_url(
     int n_split = 0;
     {
         struct gguf_init_params gguf_params = {
-            /*.no_alloc = */ true,
-            /*.ctx      = */ NULL,
+            /*.no_alloc    = */ true,
+            /*.ctx         = */ NULL,
+            /*.no_byteswap = */ false,
         };
         auto * ctx_gguf = gguf_init_from_file(local_path.c_str(), gguf_params);
         if (!ctx_gguf) {
@@ -1856,8 +1858,9 @@ static common_control_vector_data common_control_vector_load_one(const common_co

     ggml_context * ctx = nullptr;
     struct gguf_init_params meta_gguf_params = {
-        /* .no_alloc = */ false,
-        /* .ctx      = */ &ctx,
+        /* .no_alloc    = */ false,
+        /* .ctx         = */ &ctx,
+        /* .no_byteswap = */ false,
     };
     struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
     if (!ctx_gguf) {
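Taken together, the flag flows from common_params into llama_model_params and from there into the loader. Below is a minimal sketch (not part of this commit) of a caller setting the flag directly on the model params; the llama_model_params::no_byteswap member is implied by the mparams.no_byteswap assignment above, llama_model_default_params() / llama_load_model_from_file() / llama_free_model() are assumed from the public llama.h API of this revision, and the model path is a placeholder.

// sketch: load a model that was byteswapped to big-endian ahead of time
#include "llama.h"

int main(void) {
    llama_model_params mparams = llama_model_default_params();
    mparams.use_mmap    = true;
    mparams.no_byteswap = true; // field added by this commit: keep tensor data as-is

    llama_model * model = llama_load_model_from_file("model-be.gguf", mparams);
    if (model == NULL) {
        return 1;
    }
    llama_free_model(model);
    return 0;
}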

common/common.h

Lines changed: 1 addition & 0 deletions
@@ -297,6 +297,7 @@ struct common_params {
     bool no_kv_offload = false; // disable KV offloading
     bool warmup = true; // warmup run
     bool check_tensors = false; // validate tensor data
+    bool no_byteswap = false; // skip byteswapping on big endian systems

     ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
     ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V

examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp

Lines changed: 3 additions & 2 deletions
@@ -533,8 +533,9 @@ static void load_vocab(const char * filename, const Config * config, struct my_l
     struct ggml_context * ctx_data = NULL;

     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ &ctx_data,
+        /*.no_alloc    = */ false,
+        /*.ctx         = */ &ctx_data,
+        /*.no_byteswap = */ false,
     };

     struct gguf_context * ctx = gguf_init_from_file(filename, params);

examples/export-lora/export-lora.cpp

Lines changed: 3 additions & 2 deletions
@@ -49,8 +49,9 @@ static std::string ggml_ne_string(const ggml_tensor * t) {

 static struct gguf_context * load_gguf(std::string & fname, struct ggml_context ** ctx_ggml) {
     struct gguf_init_params params = {
-        /*.no_alloc = */ true,
-        /*.ctx      = */ ctx_ggml,
+        /*.no_alloc    = */ true,
+        /*.ctx         = */ ctx_ggml,
+        /*.no_byteswap = */ false,
     };
     struct gguf_context * ctx_gguf = gguf_init_from_file(fname.c_str(), params);
     if (!ctx_gguf) {

examples/gguf-hash/gguf-hash.cpp

Lines changed: 3 additions & 2 deletions
@@ -288,8 +288,9 @@ static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
     struct ggml_context * ctx_data = NULL;

     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ &ctx_data,
+        /*.no_alloc    = */ false,
+        /*.ctx         = */ &ctx_data,
+        /*.no_byteswap = */ false,
     };

     // xxh64 init

examples/gguf-split/gguf-split.cpp

Lines changed: 6 additions & 4 deletions
@@ -361,8 +361,9 @@ static void gguf_split(const split_params & split_params) {
     struct ggml_context * ctx_meta = NULL;

     struct gguf_init_params params = {
-        /*.no_alloc = */ true,
-        /*.ctx      = */ &ctx_meta,
+        /*.no_alloc    = */ true,
+        /*.ctx         = */ &ctx_meta,
+        /*.no_byteswap = */ false,
     };

     std::ifstream f_input(split_params.input.c_str(), std::ios::binary);
@@ -426,8 +427,9 @@ static void gguf_merge(const split_params & split_params) {
     struct ggml_context * ctx_meta = NULL;

     struct gguf_init_params params = {
-        /*.no_alloc = */ true,
-        /*.ctx      = */ &ctx_meta,
+        /*.no_alloc    = */ true,
+        /*.ctx         = */ &ctx_meta,
+        /*.no_byteswap = */ false,
     };

     if (i_split > 0) {

examples/gguf/gguf.cpp

Lines changed: 6 additions & 4 deletions
@@ -85,8 +85,9 @@ static bool gguf_ex_write(const std::string & fname) {
 // just read tensor info
 static bool gguf_ex_read_0(const std::string & fname) {
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ NULL,
+        /*.no_alloc    = */ false,
+        /*.ctx         = */ NULL,
+        /*.no_byteswap = */ false,
     };

     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
@@ -151,8 +152,9 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
     struct ggml_context * ctx_data = NULL;

     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx      = */ &ctx_data,
+        /*.no_alloc    = */ false,
+        /*.ctx         = */ &ctx_data,
+        /*.no_byteswap = */ false,
     };

     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);

examples/llava/clip.cpp

Lines changed: 3 additions & 2 deletions
@@ -1114,8 +1114,9 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     struct ggml_context * meta = NULL;

     struct gguf_init_params params = {
-        /*.no_alloc = */ true,
-        /*.ctx      = */ &meta,
+        /*.no_alloc    = */ true,
+        /*.ctx         = */ &meta,
+        /*.no_byteswap = */ false,
     };

     struct gguf_context * ctx = gguf_init_from_file(fname, params);

ggml/include/gguf.h

Lines changed: 2 additions & 0 deletions
@@ -74,6 +74,8 @@ extern "C" {

         // if not NULL, create a ggml_context and allocate the tensor data in it
         struct ggml_context ** ctx;
+
+        bool no_byteswap;
     };

     GGML_API struct gguf_context * gguf_init_empty(void);
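Because gguf_init_params is a plain aggregate, every existing initializer has to name the extra member, which is what the call-site edits above do. Here is a minimal sketch (not part of this commit; the file name is a placeholder and the data on disk is assumed to be byteswapped to host byte order in advance) of reading such a file through the gguf API with the new field enabled:

#include <stdio.h>

#include "ggml.h"
#include "gguf.h"

static bool load_preswapped_gguf(const char * fname) {
    struct ggml_context * ctx_data = NULL;

    struct gguf_init_params params = {
        /*.no_alloc    = */ false,
        /*.ctx         = */ &ctx_data,
        /*.no_byteswap = */ true, // data is already in host (big-endian) order; do not swap
    };

    struct gguf_context * ctx = gguf_init_from_file(fname, params);
    if (!ctx) {
        fprintf(stderr, "failed to load %s\n", fname);
        return false;
    }

    // ... inspect metadata and tensors through ctx / ctx_data here ...

    gguf_free(ctx);
    ggml_free(ctx_data);
    return true;
}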
