Commit a9db9b0

Implement --no-byteswap argument to disable byteswapping on big-endian platforms
1 parent f4217a8 commit a9db9b0

19 files changed: +88 additions, -46 deletions


common/arg.cpp

Lines changed: 7 additions & 0 deletions
@@ -1438,6 +1438,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.use_mmap = false;
         }
     ).set_env("LLAMA_ARG_NO_MMAP"));
+    add_opt(common_arg(
+        {"--no-byteswap"},
+        "don't byteswap model data on big endian systems (use if model is byteswapped to big endian in advance)",
+        [](common_params & params) {
+            params.no_byteswap = true;
+        }
+    ).set_env("LLAMA_NO_BYTESWAP"));
     add_opt(common_arg(
         {"--numa"}, "TYPE",
         "attempt optimizations that help on some NUMA systems\n"

common/common.cpp

Lines changed: 8 additions & 5 deletions
@@ -987,7 +987,7 @@ struct common_init_result common_init_from_params(common_params & params) {
     // load and optionally apply lora adapters
     for (auto & la : params.lora_adapters) {
         llama_adapter_lora_ptr lora;
-        lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
+        lora.reset(llama_adapter_lora_init(model, la.path.c_str(), mparams.no_byteswap));
         if (lora == nullptr) {
             LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
             llama_free(lctx);
@@ -1092,6 +1092,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
     mparams.use_mmap = params.use_mmap;
     mparams.use_mlock = params.use_mlock;
     mparams.check_tensors = params.check_tensors;
+    mparams.no_byteswap = params.no_byteswap;
     if (params.kv_overrides.empty()) {
         mparams.kv_overrides = NULL;
     } else {
@@ -1418,8 +1419,9 @@ struct llama_model * common_load_model_from_url(
     int n_split = 0;
     {
         struct gguf_init_params gguf_params = {
-            /*.no_alloc = */ true,
-            /*.ctx = */ NULL,
+            /*.no_alloc    = */ true,
+            /*.ctx         = */ NULL,
+            /*.no_byteswap = */ false,
         };
         auto * ctx_gguf = gguf_init_from_file(local_path.c_str(), gguf_params);
         if (!ctx_gguf) {
@@ -2063,8 +2065,9 @@ static common_control_vector_data common_control_vector_load_one(const common_co
 
     ggml_context * ctx = nullptr;
     struct gguf_init_params meta_gguf_params = {
-        /* .no_alloc = */ false,
-        /* .ctx = */ &ctx,
+        /* .no_alloc    = */ false,
+        /* .ctx         = */ &ctx,
+        /* .no_byteswap = */ false,
     };
     struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
     if (!ctx_gguf) {
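Taken together, these hunks thread the new flag from common_params down to the loaders. A minimal sketch of that flow, assuming the headers normally used by common.cpp and only the signatures visible in this diff (the helper name and its standalone form are illustrative, not part of the commit):

#include <string>
#include "common.h"     // common_params, common_model_params_to_llama
#include "llama-cpp.h"  // llama_adapter_lora_ptr, llama_adapter_lora_init

// Hypothetical helper: apply a LoRA adapter while honoring --no-byteswap.
static llama_adapter_lora_ptr load_adapter_no_byteswap(llama_model * model,
                                                       common_params & params,
                                                       const std::string & lora_path) {
    // what --no-byteswap / LLAMA_NO_BYTESWAP set in common/arg.cpp
    params.no_byteswap = true;

    // copied into llama_model_params (hunk @@ -1092 above)
    llama_model_params mparams = common_model_params_to_llama(params);

    // forwarded as the new third argument (hunk @@ -987 above)
    llama_adapter_lora_ptr lora;
    lora.reset(llama_adapter_lora_init(model, lora_path.c_str(), mparams.no_byteswap));
    return lora;
}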

common/common.h

Lines changed: 1 addition & 0 deletions
@@ -307,6 +307,7 @@ struct common_params {
     bool no_kv_offload = false; // disable KV offloading
     bool warmup = true; // warmup run
     bool check_tensors = false; // validate tensor data
+    bool no_byteswap = false; // skip byteswapping on big endian systems
 
     ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
     ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V

examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp

Lines changed: 3 additions & 2 deletions
@@ -533,8 +533,9 @@ static void load_vocab(const char * filename, const Config * config, struct my_l
     struct ggml_context * ctx_data = NULL;
 
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx = */ &ctx_data,
+        /*.no_alloc    = */ false,
+        /*.ctx         = */ &ctx_data,
+        /*.no_byteswap = */ false,
     };
 
     struct gguf_context * ctx = gguf_init_from_file(filename, params);

examples/export-lora/export-lora.cpp

Lines changed: 3 additions & 2 deletions
@@ -48,8 +48,9 @@ static std::string ggml_ne_string(const ggml_tensor * t) {
 
 static struct gguf_context * load_gguf(std::string & fname, struct ggml_context ** ctx_ggml) {
     struct gguf_init_params params = {
-        /*.no_alloc = */ true,
-        /*.ctx = */ ctx_ggml,
+        /*.no_alloc    = */ true,
+        /*.ctx         = */ ctx_ggml,
+        /*.no_byteswap = */ false,
     };
     struct gguf_context * ctx_gguf = gguf_init_from_file(fname.c_str(), params);
     if (!ctx_gguf) {

examples/gguf-hash/gguf-hash.cpp

Lines changed: 3 additions & 2 deletions
@@ -288,8 +288,9 @@ static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
     struct ggml_context * ctx_data = NULL;
 
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx = */ &ctx_data,
+        /*.no_alloc    = */ false,
+        /*.ctx         = */ &ctx_data,
+        /*.no_byteswap = */ false,
    };
 
     // xxh64 init

examples/gguf-split/gguf-split.cpp

Lines changed: 6 additions & 4 deletions
@@ -361,8 +361,9 @@ static void gguf_split(const split_params & split_params) {
     struct ggml_context * ctx_meta = NULL;
 
     struct gguf_init_params params = {
-        /*.no_alloc = */ true,
-        /*.ctx = */ &ctx_meta,
+        /*.no_alloc    = */ true,
+        /*.ctx         = */ &ctx_meta,
+        /*.no_byteswap = */ false,
     };
 
     std::ifstream f_input(split_params.input.c_str(), std::ios::binary);
@@ -426,8 +427,9 @@ static void gguf_merge(const split_params & split_params) {
     struct ggml_context * ctx_meta = NULL;
 
     struct gguf_init_params params = {
-        /*.no_alloc = */ true,
-        /*.ctx = */ &ctx_meta,
+        /*.no_alloc    = */ true,
+        /*.ctx         = */ &ctx_meta,
+        /*.no_byteswap = */ false,
     };
 
     if (i_split > 0) {

examples/gguf/gguf.cpp

Lines changed: 6 additions & 4 deletions
@@ -85,8 +85,9 @@ static bool gguf_ex_write(const std::string & fname) {
 // just read tensor info
 static bool gguf_ex_read_0(const std::string & fname) {
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx = */ NULL,
+        /*.no_alloc    = */ false,
+        /*.ctx         = */ NULL,
+        /*.no_byteswap = */ false,
     };
 
     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
@@ -151,8 +152,9 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
     struct ggml_context * ctx_data = NULL;
 
     struct gguf_init_params params = {
-        /*.no_alloc = */ false,
-        /*.ctx = */ &ctx_data,
+        /*.no_alloc    = */ false,
+        /*.ctx         = */ &ctx_data,
+        /*.no_byteswap = */ false,
     };
 
     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);

examples/llava/clip.cpp

Lines changed: 3 additions & 2 deletions
@@ -1122,8 +1122,9 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     struct ggml_context * meta = NULL;
 
     struct gguf_init_params params = {
-        /*.no_alloc = */ true,
-        /*.ctx = */ &meta,
+        /*.no_alloc    = */ true,
+        /*.ctx         = */ &meta,
+        /*.no_byteswap = */ false,
     };
 
     struct gguf_context * ctx = gguf_init_from_file(fname, params);

ggml/include/gguf.h

Lines changed: 2 additions & 0 deletions
@@ -74,6 +74,8 @@ extern "C" {
 
         // if not NULL, create a ggml_context and allocate the tensor data in it
        struct ggml_context ** ctx;
+
+        bool no_byteswap;
     };
 
     GGML_API struct gguf_context * gguf_init_empty(void);
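Because gguf_init_params is a plain C aggregate, every brace-initialized instance now supplies the extra member, which is why the hunks above all add /*.no_byteswap = */ false. A caller that wants to skip byteswapping for a file already stored in big-endian byte order would set it to true instead; a minimal sketch, assuming the gguf.h API as shown above (file name is a placeholder):

#include "gguf.h"

// Sketch: open a GGUF file without byteswapping its tensor data, e.g. a model
// that was byteswapped to big endian in advance.
static struct gguf_context * open_big_endian_gguf(struct ggml_context ** ctx_data) {
    struct gguf_init_params params = {
        /*.no_alloc    = */ false,
        /*.ctx         = */ ctx_data,
        /*.no_byteswap = */ true,   // new field introduced by this commit
    };
    return gguf_init_from_file("model-be.gguf", params);
}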
