Skip to content

Commit 3bddc6a

Browse files
Make byteswapping opt-in
Enable it for llama.cpp
1 parent 3ed07ff commit 3bddc6a

File tree

12 files changed

+48
-31
lines changed

12 files changed

+48
-31
lines changed

common/common.cpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1418,8 +1418,9 @@ struct llama_model * common_load_model_from_url(
14181418
int n_split = 0;
14191419
{
14201420
struct gguf_init_params gguf_params = {
1421-
/*.no_alloc = */ true,
1422-
/*.ctx = */ NULL,
1421+
/*.no_alloc = */ true,
1422+
/*.ctx = */ NULL,
1423+
/*.allow_byteswapping = */ true,
14231424
};
14241425
auto * ctx_gguf = gguf_init_from_file(local_path.c_str(), gguf_params);
14251426
if (!ctx_gguf) {
@@ -2063,8 +2064,9 @@ static common_control_vector_data common_control_vector_load_one(const common_co
20632064

20642065
ggml_context * ctx = nullptr;
20652066
struct gguf_init_params meta_gguf_params = {
2066-
/* .no_alloc = */ false,
2067-
/* .ctx = */ &ctx,
2067+
/* .no_alloc = */ false,
2068+
/* .ctx = */ &ctx,
2069+
/* .allow_byteswapping = */ true,
20682070
};
20692071
struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
20702072
if (!ctx_gguf) {

examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -533,8 +533,9 @@ static void load_vocab(const char * filename, const Config * config, struct my_l
533533
struct ggml_context * ctx_data = NULL;
534534

535535
struct gguf_init_params params = {
536-
/*.no_alloc = */ false,
537-
/*.ctx = */ &ctx_data,
536+
/*.no_alloc = */ false,
537+
/*.ctx = */ &ctx_data,
538+
/*.allow_byteswapping = */ true,
538539
};
539540

540541
struct gguf_context * ctx = gguf_init_from_file(filename, params);

examples/export-lora/export-lora.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,9 @@ static std::string ggml_ne_string(const ggml_tensor * t) {
4848

4949
static struct gguf_context * load_gguf(std::string & fname, struct ggml_context ** ctx_ggml) {
5050
struct gguf_init_params params = {
51-
/*.no_alloc = */ true,
52-
/*.ctx = */ ctx_ggml,
51+
/*.no_alloc = */ true,
52+
/*.ctx = */ ctx_ggml,
53+
/*.allow_byteswapping = */ true,
5354
};
5455
struct gguf_context * ctx_gguf = gguf_init_from_file(fname.c_str(), params);
5556
if (!ctx_gguf) {

examples/gguf-hash/gguf-hash.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -288,8 +288,9 @@ static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
288288
struct ggml_context * ctx_data = NULL;
289289

290290
struct gguf_init_params params = {
291-
/*.no_alloc = */ false,
292-
/*.ctx = */ &ctx_data,
291+
/*.no_alloc = */ false,
292+
/*.ctx = */ &ctx_data,
293+
/*.allow_byteswapping = */ true,
293294
};
294295

295296
// xxh64 init

examples/gguf-split/gguf-split.cpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -372,8 +372,9 @@ static void gguf_split(const split_params & split_params) {
372372
struct ggml_context * ctx_meta = NULL;
373373

374374
struct gguf_init_params params = {
375-
/*.no_alloc = */ true,
376-
/*.ctx = */ &ctx_meta,
375+
/*.no_alloc = */ true,
376+
/*.ctx = */ &ctx_meta,
377+
/*.allow_byteswapping = */ true,
377378
};
378379

379380
std::ifstream f_input(split_params.input.c_str(), std::ios::binary);
@@ -437,8 +438,9 @@ static void gguf_merge(const split_params & split_params) {
437438
struct ggml_context * ctx_meta = NULL;
438439

439440
struct gguf_init_params params = {
440-
/*.no_alloc = */ true,
441-
/*.ctx = */ &ctx_meta,
441+
/*.no_alloc = */ true,
442+
/*.ctx = */ &ctx_meta,
443+
/*.allow_byteswapping = */ true,
442444
};
443445

444446
if (i_split > 0) {

examples/gguf/gguf.cpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -85,8 +85,9 @@ static bool gguf_ex_write(const std::string & fname) {
8585
// just read tensor info
8686
static bool gguf_ex_read_0(const std::string & fname) {
8787
struct gguf_init_params params = {
88-
/*.no_alloc = */ false,
89-
/*.ctx = */ NULL,
88+
/*.no_alloc = */ false,
89+
/*.ctx = */ NULL,
90+
/*.allow_byteswapping = */ true,
9091
};
9192

9293
struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
@@ -151,8 +152,9 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
151152
struct ggml_context * ctx_data = NULL;
152153

153154
struct gguf_init_params params = {
154-
/*.no_alloc = */ false,
155-
/*.ctx = */ &ctx_data,
155+
/*.no_alloc = */ false,
156+
/*.ctx = */ &ctx_data,
157+
/*.allow_byteswapping = */ true,
156158
};
157159

158160
struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);

examples/llava/clip.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1122,8 +1122,9 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
11221122
struct ggml_context * meta = NULL;
11231123

11241124
struct gguf_init_params params = {
1125-
/*.no_alloc = */ true,
1126-
/*.ctx = */ &meta,
1125+
/*.no_alloc = */ true,
1126+
/*.ctx = */ &meta,
1127+
/*.allow_byteswapping = */ true,
11271128
};
11281129

11291130
struct gguf_context * ctx = gguf_init_from_file(fname, params);

ggml/include/gguf.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -74,6 +74,8 @@ extern "C" {
7474

7575
// if not NULL, create a ggml_context and allocate the tensor data in it
7676
struct ggml_context ** ctx;
77+
78+
bool allow_byteswapping;
7779
};
7880

7981
GGML_API struct gguf_context * gguf_init_empty(void);

ggml/src/gguf.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -353,7 +353,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
353353
int64_t n_tensors = 0;
354354

355355
if (ok && gr.read(ctx->version)) {
356-
if (((ctx->version & 0x0000FFFF) == 0) && ((ctx->version & 0xFFFF0000) != 0)) {
356+
if ((params.allow_byteswapping) && ((ctx->version & 0x0000FFFF) == 0) && ((ctx->version & 0xFFFF0000) != 0)) {
357357
// most likely different endianness, do byteswapping
358358
gr.do_byteswap = true;
359359
ctx->needs_byteswap = true;

src/llama-adapter.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -151,8 +151,9 @@ static void llama_adapter_lora_init_impl(struct llama_model & model, const char
151151

152152
ggml_context * ctx_init;
153153
struct gguf_init_params meta_gguf_params = {
154-
/* .no_alloc = */ true,
155-
/* .ctx = */ &ctx_init,
154+
/* .no_alloc = */ true,
155+
/* .ctx = */ &ctx_init,
156+
/* .allow_byteswapping = */ true,
156157
};
157158

158159
gguf_context_ptr ctx_gguf { gguf_init_from_file(path_lora, meta_gguf_params) };

0 commit comments

Comments (0)