From e2fdf284bdadfba1af55b5ce07b78e174a860f8d Mon Sep 17 00:00:00 2001
From: Aaron Teo <aaron.teo1@ibm.com>
Date: Sat, 31 May 2025 20:59:03 +0800
Subject: [PATCH 1/5] gguf: prevent non-native endian models from being loaded

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
---
 ggml/src/gguf.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp
index 8667a80bd0685..e2464bd2ffd03 100644
--- a/ggml/src/gguf.cpp
+++ b/ggml/src/gguf.cpp
@@ -347,6 +347,12 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
     int64_t n_tensors = 0;
 
     if (ok && gr.read(ctx->version)) {
+        if ((ctx->version & 0xFFFF) == 0x0000) {
+            GGML_LOG_ERROR("%s: host and model endian mismatch, please use a model compiled with the same endian as your host system\n", __func__);
+            gguf_free(ctx);
+            return nullptr;
+        }
+
         if (ctx->version == 1) {
             GGML_LOG_ERROR("%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
             ok = false;

From cc1def75cf6f258b900299f515570398644f70a5 Mon Sep 17 00:00:00 2001
From: Aaron Teo <aaron.teo1@ibm.com>
Date: Sat, 31 May 2025 21:02:17 +0800
Subject: [PATCH 2/5] gguf: update error message

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
---
 ggml/src/gguf.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp
index e2464bd2ffd03..0b3a2597dc4f7 100644
--- a/ggml/src/gguf.cpp
+++ b/ggml/src/gguf.cpp
@@ -348,7 +348,7 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
 
     if (ok && gr.read(ctx->version)) {
         if ((ctx->version & 0xFFFF) == 0x0000) {
-            GGML_LOG_ERROR("%s: host and model endian mismatch, please use a model compiled with the same endian as your host system\n", __func__);
+            GGML_LOG_ERROR("%s: failed to load model: host and model endian mismatch, please use a model compiled with the same endian as your host system\n", __func__);
             gguf_free(ctx);
             return nullptr;
         }

From 25c971d8ea954f1e5bcfcda76e1d9f7ef8d64b6f Mon Sep 17 00:00:00 2001
From: Aaron Teo <aaron.teo1@ibm.com>
Date: Sun, 1 Jun 2025 17:12:16 +0800
Subject: [PATCH 3/5] gguf: make the non-native endian check more verbose

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
---
 ggml/src/gguf.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp
index 0b3a2597dc4f7..091483c5e7b55 100644
--- a/ggml/src/gguf.cpp
+++ b/ggml/src/gguf.cpp
@@ -347,7 +347,15 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
     int64_t n_tensors = 0;
 
     if (ok && gr.read(ctx->version)) {
-        if ((ctx->version & 0xFFFF) == 0x0000) {
+        /*
+         * bit layout is different when reading non-native endian models.
+         * assuming that the GGUF version is 3, the non-native endian model
+         * would read it as 0x30000000. we can use the AND operation against
+         * the last 4 hexadecimal digit to check if the model is the same
+         * endianness as the host system.
+        */
+        GGML_ASSERT(ctx->version > 0 && ctx->version <= 65535);
+        if ((ctx->version & 0x0000FFFF) == 0x00000000) {
             GGML_LOG_ERROR("%s: failed to load model: host and model endian mismatch, please use a model compiled with the same endian as your host system\n", __func__);
             gguf_free(ctx);
             return nullptr;

From 65bf0629bf2d0f2dfdf81bd5fd3ad52ae1e5ee2a Mon Sep 17 00:00:00 2001
From: Aaron Teo <aaron.teo1@ibm.com>
Date: Sun, 1 Jun 2025 17:19:18 +0800
Subject: [PATCH 4/5] ggml: move ggml_assert location

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
---
 ggml/src/gguf.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp
index 091483c5e7b55..b534c5068c29b 100644
--- a/ggml/src/gguf.cpp
+++ b/ggml/src/gguf.cpp
@@ -354,13 +354,13 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
          * the last 4 hexadecimal digit to check if the model is the same
          * endianness as the host system.
         */
-        GGML_ASSERT(ctx->version > 0 && ctx->version <= 65535);
         if ((ctx->version & 0x0000FFFF) == 0x00000000) {
             GGML_LOG_ERROR("%s: failed to load model: host and model endian mismatch, please use a model compiled with the same endian as your host system\n", __func__);
             gguf_free(ctx);
             return nullptr;
         }
 
+        GGML_ASSERT(ctx->version > 0 && ctx->version <= 65535);
         if (ctx->version == 1) {
             GGML_LOG_ERROR("%s: GGUFv1 is no longer supported, please use a more up-to-date version\n", __func__);
             ok = false;

From c83208fbdcf5231832c03f00360ef04beef21d8c Mon Sep 17 00:00:00 2001
From: Aaron Teo <aaron.teo1@ibm.com>
Date: Sun, 1 Jun 2025 20:43:46 +0800
Subject: [PATCH 5/5] ggml: reword the endianness check error message

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
---
 ggml/src/gguf.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp
index b534c5068c29b..dab228e1ea32a 100644
--- a/ggml/src/gguf.cpp
+++ b/ggml/src/gguf.cpp
@@ -351,11 +351,11 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
          * bit layout is different when reading non-native endian models.
          * assuming that the GGUF version is 3, the non-native endian model
-         * would read it as 0x30000000. we can use the AND operation against
-         * the last 4 hexadecimal digit to check if the model is the same
+         * would read it as 0x03000000. we can use the AND operation against
+         * the last 4 hexadecimal digits to check if the model is the same
          * endianness as the host system.
         */
         if ((ctx->version & 0x0000FFFF) == 0x00000000) {
-            GGML_LOG_ERROR("%s: failed to load model: host and model endian mismatch, please use a model compiled with the same endian as your host system\n", __func__);
+            GGML_LOG_ERROR("%s: failed to load model: this GGUF file version %" PRIu32 " is extremely large, is there a mismatch between the host and model endianness?\n", __func__, ctx->version);
             gguf_free(ctx);
             return nullptr;
         }