@@ -21,7 +21,7 @@ index 2874600..01882de 100644
 #endif
 
 diff --git a/ggml.c b/ggml.c
-index beb7f46..fe53eb7 100644
+index 44c43b4..0061fab 100644
 --- a/ggml.c
 +++ b/ggml.c
 @@ -9,7 +9,7 @@
@@ -76,18 +76,20 @@ index 6348fce..756fee1 100644
 #endif
 #endif
 diff --git a/llama-util.h b/llama-util.h
-index 75e19c5..23c2cc3 100644
+index 75e19c5..e80d755 100644
 --- a/llama-util.h
 +++ b/llama-util.h
-@@ -15,6 +15,7 @@
+@@ -15,6 +15,9 @@
 #include <string>
 #include <vector>
 #include <stdexcept>
++ #ifdef __MVS__
 + #include <sys/endian.h>
++ #endif
 
 #ifdef __has_include
 #if __has_include(<unistd.h>)
-@@ -28,6 +29,42 @@
+@@ -28,6 +31,42 @@
@@ -130,31 +132,31 @@ index 75e19c5..23c2cc3 100644
 #if defined(_WIN32)
 #define WIN32_LEAN_AND_MEAN
 #ifndef NOMINMAX
-@@ -118,6 +155,9 @@ struct llama_file {
+@@ -118,6 +157,9 @@ struct llama_file {
 std::uint32_t read_u32() {
 std::uint32_t ret;
 read_raw(&ret, sizeof(ret));
 + #ifdef BIG_ENDIAN
-+ ret = LITTLE_TO_BIG_32(ret);
++ ret = LITTLE_TO_BIG_32(ret);
 + #endif
 return ret;
 }
 
-@@ -470,6 +510,13 @@ struct llama_buffer {
+@@ -470,6 +512,13 @@ struct llama_buffer {
 void resize(size_t len) {
 #ifdef GGML_USE_METAL
 free(addr);
 + #ifdef __MVS__
 + addr = malloc(len);
-+ result = 0;
++ int result = 0;
 + if (addr == NULL)
 + result = errno;
 + memset(addr, 0, len);
 + #else
 int result = posix_memalign((void **) &addr, getpagesize(), len);
 if (result == 0) {
 memset(addr, 0, len);
-@@ -477,6 +524,7 @@ struct llama_buffer {
+@@ -477,6 +526,7 @@ struct llama_buffer {
 else {
 addr = NULL;
 }
@@ -163,7 +165,7 @@ index 75e19c5..23c2cc3 100644
 delete[] addr;
 addr = new uint8_t[len];
 diff --git a/llama.cpp b/llama.cpp
-index 3452439..b39fbcc 100644
+index f2cbe76..c25e480 100644
 --- a/llama.cpp
 +++ b/llama.cpp
 @@ -574,6 +574,9 @@ struct llama_file_loader {
@@ -181,10 +183,10 @@ index 3452439..b39fbcc 100644
 tensor.ne.resize(n_dims);
 file.read_raw(tensor.ne.data(), sizeof(tensor.ne[0]) * n_dims);
 + #ifdef BIG_ENDIAN
-+ // Convert the tensor data from little endian to big endian
++ // Convert the tensor metadata from little endian to big endian
 + for (size_t i = 0; i < n_dims; ++i) {
-+ uint32_t* element = reinterpret_cast<uint32_t*>(tensor.ne.data() + i);
-+ *element = LITTLE_TO_BIG_32(*element);
++ uint32_t* element = reinterpret_cast<uint32_t*>(tensor.ne.data() + i);
++ *element = LITTLE_TO_BIG_32(*element);
 + }
 + #endif
 std::string name = file.read_string(name_len);
@@ -200,7 +202,7 @@ index 3452439..b39fbcc 100644
 this->use_mmap = use_mmap;
 }
 
-@@ -833,10 +846,26 @@ struct llama_model_loader {
+@@ -833,10 +846,28 @@ struct llama_model_loader {
 llama_file & file = file_loader->file;
 file.seek(lt.file_off, SEEK_SET);
 file.read_raw(lt.data, lt.size);
@@ -209,19 +211,21 @@ index 3452439..b39fbcc 100644
 - if (0) {
 - print_checksum(lt);
 + #ifdef BIG_ENDIAN
++
 + #define QK4_0 32
-+ typedef struct {
-+ ggml_fp16_t d; // delta
-+ uint8_t qs[QK4_0 / 2]; // nibbles / quants
-+ } block_q4_0;
++ typedef struct {
++ ggml_fp16_t d; // delta
++ uint8_t qs[QK4_0 / 2]; // nibbles / quants
++ } block_q4_0;
 +
 + // Convert the tensor data from little endian to big endian
-+ if (lt.type == 0)
++ if (lt.type == GGML_TYPE_F32)
 + for (size_t i = 0; i < (lt.size/4); ++i) {
 + uint32_t* element = reinterpret_cast<uint32_t*>(lt.data) + i;
 + *element = LITTLE_TO_BIG_32 (*element);
 + }
-+ if (lt.type == 2)
++
++ if (lt.type == GGML_TYPE_Q4_0)
 + for (size_t i = 0; i < (lt.size/sizeof(block_q4_0)); ++i) {
 + block_q4_0* element = reinterpret_cast<block_q4_0*>(lt.data) + i;
 + element->d = ReverseShort (element->d);
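For context: the swap helpers used throughout this patch, LITTLE_TO_BIG_32 and ReverseShort, are defined in the llama-util.h hunk (@@ -28,6 +31,42 @@) that this commit renumbers but whose body is not shown here. Below is a minimal sketch of what such 16-bit and 32-bit byte-reversal macros typically look like; these definitions are assumptions for illustration, not the patch's actual code.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the helpers defined in the unseen
 * llama-util.h hunk. */

/* Reverse the two bytes of a 16-bit value, e.g. the ggml_fp16_t scale d. */
#define ReverseShort(x) ((uint16_t)((((uint16_t)(x) & 0x00FFu) << 8) | \
                                    (((uint16_t)(x) & 0xFF00u) >> 8)))

/* Reverse the four bytes of a 32-bit value: tensor dims, lengths, f32 data. */
#define LITTLE_TO_BIG_32(x) ((((uint32_t)(x) & 0x000000FFu) << 24) | \
                             (((uint32_t)(x) & 0x0000FF00u) <<  8) | \
                             (((uint32_t)(x) & 0x00FF0000u) >>  8) | \
                             (((uint32_t)(x) & 0xFF000000u) >> 24))

int main(void) {
    uint32_t v32 = 0x01020304u;
    uint16_t v16 = 0x1234u;
    printf("%08x -> %08x\n", v32, LITTLE_TO_BIG_32(v32)); /* 01020304 -> 04030201 */
    printf("%04x -> %04x\n", v16, ReverseShort(v16));     /* 1234 -> 3412 */
    return 0;
}

Note that in the block_q4_0 loop only the 16-bit scale d is byte-swapped; the qs member is an array of bytes holding packed 4-bit quants, so it is endian-neutral and needs no conversion.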