
Commit f91b51d

Address Haritha's comments
1 parent 1294a61 commit f91b51d

File tree

1 file changed: 24 additions, 20 deletions


patches/PR1.patch

Lines changed: 24 additions & 20 deletions
@@ -21,7 +21,7 @@ index 2874600..01882de 100644
 #endif
 
 diff --git a/ggml.c b/ggml.c
-index beb7f46..fe53eb7 100644
+index 44c43b4..0061fab 100644
 --- a/ggml.c
 +++ b/ggml.c
 @@ -9,7 +9,7 @@
@@ -76,18 +76,20 @@ index 6348fce..756fee1 100644
 #endif
 #endif
 diff --git a/llama-util.h b/llama-util.h
-index 75e19c5..23c2cc3 100644
+index 75e19c5..e80d755 100644
 --- a/llama-util.h
 +++ b/llama-util.h
-@@ -15,6 +15,7 @@
+@@ -15,6 +15,9 @@
 #include <string>
 #include <vector>
 #include <stdexcept>
++#ifdef __MVS__
 +#include <sys/endian.h>
++#endif
 
 #ifdef __has_include
 #if __has_include(<unistd.h>)
-@@ -28,6 +29,42 @@
+@@ -28,6 +31,42 @@
 #endif
 #endif
 
@@ -130,31 +132,31 @@ index 75e19c5..23c2cc3 100644
 #if defined(_WIN32)
 #define WIN32_LEAN_AND_MEAN
 #ifndef NOMINMAX
-@@ -118,6 +155,9 @@ struct llama_file {
+@@ -118,6 +157,9 @@ struct llama_file {
     std::uint32_t read_u32() {
         std::uint32_t ret;
         read_raw(&ret, sizeof(ret));
 +#ifdef BIG_ENDIAN
-+    ret = LITTLE_TO_BIG_32(ret);
++        ret = LITTLE_TO_BIG_32(ret);
 +#endif
         return ret;
     }
 
-@@ -470,6 +510,13 @@ struct llama_buffer {
+@@ -470,6 +512,13 @@ struct llama_buffer {
     void resize(size_t len) {
 #ifdef GGML_USE_METAL
         free(addr);
 +#ifdef __MVS__
 +        addr = malloc(len);
-+        result = 0;
++        int result = 0;
 +        if (addr == NULL)
 +            result = errno;
 +        memset(addr, 0, len);
 +#else
         int result = posix_memalign((void **) &addr, getpagesize(), len);
         if (result == 0) {
             memset(addr, 0, len);
-@@ -477,6 +524,7 @@ struct llama_buffer {
+@@ -477,6 +526,7 @@ struct llama_buffer {
     else {
         addr = NULL;
     }
@@ -163,7 +165,7 @@ index 75e19c5..23c2cc3 100644
         delete[] addr;
         addr = new uint8_t[len];
 diff --git a/llama.cpp b/llama.cpp
-index 3452439..b39fbcc 100644
+index f2cbe76..c25e480 100644
 --- a/llama.cpp
 +++ b/llama.cpp
 @@ -574,6 +574,9 @@ struct llama_file_loader {
@@ -181,10 +183,10 @@ index 3452439..b39fbcc 100644
         tensor.ne.resize(n_dims);
         file.read_raw(tensor.ne.data(), sizeof(tensor.ne[0]) * n_dims);
 +#ifdef BIG_ENDIAN
-+    // Convert the tensor data from little endian to big endian
++    // Convert the tensor metadata from little endian to big endian
 +    for (size_t i = 0; i < n_dims; ++i) {
-+    uint32_t* element = reinterpret_cast<uint32_t*>(tensor.ne.data() + i);
-+    *element = LITTLE_TO_BIG_32(*element);
++        uint32_t* element = reinterpret_cast<uint32_t*>(tensor.ne.data() + i);
++        *element = LITTLE_TO_BIG_32(*element);
 +    }
 +#endif
         std::string name = file.read_string(name_len);
@@ -200,7 +202,7 @@ index 3452439..b39fbcc 100644
         this->use_mmap = use_mmap;
     }
 
-@@ -833,10 +846,26 @@ struct llama_model_loader {
+@@ -833,10 +846,28 @@ struct llama_model_loader {
         llama_file & file = file_loader->file;
         file.seek(lt.file_off, SEEK_SET);
         file.read_raw(lt.data, lt.size);
@@ -209,19 +211,21 @@ index 3452439..b39fbcc 100644
 -    if (0) {
 -        print_checksum(lt);
 +#ifdef BIG_ENDIAN
++
 +#define QK4_0 32
-+typedef struct {
-+    ggml_fp16_t d;          // delta
-+    uint8_t qs[QK4_0 / 2];  // nibbles / quants
-+} block_q4_0;
++    typedef struct {
++        ggml_fp16_t d;          // delta
++        uint8_t qs[QK4_0 / 2];  // nibbles / quants
++    } block_q4_0;
 +
 +    // Convert the tensor data from little endian to big endian
-+    if (lt.type == 0)
++    if (lt.type == GGML_TYPE_F32)
 +        for (size_t i = 0; i < (lt.size/4); ++i) {
 +            uint32_t* element = reinterpret_cast<uint32_t*>(lt.data) + i;
 +            *element = LITTLE_TO_BIG_32 (*element);
 +        }
-+    if (lt.type == 2)
++
++    if (lt.type == GGML_TYPE_Q4_0)
 +        for (size_t i = 0; i < (lt.size/sizeof(block_q4_0)); ++i) {
 +            block_q4_0* element = reinterpret_cast<block_q4_0*>(lt.data) + i;
 +            element->d = ReverseShort (element->d);
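
Note on the helpers referenced above: LITTLE_TO_BIG_32 and ReverseShort are introduced by an earlier part of PR1.patch (presumably the 42-line llama-util.h hunk that this commit renumbers from @@ -28,6 +29,42 @@ to @@ -28,6 +31,42 @@ but does not show), so their definitions are not visible here. A minimal sketch of what such byte-swap helpers could look like, assuming plain shift-and-mask swaps of 16- and 32-bit values, is:

    // Illustrative only: the real definitions live in the llama-util.h hunk not shown in this change.
    #include <cstdint>

    // Reverse the byte order of a 16-bit value, e.g. the ggml_fp16_t scale d of block_q4_0.
    static inline uint16_t ReverseShort(uint16_t v) {
        return (uint16_t)((v >> 8) | (v << 8));
    }

    // Reverse the byte order of a 32-bit value, e.g. tensor metadata and F32 tensor data.
    #define LITTLE_TO_BIG_32(x)                                      \
        ((((x) & 0xFF000000u) >> 24) | (((x) & 0x00FF0000u) >> 8) |  \
         (((x) & 0x0000FF00u) << 8)  | (((x) & 0x000000FFu) << 24))

For the Q4_0 case, only the fp16 scale d of each block_q4_0 needs swapping: the packed 4-bit quants in qs form a byte array, so their layout is the same on little- and big-endian hosts.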
