@@ -21,7 +21,7 @@ index 2874600..01882de 100644
 #endif
 
 diff --git a/ggml.c b/ggml.c
-index beb7f46..fe53eb7 100644
+index 44c43b4..0061fab 100644
 --- a/ggml.c
 +++ b/ggml.c
 @@ -9,7 +9,7 @@
@@ -76,18 +76,20 @@ index 6348fce..756fee1 100644
 #endif
 #endif
 diff --git a/llama-util.h b/llama-util.h
-index 75e19c5..23c2cc3 100644
+index 75e19c5..e80d755 100644
 --- a/llama-util.h
 +++ b/llama-util.h
-@@ -15,6 +15,7 @@
+@@ -15,6 +15,9 @@
 #include <string>
 #include <vector>
 #include <stdexcept>
++ #ifdef __MVS__
 + #include <sys/endian.h>
++ #endif
 
 #ifdef __has_include
 #if __has_include(<unistd.h>)
-@@ -28,6 +29,42 @@
+@@ -28,6 +31,42 @@
@@ -130,31 +132,31 @@ index 75e19c5..23c2cc3 100644
 #if defined(_WIN32)
 #define WIN32_LEAN_AND_MEAN
 #ifndef NOMINMAX
-@@ -118,6 +155,9 @@ struct llama_file {
+@@ -118,6 +157,9 @@ struct llama_file {
 std::uint32_t read_u32() {
 std::uint32_t ret;
 read_raw(&ret, sizeof(ret));
 + #ifdef BIG_ENDIAN
-+ ret = LITTLE_TO_BIG_32(ret);
++ ret = LITTLE_TO_BIG_32(ret);
 + #endif
 return ret;
 }
 
-@@ -470,6 +510,13 @@ struct llama_buffer {
+@@ -470,6 +512,13 @@ struct llama_buffer {
 void resize(size_t len) {
 #ifdef GGML_USE_METAL
 free(addr);
 + #ifdef __MVS__
 + addr = malloc(len);
-+ result = 0;
++ int result = 0;
 + if (addr == NULL)
 + result = errno;
 + memset(addr, 0, len);
 + #else
 int result = posix_memalign((void **) &addr, getpagesize(), len);
 if (result == 0) {
 memset(addr, 0, len);
-@@ -477,6 +524,7 @@ struct llama_buffer {
+@@ -477,6 +526,7 @@ struct llama_buffer {
 else {
 addr = NULL;
 }
@@ -163,7 +165,7 @@ index 75e19c5..23c2cc3 100644
 delete[] addr;
 addr = new uint8_t[len];
 diff --git a/llama.cpp b/llama.cpp
-index 3452439..b39fbcc 100644
+index f2cbe76..c25e480 100644
 --- a/llama.cpp
 +++ b/llama.cpp
 @@ -574,6 +574,9 @@ struct llama_file_loader {
@@ -181,10 +183,10 @@ index 3452439..b39fbcc 100644
 tensor.ne.resize(n_dims);
 file.read_raw(tensor.ne.data(), sizeof(tensor.ne[0]) * n_dims);
 + #ifdef BIG_ENDIAN
-+ // Convert the tensor data from little endian to big endian
++ // Convert the tensor metadata from little endian to big endian
 + for (size_t i = 0; i < n_dims; ++i) {
-+ uint32_t* element = reinterpret_cast<uint32_t*>(tensor.ne.data() + i);
-+ *element = LITTLE_TO_BIG_32(*element);
++ uint32_t* element = reinterpret_cast<uint32_t*>(tensor.ne.data() + i);
++ *element = LITTLE_TO_BIG_32(*element);
 + }
 + #endif
 std::string name = file.read_string(name_len);
@@ -200,7 +202,7 @@ index 3452439..b39fbcc 100644
 this->use_mmap = use_mmap;
 }
 
-@@ -833,10 +846,26 @@ struct llama_model_loader {
+@@ -833,10 +846,28 @@ struct llama_model_loader {
 llama_file & file = file_loader->file;
 file.seek(lt.file_off, SEEK_SET);
 file.read_raw(lt.data, lt.size);
@@ -209,19 +211,21 @@ index 3452439..b39fbcc 100644
 - if (0) {
 - print_checksum(lt);
 + #ifdef BIG_ENDIAN
++
 + #define QK4_0 32
-+ typedef struct {
-+ ggml_fp16_t d; // delta
-+ uint8_t qs[QK4_0 / 2]; // nibbles / quants
-+ } block_q4_0;
++ typedef struct {
++ ggml_fp16_t d; // delta
++ uint8_t qs[QK4_0 / 2]; // nibbles / quants
++ } block_q4_0;
 +
 + // Convert the tensor data from little endian to big endian
-+ if (lt.type == 0)
++ if (lt.type == GGML_TYPE_F32)
 + for (size_t i = 0; i < (lt.size/4); ++i) {
 + uint32_t* element = reinterpret_cast<uint32_t*>(lt.data) + i;
 + *element = LITTLE_TO_BIG_32 (*element);
 + }
-+ if (lt.type == 2)
++
++ if (lt.type == GGML_TYPE_Q4_0)
 + for (size_t i = 0; i < (lt.size/sizeof(block_q4_0)); ++i) {
 + block_q4_0* element = reinterpret_cast<block_q4_0*>(lt.data) + i;
 + element->d = ReverseShort (element->d);
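For context: the swap helpers used throughout this patch, LITTLE_TO_BIG_32 and ReverseShort, are defined in the llama-util.h hunk (@@ -28,6 +31,42 @@) that this commit renumbers but whose body is not shown here. Below is a minimal sketch of what such 16-bit and 32-bit byte-reversal macros typically look like; these definitions are assumptions for illustration, not the patch's actual code.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the helpers defined in the unseen
 * llama-util.h hunk. */

/* Reverse the two bytes of a 16-bit value, e.g. the ggml_fp16_t scale d. */
#define ReverseShort(x) ((uint16_t)((((uint16_t)(x) & 0x00FFu) << 8) | \
                                    (((uint16_t)(x) & 0xFF00u) >> 8)))

/* Reverse the four bytes of a 32-bit value: tensor dims, lengths, f32 data. */
#define LITTLE_TO_BIG_32(x) ((((uint32_t)(x) & 0x000000FFu) << 24) | \
                             (((uint32_t)(x) & 0x0000FF00u) <<  8) | \
                             (((uint32_t)(x) & 0x00FF0000u) >>  8) | \
                             (((uint32_t)(x) & 0xFF000000u) >> 24))

int main(void) {
    uint32_t v32 = 0x01020304u;
    uint16_t v16 = 0x1234u;
    printf("%08x -> %08x\n", v32, LITTLE_TO_BIG_32(v32)); /* 01020304 -> 04030201 */
    printf("%04x -> %04x\n", v16, ReverseShort(v16));     /* 1234 -> 3412 */
    return 0;
}

Note that in the block_q4_0 loop only the 16-bit scale d is byte-swapped; the qs member is an array of bytes holding packed 4-bit quants, so it is endian-neutral and needs no conversion.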