Implemented fixes suggested in PR review

VyoJ · VyoJ · commit 5d337b7c768e · 2025-08-01T08:29:23.000-05:00
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 
 # llama.cpp
 
-Enable AI inferencing on z/os
+Enable AI inferencing on z/OS
 
 # Installation and Usage
 
@@ -27,8 +27,9 @@ See the [zopen porting guide](https://zopen.community/#/Guides/Porting) for more
 
 # Documentation
 
-
 # Troubleshooting
 
+If the build fails with an error in the `ggml-cpu.cpp` file (for example, a pthread-related error), run `zopen upgrade zoslib -y` and then try building again.
+
 # Contributing
-Contributions are welcome! Please follow the [zopen contribution guidelines](https://github.com/zopencommunity/meta/blob/main/CONTRIBUTING.md).
+Contributions are welcome! Please follow the [zopen contribution guidelines](https://github.com/zopencommunity/meta/blob/main/CONTRIBUTING.md).
diff --git a/buildenv b/buildenv
@@ -7,7 +7,7 @@ export ZOPEN_DEV_TAG="b6027"
 export ZOPEN_NAME="llamacpp-master"
 export ZOPEN_RUNTIME_DEPS="ncurses"
 
-export ZOPEN_SKIP_ZOSLIB_ENV_HOOK=1
+# export ZOPEN_SKIP_ZOSLIB_ENV_HOOK=1
 
 rm -f "llama"
 ln -s "llama.cpp" "llama"
diff --git a/patches/repack.cpp.patch b/patches/repack.cpp.patch
@@ -1,69 +1,20 @@
 diff --git a/ggml/src/ggml-cpu/repack.cpp b/ggml/src/ggml-cpu/repack.cpp
-index 74c1c029..134b6daa 100644
+index 74c1c02..604bd17 100644
 --- a/ggml/src/ggml-cpu/repack.cpp
 +++ b/ggml/src/ggml-cpu/repack.cpp
-@@ -1424,35 +1424,37 @@ static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(cons
+@@ -1424,6 +1424,7 @@ static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(cons
      // instance for IQ4
      static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
  
--    if (cur->type == GGML_TYPE_Q4_0) {
--        if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
--            if (cur->ne[1] % 8 == 0) {
--                return &q4_0_8x8_q8_0;
--            }
--        }
--        if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
--            if (cur->ne[1] % 4 == 0) {
--                return &q4_0_4x8_q8_0;
--            }
--        }
--        if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
--            if (cur->ne[1] % 4 == 0) {
--                return &q4_0_4x4_q8_0;
--            }
--        }
--    } else if (cur->type == GGML_TYPE_Q4_K) {
--        if (ggml_cpu_has_avx2()) {
--            if (cur->ne[1] % 8 == 0) {
--                return &q4_K_8x8_q8_K;
--            }
--        }
--    } else if (cur->type == GGML_TYPE_IQ4_NL) {
--        if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
--            if (cur->ne[1] % 4 == 0) {
--                return &iq4_nl_4x4_q8_0;
--            }
--        }
--    }
-+    // if (cur->type == GGML_TYPE_Q4_0) {
-+    //     if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
-+    //         if (cur->ne[1] % 8 == 0) {
-+    //             return &q4_0_8x8_q8_0;
-+    //         }
-+    //     }
-+    //     if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
-+    //         if (cur->ne[1] % 4 == 0) {
-+    //             return &q4_0_4x8_q8_0;
-+    //         }
-+    //     }
-+    //     if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
-+    //         if (cur->ne[1] % 4 == 0) {
-+    //             return &q4_0_4x4_q8_0;
-+    //         }
-+    //     }
-+    // } else if (cur->type == GGML_TYPE_Q4_K) {
-+    //     if (ggml_cpu_has_avx2()) {
-+    //         if (cur->ne[1] % 8 == 0) {
-+    //             return &q4_K_8x8_q8_K;
-+    //         }
-+    //     }
-+    // } else if (cur->type == GGML_TYPE_IQ4_NL) {
-+    //     if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
-+    //         if (cur->ne[1] % 4 == 0) {
-+    //             return &iq4_nl_4x4_q8_0;
-+    //         }
-+    //     }
-+    // }
++#ifndef __MVS__
+     if (cur->type == GGML_TYPE_Q4_0) {
+         if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
+             if (cur->ne[1] % 8 == 0) {
+@@ -1453,6 +1454,9 @@ static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(cons
+             }
+         }
+     }
++#endif
 +
 +    GGML_LOG_DEBUG("REPACK DEBUG: %s: Repacking disabled for tensor '%s' (type %s)\n",  __func__, ggml_get_name(cur) ? ggml_get_name(cur) : "unnamed", ggml_type_name(cur->type));