zopencommunity · IgorTodorovskiIBM · Aug 6, 2025 · Jul 29, 2025 · Jul 30, 2025 · Jul 31, 2025
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 
 # llama.cpp
 
-Enable AI inferencing on z/os
+Enable AI inferencing on z/OS
 
 # Installation and Usage
 
@@ -27,8 +27,9 @@ See the [zopen porting guide](https://zopen.community/#/Guides/Porting) for more
 
 # Documentation
 
-
 # Troubleshooting
 
+If the build fails with an error in the `ggml-cpu.cpp` file (for example, a pthread-related error), run `zopen upgrade zoslib -y` and then build again.
+
 # Contributing
-Contributions are welcome! Please follow the [zopen contribution guidelines](https://github.com/zopencommunity/meta/blob/main/CONTRIBUTING.md).
+Contributions are welcome! Please follow the [zopen contribution guidelines](https://github.com/zopencommunity/meta/blob/main/CONTRIBUTING.md).
diff --git a/buildenv b/buildenv
@@ -3,21 +3,23 @@ export ZOPEN_STABLE_DEPS="zoslib make cmake curl"
 export ZOPEN_DEV_URL="https://github.com/ggerganov/llama.cpp.git"
 export ZOPEN_DEV_DEPS="zoslib make cmake curl openssl libssh2 zlib libpsl"
 export ZOPEN_CATEGORIES="ai"
-export ZOPEN_DEV_TAG="master"
+export ZOPEN_DEV_TAG="b6027"
 export ZOPEN_NAME="llamacpp-master"
 export ZOPEN_RUNTIME_DEPS="ncurses"
 
-# rm -f "llama"
-# ln -s "llama.cpp" "llama"
-# ln -s "llama.cpp" $ZOPEN_NAME
+# export ZOPEN_SKIP_ZOSLIB_ENV_HOOK=1
+
+rm -f "llama" "$ZOPEN_NAME"  # drop both stale links first so re-sourcing buildenv is idempotent
+ln -s "llama.cpp" "llama"
+ln -s "llama.cpp" "$ZOPEN_NAME"
 
 export ZOPEN_COMP="CLANG"
 # set env variables
 # export CURL_HOME="/data/zopen/usr/local/zopen/curl/curl"
-# export BLAS_HOME="/usr/lpp/cbclib"
+export BLAS_HOME="/usr/lpp/cbclib"
 
 export ZOPEN_CONFIGURE="cmake"
-export ZOPEN_CONFIGURE_OPTS="-B ../build --install-prefix \"\$ZOPEN_INSTALL_DIR/\" -DCURL_LIBRARY=\$CURL_HOME/lib/libcurl.a -DCURL_INCLUDE_DIR=\$CURL_HOME/include -DBUILD_SHARED_LIBS_DEFAULT=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_STATIC=ON -DGGML_BACKEND_DL=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS=\$BLAS_HOME/include/openblas -DBLAS_LIBRARIES=\$BLAS_HOME/lib/libopenblas.so -DLLAMA_BUILD_TESTS=ON ."
+export ZOPEN_CONFIGURE_OPTS="-B ../build --install-prefix \"\$ZOPEN_INSTALL_DIR/\" -DCMAKE_C_FLAGS=\"-fzvector -m64 -march=z15 -lmass.arch13\" -DCMAKE_C_STANDARD=11 -DCMAKE_C_STANDARD_REQUIRED=ON -DCMAKE_C_EXTENSIONS=OFF -DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_SSL=ON -DOPENSSL_ROOT_DIR=\$OPENSSL_HOME -DCURL_LIBRARY=\$CURL_HOME/lib/libcurl.a -DCURL_INCLUDE_DIR=\$CURL_HOME/include -DBUILD_SHARED_LIBS_DEFAULT=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_STATIC=ON -DGGML_BACKEND_DL=OFF -DGGML_OPENBLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS=/usr/lpp/cbclib/include/openblas -DBLAS_LIBRARIES=/usr/lpp/cbclib/lib/libopenblas.so -DLLAMA_BUILD_TESTS=ON -DLLAMA_CURL=ON ."
 
 export ZOPEN_MAKE="cmake"
 export ZOPEN_MAKE_OPTS="--build ../build --parallel \$ZOPEN_NUM_JOBS --config Release"
@@ -39,10 +41,10 @@ zopen_check_results()
   dir="$1"
   pfx="$2"
   chk="$1/$2_check.log"
-
+  
   if [[ -f "$chk" ]]; then
-    total=$(grep -cE "Test #[0-9]+" "$chk")
-    failed=$(grep -cE "Failed|Subprocess aborted" "$chk")
+    total=$(grep -cE "Test +#" "$chk")
+    failed=$(grep -cE "\*\*\*Failed|Subprocess aborted\*\*\*" "$chk")
     skipped=$(grep -c "Skipped" "$chk")
     passed=$((total - failed - skipped))
   else
@@ -57,15 +59,32 @@ zopen_check_results()
   echo "actualPassed:$passed"
   echo "actualSkipped:$skipped"
   echo "totalTests:$total"
-  echo "expectedFailures:0"
+  echo "expectedFailures:3"
   echo "expectedTotalTests:$total"
 }
 
-zopen_append_to_env()
+zopen_pre_check()
 {
-  # echo envars outside of PATH, MANPATH, LIBPATH
+  # Re-derive SSL_CERT_PATH from curl-config and append z15 arch flags to CFLAGS/CPPFLAGS.
+  unset SSL_CERT_PATH
+  export SSL_CERT_PATH=$(curl-config --ca)
+  export CFLAGS="$CFLAGS -march=z15 -mzvector"
+  export CPPFLAGS="$CPPFLAGS -march=z15"
 }
 
+# zopen_append_to_env()
+# {
+#  export SSL_CERT_PATH=$(curl-config --ca)
+#  # echo envars outside of PATH, MANPATH, LIBPATH
+# }
+
+# zopen_append_to_zoslib_env()
+# {
+#  cat<<EOF
+#  SSL_CERT_PATH|set|$(curl-config --ca)
+#  EOF
+# }
+
 zopen_append_to_setup()
 {
   # echo commands that will run when installing via setup.sh
@@ -76,4 +95,4 @@ zopen_get_version()
   # Modify to echo the version of your tool/library
   # Rather than hardcoding the version, obtain the version by running the tool/library
   echo "1.0.0"
-}
+}
diff --git a/examples/frontend/src/components/ChatMode.js b/examples/frontend/src/components/ChatMode.js
@@ -139,7 +139,7 @@ const ChatMode = ({ conversationStarted, setConversationStarted }) => {
             onClick={handleChatSubmit}
             disabled={isTyping}
           >
-            ➤
+            ➤
           </button>
         </div>
       </div>

diff --git a/patches/CMakeLists.txt.patch b/patches/CMakeLists.txt.patch
@@ -0,0 +1,53 @@
+diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt
+index 66a5ad8d..f71c7dec 100644
+--- a/ggml/src/ggml-cpu/CMakeLists.txt
++++ b/ggml/src/ggml-cpu/CMakeLists.txt
+@@ -51,7 +51,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
+
+     target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
+     target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)
+-
++    list(APPEND ARCH_FLAGS -fzvector -m64 -march=z15)
+     if (APPLE AND GGML_ACCELERATE)
+         find_library(ACCELERATE_FRAMEWORK Accelerate)
+         if (ACCELERATE_FRAMEWORK)
+@@ -94,7 +94,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
+         target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_HBM)
+
+         target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind)
+-    endif()
++   endif()
+
+     if (GGML_SYSTEM_ARCH STREQUAL "ARM")
+         message(STATUS "ARM detected")
+@@ -463,11 +463,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
+             message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.")
+             list(APPEND ARCH_FLAGS -march=native -mtune=native)
+         endif()
+-
+         if (GGML_VXE)
+             message(STATUS "VX/VXE/VXE2 enabled")
+             list(APPEND ARCH_FLAGS -mvx -mzvector)
+-            list(APPEND ARCH_DEFINITIONS GGML_VXE)
++	    list(APPEND ARCH_DEFINITIONS GGML_VXE)
+         endif()
+
+         if (GGML_NNPA)
+@@ -480,6 +479,17 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
+     else()
+         message(WARNING "Unknown CPU architecture. Falling back to generic implementations.")
+         list(APPEND ARCH_FLAGS -DGGML_CPU_GENERIC)
++        list(APPEND ARCH_DEFINITIONS GGML_VXE)
++        message(STATUS "Added GGML_VXE Flag")
++        if(NOT DEFINED TARGET_ARCH)
++            set(TARGET_ARCH 13)
++        endif()
++        if(TARGET_ARCH GREATER 13)
++            target_compile_options(${GGML_CPU_NAME} PRIVATE "-qarch=${TARGET_ARCH}")
++        endif()
++        target_include_directories(${GGML_CPU_NAME} PRIVATE /usr/include)
++        target_link_libraries(${GGML_CPU_NAME} PRIVATE "/usr/lpp/cbclib/lib/libmass.arch${TARGET_ARCH}.a")
++        message(STATUS "Found MASS: /usr/lpp/cbclib/lib/libmass.arch${TARGET_ARCH}.a")
+     endif()
+
+     if (GGML_CPU_REPACK)
diff --git a/patches/arg.cpp.patch b/patches/arg.cpp.patch
@@ -1,5 +1,5 @@
 diff --git a/common/arg.cpp b/common/arg.cpp
-index 40af7e57..46fec792 100644
+index 06005359..28220aac 100644
 --- a/common/arg.cpp
 +++ b/common/arg.cpp
 @@ -36,6 +36,9 @@
@@ -10,7 +10,7 @@ index 40af7e57..46fec792 100644
 +#   include <cstdlib>
 +#   endif
  #endif
-
+ 
  using json = nlohmann::ordered_json;
 @@ -195,6 +198,8 @@ bool common_has_curl() {
  #   endif
@@ -30,7 +30,17 @@ index 40af7e57..46fec792 100644
      http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
      // Check if hf-token or bearer-token was specified
      if (!bearer_token.empty()) {
-@@ -569,6 +574,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
+@@ -506,7 +511,8 @@ static bool common_download_model(
+         struct gguf_init_params gguf_params = {
+             /*.no_alloc = */ true,
+             /*.ctx      = */ NULL,
+-        };
++            /* .allow_byteswapping = */ true,
++	};
+         auto * ctx_gguf = gguf_init_from_file(model.path.c_str(), gguf_params);
+         if (!ctx_gguf) {
+             LOG_ERR("\n%s:  failed to load input GGUF from %s\n", __func__, model.path.c_str());
+@@ -569,6 +575,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
      curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
      curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
      curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);

diff --git a/patches/clip.cpp.patch b/patches/clip.cpp.patch
@@ -0,0 +1,12 @@
+diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
+index a4b62f9a..2d89d4b1 100644
+--- a/tools/mtmd/clip.cpp
++++ b/tools/mtmd/clip.cpp
+@@ -2028,6 +2028,7 @@ struct clip_model_loader {
+         struct gguf_init_params params = {
+             /*.no_alloc = */ true,
+             /*.ctx      = */ &meta,
++	    /*.allow_byteswapping = */ true,
+         };
+
+         ctx_gguf = gguf_context_ptr(gguf_init_from_file(fname, params));
diff --git a/patches/common.cpp.patch b/patches/common.cpp.patch
@@ -1,8 +1,8 @@
 diff --git a/common/common.cpp b/common/common.cpp
-index 4cc40ed..234ad95 100644
+index d8c4d988..00aa7d43 100644
 --- a/common/common.cpp
 +++ b/common/common.cpp
-@@ -851,7 +851,7 @@ std::string fs_get_cache_directory() {
+@@ -874,7 +874,7 @@ std::string fs_get_cache_directory() {
      if (getenv("LLAMA_CACHE")) {
          cache_directory = std::getenv("LLAMA_CACHE");
      } else {
@@ -11,3 +11,11 @@ index 4cc40ed..234ad95 100644
          if (std::getenv("XDG_CACHE_HOME")) {
              cache_directory = std::getenv("XDG_CACHE_HOME");
          } else {
+@@ -1436,6 +1436,7 @@ static common_control_vector_data common_control_vector_load_one(const common_co
+     struct gguf_init_params meta_gguf_params = {
+         /* .no_alloc = */ false,
+         /* .ctx      = */ &ctx,
++	/* .allow_byteswapping = */ true,
+     };
+     struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
+     if (!ctx_gguf) {
diff --git a/patches/convert-llama2c-to-ggml.cpp.patch b/patches/convert-llama2c-to-ggml.cpp.patch
@@ -0,0 +1,12 @@
+diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+index bdf0eed2..b36170bc 100644
+--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
++++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+@@ -535,6 +535,7 @@ static void load_vocab(const char * filename, const Config * config, struct my_l
+         struct gguf_init_params params = {
+             /*.no_alloc = */ false,
+             /*.ctx      = */ &ctx_data,
++	    /*.allow_byteswapping = */ true,
+         };
+
+         struct gguf_context * ctx = gguf_init_from_file(filename, params);
diff --git a/patches/examples_gguf.cpp.patch b/patches/examples_gguf.cpp.patch
@@ -0,0 +1,12 @@
+diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp
+index f31989c8..94ac3e5a 100644
+--- a/examples/gguf/gguf.cpp
++++ b/examples/gguf/gguf.cpp
+@@ -87,6 +87,7 @@ static bool gguf_ex_read_0(const std::string & fname) {
+     struct gguf_init_params params = {
+         /*.no_alloc = */ false,
+         /*.ctx      = */ NULL,
++	/*.allow_byteswapping = */ true,
+     };
+
+     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
diff --git a/patches/export-lora.cpp.patch b/patches/export-lora.cpp.patch
@@ -0,0 +1,12 @@
+diff --git a/tools/export-lora/export-lora.cpp b/tools/export-lora/export-lora.cpp
+index f038019b..bf750ab9 100644
+--- a/tools/export-lora/export-lora.cpp
++++ b/tools/export-lora/export-lora.cpp
+@@ -50,6 +50,7 @@ static struct gguf_context * load_gguf(std::string & fname, struct ggml_context
+     struct gguf_init_params params = {
+         /*.no_alloc = */ true,
+         /*.ctx      = */ ctx_ggml,
++	/*.allow_byteswapping = */ true,
+     };
+     struct gguf_context * ctx_gguf = gguf_init_from_file(fname.c_str(), params);
+     if (!ctx_gguf) {
diff --git a/patches/ggml-backend-reg.cpp.patch b/patches/ggml-backend-reg.cpp.patch
@@ -1,15 +1,15 @@
 diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
-index 405d8e3..b3682a9 100644
+index f0cdac31..29247c6a 100644
 --- a/ggml/src/ggml-backend-reg.cpp
 +++ b/ggml/src/ggml-backend-reg.cpp
-@@ -556,7 +556,9 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
+@@ -561,7 +561,9 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
  }
-
+ 
  void ggml_backend_load_all() {
 -    ggml_backend_load_all_from_path(nullptr);
 +#ifdef GGML_BACKEND_DL
 +     ggml_backend_load_all_from_path(nullptr);
 +#endif
  }
-
+ 
  void ggml_backend_load_all_from_path(const char * dir_path) {
diff --git a/patches/ggml-cpu-impl.h.patch b/patches/ggml-cpu-impl.h.patch
@@ -0,0 +1,31 @@
+diff --git a/ggml/src/ggml-cpu/ggml-cpu-impl.h b/ggml/src/ggml-cpu/ggml-cpu-impl.h
+index d839cf5c..3f8532f7 100644
+--- a/ggml/src/ggml-cpu/ggml-cpu-impl.h
++++ b/ggml/src/ggml-cpu/ggml-cpu-impl.h
+@@ -68,6 +68,15 @@ struct ggml_compute_params {
+ #endif  // __VXE2__
+ #endif  // __s390x__ && __VEC__
+
++#if defined(__MVS__) && defined(__VEC__)
++#ifndef __VXE__
++#define __VXE__
++#endif  // __VXE__
++#ifndef __VXE2__
++#define __VXE2__
++#endif  // __VXE2__
++#endif  // __MVS__ && __VEC__
++
+ #if defined(__s390x__) && defined(GGML_NNPA)
+ #ifndef __NNPA__
+ #define __NNPA__
+@@ -352,8 +361,9 @@ inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b)
+ #endif
+
+ #if defined(__VXE__) || defined(__VXE2__)
++#ifndef __VEC__
+ #include <vecintrin.h>
+-
++#endif
+ #define vec_neg(a)    (-(a))                // Vector Negate
+ #define vec_add(a, b) ((a) + (b))           // Vector Add
+ #define vec_sub(a, b) ((a) - (b))           // Vector Subtract
diff --git a/patches/ggml-cpu.c.patch b/patches/ggml-cpu.c.patch
@@ -1,12 +1,13 @@
 diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c
-index c7426df..e074799 100644
+index c5271b77..6a547e4b 100644
 --- a/ggml/src/ggml-cpu/ggml-cpu.c
 +++ b/ggml/src/ggml-cpu/ggml-cpu.c
 @@ -17,7 +17,7 @@
-
+ 
  #if defined(_MSC_VER) || defined(__MINGW32__)
  #include <malloc.h> // using malloc.h with MSC/MINGW
 -#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
 +#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) && !defined(__MVS__)
  #include <alloca.h>
  #endif
+