Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# llama.cpp

Enable AI inferencing on z/os
Enable AI inferencing on z/OS

# Installation and Usage

Expand All @@ -27,8 +27,9 @@ See the [zopen porting guide](https://zopen.community/#/Guides/Porting) for more

# Documentation


# Troubleshooting

If you encounter an error in the `ggml-cpu.cpp` file while building (perhaps related to pthread), run `zopen upgrade zoslib -y` and try building again.

# Contributing
Contributions are welcome! Please follow the [zopen contribution guidelines](https://github.com/zopencommunity/meta/blob/main/CONTRIBUTING.md).
Contributions are welcome! Please follow the [zopen contribution guidelines](https://github.com/zopencommunity/meta/blob/main/CONTRIBUTING.md).
45 changes: 32 additions & 13 deletions buildenv
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,23 @@ export ZOPEN_STABLE_DEPS="zoslib make cmake curl"
export ZOPEN_DEV_URL="https://github.com/ggerganov/llama.cpp.git"
export ZOPEN_DEV_DEPS="zoslib make cmake curl openssl libssh2 zlib libpsl"
export ZOPEN_CATEGORIES="ai"
export ZOPEN_DEV_TAG="master"
export ZOPEN_DEV_TAG="b6027"
export ZOPEN_NAME="llamacpp-master"
export ZOPEN_RUNTIME_DEPS="ncurses"

# rm -f "llama"
# ln -s "llama.cpp" "llama"
# ln -s "llama.cpp" $ZOPEN_NAME
# export ZOPEN_SKIP_ZOSLIB_ENV_HOOK=1

rm -f "llama"
ln -s "llama.cpp" "llama"
ln -s "llama.cpp" $ZOPEN_NAME

export ZOPEN_COMP="CLANG"
# set env variables
# export CURL_HOME="/data/zopen/usr/local/zopen/curl/curl"
# export BLAS_HOME="/usr/lpp/cbclib"
export BLAS_HOME="/usr/lpp/cbclib"

export ZOPEN_CONFIGURE="cmake"
export ZOPEN_CONFIGURE_OPTS="-B ../build --install-prefix \"\$ZOPEN_INSTALL_DIR/\" -DCURL_LIBRARY=\$CURL_HOME/lib/libcurl.a -DCURL_INCLUDE_DIR=\$CURL_HOME/include -DBUILD_SHARED_LIBS_DEFAULT=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_STATIC=ON -DGGML_BACKEND_DL=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS=\$BLAS_HOME/include/openblas -DBLAS_LIBRARIES=\$BLAS_HOME/lib/libopenblas.so -DLLAMA_BUILD_TESTS=ON ."
export ZOPEN_CONFIGURE_OPTS="-B ../build --install-prefix \"\$ZOPEN_INSTALL_DIR/\" -DCMAKE_C_FLAGS=\"-fzvector -m64 -march=z15 -lmass.arch13\" -DCMAKE_C_STANDARD=11 -DCMAKE_C_STANDARD_REQUIRED=ON -DCMAKE_C_EXTENSIONS=OFF -DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_SSL=ON -DOPENSSL_ROOT_DIR=\$OPENSSL_HOME -DCURL_LIBRARY=\$CURL_HOME/lib/libcurl.a -DCURL_INCLUDE_DIR=\$CURL_HOME/include -DBUILD_SHARED_LIBS_DEFAULT=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_STATIC=ON -DGGML_BACKEND_DL=OFF -DGGML_OPENBLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS=/usr/lpp/cbclib/include/openblas -DBLAS_LIBRARIES=/usr/lpp/cbclib/lib/libopenblas.so -DLLAMA_BUILD_TESTS=ON -DLLAMA_CURL=ON ."

export ZOPEN_MAKE="cmake"
export ZOPEN_MAKE_OPTS="--build ../build --parallel \$ZOPEN_NUM_JOBS --config Release"
Expand All @@ -39,10 +41,10 @@ zopen_check_results()
dir="$1"
pfx="$2"
chk="$1/$2_check.log"

if [[ -f "$chk" ]]; then
total=$(grep -cE "Test #[0-9]+" "$chk")
failed=$(grep -cE "Failed|Subprocess aborted" "$chk")
total=$(grep -cE "Test +#" "$chk")
failed=$(grep -cE "\*\*\*Failed|Subprocess aborted\*\*\*" "$chk")
skipped=$(grep -c "Skipped" "$chk")
passed=$((total - failed - skipped))
else
Expand All @@ -57,15 +59,32 @@ zopen_check_results()
echo "actualPassed:$passed"
echo "actualSkipped:$skipped"
echo "totalTests:$total"
echo "expectedFailures:0"
echo "expectedFailures:3"
echo "expectedTotalTests:$total"
}

zopen_append_to_env()
zopen_pre_check()
{
# echo envars outside of PATH, MANPATH, LIBPATH
# unset SSL_CERT_FILE
unset SSL_CERT_PATH
export SSL_CERT_PATH=$(curl-config --ca)
export CFLAGS="$CFLAGS -march=z15 -mzvector"
export CPPFLAGS="$CPPFLAGS -march=z15"
}

# zopen_append_to_env()
# {
# export SSL_CERT_PATH=$(curl-config --ca)
# # echo envars outside of PATH, MANPATH, LIBPATH
# }

# zopen_append_to_zoslib_env()
# {
# cat<<EOF
# SSL_CERT_PATH|set|$(curl-config --ca)
# EOF
# }

zopen_append_to_setup()
{
# echo commands that will run when installing via setup.sh
Expand All @@ -76,4 +95,4 @@ zopen_get_version()
# Modify to echo the version of your tool/library
# Rather than hardcoding the version, obtain the version by running the tool/library
echo "1.0.0"
}
}
2 changes: 1 addition & 1 deletion examples/frontend/src/components/ChatMode.js
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ const ChatMode = ({ conversationStarted, setConversationStarted }) => {
onClick={handleChatSubmit}
disabled={isTyping}
>
➤
➤
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this change intentional?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it's some kind of encoding issue with the send symbol on the demo website (I have attached a picture of it).

We weren't able to `git restore` the file to its original state, and it seemed to be a one-time issue with encoding that character, so we just committed it for now.

image

</button>
</div>
</div>
Expand Down
53 changes: 53 additions & 0 deletions patches/CMakeLists.txt.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt
index 66a5ad8d..f71c7dec 100644
--- a/ggml/src/ggml-cpu/CMakeLists.txt
+++ b/ggml/src/ggml-cpu/CMakeLists.txt
@@ -51,7 +51,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)

target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)
-
+ list(APPEND ARCH_FLAGS -fzvector -m64 -march=z15)
if (APPLE AND GGML_ACCELERATE)
find_library(ACCELERATE_FRAMEWORK Accelerate)
if (ACCELERATE_FRAMEWORK)
@@ -94,7 +94,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_HBM)

target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind)
- endif()
+ endif()

if (GGML_SYSTEM_ARCH STREQUAL "ARM")
message(STATUS "ARM detected")
@@ -463,11 +463,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.")
list(APPEND ARCH_FLAGS -march=native -mtune=native)
endif()
-
if (GGML_VXE)
message(STATUS "VX/VXE/VXE2 enabled")
list(APPEND ARCH_FLAGS -mvx -mzvector)
- list(APPEND ARCH_DEFINITIONS GGML_VXE)
+ list(APPEND ARCH_DEFINITIONS GGML_VXE)
endif()

if (GGML_NNPA)
@@ -480,6 +479,17 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
else()
message(WARNING "Unknown CPU architecture. Falling back to generic implementations.")
list(APPEND ARCH_FLAGS -DGGML_CPU_GENERIC)
+ list(APPEND ARCH_DEFINITIONS GGML_VXE)
+ message(STATUS "Added GGML_VXE Flag")
+ if(NOT DEFINED TARGET_ARCH)
+ set(TARGET_ARCH 13)
+ endif()
+ if(TARGET_ARCH GREATER 13)
+ target_compile_options(${GGML_CPU_NAME} PRIVATE "-qarch=${TARGET_ARCH}")
+ endif()
+ target_include_directories(${GGML_CPU_NAME} PRIVATE /usr/include)
+ target_link_libraries(${GGML_CPU_NAME} PRIVATE "/usr/lpp/cbclib/lib/libmass.arch${TARGET_ARCH}.a")
+ message(STATUS "Found MASS: /usr/lpp/cbclib/lib/libmass.arch${TARGET_ARCH}.a")
endif()

if (GGML_CPU_REPACK)
16 changes: 13 additions & 3 deletions patches/arg.cpp.patch
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
diff --git a/common/arg.cpp b/common/arg.cpp
index 40af7e57..46fec792 100644
index 06005359..28220aac 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -36,6 +36,9 @@
Expand All @@ -10,7 +10,7 @@ index 40af7e57..46fec792 100644
+# include <cstdlib>
+# endif
#endif

using json = nlohmann::ordered_json;
@@ -195,6 +198,8 @@ bool common_has_curl() {
# endif
Expand All @@ -30,7 +30,17 @@ index 40af7e57..46fec792 100644
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
// Check if hf-token or bearer-token was specified
if (!bearer_token.empty()) {
@@ -569,6 +574,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
@@ -506,7 +511,8 @@ static bool common_download_model(
struct gguf_init_params gguf_params = {
/*.no_alloc = */ true,
/*.ctx = */ NULL,
- };
+ /* .allow_byteswapping = */ true,
+ };
auto * ctx_gguf = gguf_init_from_file(model.path.c_str(), gguf_params);
if (!ctx_gguf) {
LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, model.path.c_str());
@@ -569,6 +575,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
Expand Down
12 changes: 12 additions & 0 deletions patches/clip.cpp.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
index a4b62f9a..2d89d4b1 100644
--- a/tools/mtmd/clip.cpp
+++ b/tools/mtmd/clip.cpp
@@ -2028,6 +2028,7 @@ struct clip_model_loader {
struct gguf_init_params params = {
/*.no_alloc = */ true,
/*.ctx = */ &meta,
+ /*.allow_byteswapping = */ true,
};

ctx_gguf = gguf_context_ptr(gguf_init_from_file(fname, params));
12 changes: 10 additions & 2 deletions patches/common.cpp.patch
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
diff --git a/common/common.cpp b/common/common.cpp
index 4cc40ed..234ad95 100644
index d8c4d988..00aa7d43 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -851,7 +851,7 @@ std::string fs_get_cache_directory() {
@@ -874,7 +874,7 @@ std::string fs_get_cache_directory() {
if (getenv("LLAMA_CACHE")) {
cache_directory = std::getenv("LLAMA_CACHE");
} else {
Expand All @@ -11,3 +11,11 @@ index 4cc40ed..234ad95 100644
if (std::getenv("XDG_CACHE_HOME")) {
cache_directory = std::getenv("XDG_CACHE_HOME");
} else {
@@ -1436,6 +1436,7 @@ static common_control_vector_data common_control_vector_load_one(const common_co
struct gguf_init_params meta_gguf_params = {
/* .no_alloc = */ false,
/* .ctx = */ &ctx,
+ /* .allow_byteswapping = */ true,
};
struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
if (!ctx_gguf) {
12 changes: 12 additions & 0 deletions patches/convert-llama2c-to-ggml.cpp.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
index bdf0eed2..b36170bc 100644
--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
@@ -535,6 +535,7 @@ static void load_vocab(const char * filename, const Config * config, struct my_l
struct gguf_init_params params = {
/*.no_alloc = */ false,
/*.ctx = */ &ctx_data,
+ /*.allow_byteswapping = */ true,
};

struct gguf_context * ctx = gguf_init_from_file(filename, params);
12 changes: 12 additions & 0 deletions patches/examples_gguf.cpp.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp
index f31989c8..94ac3e5a 100644
--- a/examples/gguf/gguf.cpp
+++ b/examples/gguf/gguf.cpp
@@ -87,6 +87,7 @@ static bool gguf_ex_read_0(const std::string & fname) {
struct gguf_init_params params = {
/*.no_alloc = */ false,
/*.ctx = */ NULL,
+ /*.allow_byteswapping = */ true,
};

struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
12 changes: 12 additions & 0 deletions patches/export-lora.cpp.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
diff --git a/tools/export-lora/export-lora.cpp b/tools/export-lora/export-lora.cpp
index f038019b..bf750ab9 100644
--- a/tools/export-lora/export-lora.cpp
+++ b/tools/export-lora/export-lora.cpp
@@ -50,6 +50,7 @@ static struct gguf_context * load_gguf(std::string & fname, struct ggml_context
struct gguf_init_params params = {
/*.no_alloc = */ true,
/*.ctx = */ ctx_ggml,
+ /*.allow_byteswapping = */ true,
};
struct gguf_context * ctx_gguf = gguf_init_from_file(fname.c_str(), params);
if (!ctx_gguf) {
8 changes: 4 additions & 4 deletions patches/ggml-backend-reg.cpp.patch
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
index 405d8e3..b3682a9 100644
index f0cdac31..29247c6a 100644
--- a/ggml/src/ggml-backend-reg.cpp
+++ b/ggml/src/ggml-backend-reg.cpp
@@ -556,7 +556,9 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
@@ -561,7 +561,9 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
}

void ggml_backend_load_all() {
- ggml_backend_load_all_from_path(nullptr);
+#ifdef GGML_BACKEND_DL
+ ggml_backend_load_all_from_path(nullptr);
+#endif
}

void ggml_backend_load_all_from_path(const char * dir_path) {
31 changes: 31 additions & 0 deletions patches/ggml-cpu-impl.h.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
diff --git a/ggml/src/ggml-cpu/ggml-cpu-impl.h b/ggml/src/ggml-cpu/ggml-cpu-impl.h
index d839cf5c..3f8532f7 100644
--- a/ggml/src/ggml-cpu/ggml-cpu-impl.h
+++ b/ggml/src/ggml-cpu/ggml-cpu-impl.h
@@ -68,6 +68,15 @@ struct ggml_compute_params {
#endif // __VXE2__
#endif // __s390x__ && __VEC__

+#if defined(__MVS__) && defined(__VEC__)
+#ifndef __VXE__
+#define __VXE__
+#endif // __VXE__
+#ifndef __VXE2__
+#define __VXE2__
+#endif // __VXE2__
+#endif // __MVS__ && __VEC__
+
#if defined(__s390x__) && defined(GGML_NNPA)
#ifndef __NNPA__
#define __NNPA__
@@ -352,8 +361,9 @@ inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b)
#endif

#if defined(__VXE__) || defined(__VXE2__)
+#ifndef __VEC__
#include <vecintrin.h>
-
+#endif
#define vec_neg(a) (-(a)) // Vector Negate
#define vec_add(a, b) ((a) + (b)) // Vector Add
#define vec_sub(a, b) ((a) - (b)) // Vector Subtract
5 changes: 3 additions & 2 deletions patches/ggml-cpu.c.patch
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c
index c7426df..e074799 100644
index c5271b77..6a547e4b 100644
--- a/ggml/src/ggml-cpu/ggml-cpu.c
+++ b/ggml/src/ggml-cpu/ggml-cpu.c
@@ -17,7 +17,7 @@

#if defined(_MSC_VER) || defined(__MINGW32__)
#include <malloc.h> // using malloc.h with MSC/MINGW
-#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
+#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__) && !defined(__MVS__)
#include <alloca.h>
#endif

Loading