
Commit b73e203

Merge pull request #4 from PESU-IBM-GRM-zOS-AI-Inferencing/main
Enable automatic endianness conversion, SIMD & MASS support
2 parents fa97aad + 5d337b7 commit b73e203


41 files changed: +1377, -326 lines

README.md

Lines changed: 4 additions & 3 deletions
@@ -2,7 +2,7 @@
 
 # llama.cpp
 
-Enable AI inferencing on z/os
+Enable AI inferencing on z/OS
 
 # Installation and Usage
 
@@ -27,8 +27,9 @@ See the [zopen porting guide](https://zopen.community/#/Guides/Porting) for more
 
 # Documentation
 
-
 # Troubleshooting
 
+While building if an error is encountered in the `ggml-cpu.cpp` file (perhaps related to pthread), run `zopen upgrade zoslib -y` and try building again.
+
 # Contributing
-Contributions are welcome! Please follow the [zopen contribution guidelines](https://github.com/zopencommunity/meta/blob/main/CONTRIBUTING.md).
+Contributions are welcome! Please follow the [zopen contribution guidelines](https://github.com/zopencommunity/meta/blob/main/CONTRIBUTING.md).

buildenv

Lines changed: 32 additions & 13 deletions
@@ -3,21 +3,23 @@ export ZOPEN_STABLE_DEPS="zoslib make cmake curl"
 export ZOPEN_DEV_URL="https://github.com/ggerganov/llama.cpp.git"
 export ZOPEN_DEV_DEPS="zoslib make cmake curl openssl libssh2 zlib libpsl"
 export ZOPEN_CATEGORIES="ai"
-export ZOPEN_DEV_TAG="master"
+export ZOPEN_DEV_TAG="b6027"
 export ZOPEN_NAME="llamacpp-master"
 export ZOPEN_RUNTIME_DEPS="ncurses"
 
-# rm -f "llama"
-# ln -s "llama.cpp" "llama"
-# ln -s "llama.cpp" $ZOPEN_NAME
+# export ZOPEN_SKIP_ZOSLIB_ENV_HOOK=1
+
+rm -f "llama"
+ln -s "llama.cpp" "llama"
+ln -s "llama.cpp" $ZOPEN_NAME
 
 export ZOPEN_COMP="CLANG"
 # set env variables
 # export CURL_HOME="/data/zopen/usr/local/zopen/curl/curl"
-# export BLAS_HOME="/usr/lpp/cbclib"
+export BLAS_HOME="/usr/lpp/cbclib"
 
 export ZOPEN_CONFIGURE="cmake"
-export ZOPEN_CONFIGURE_OPTS="-B ../build --install-prefix \"\$ZOPEN_INSTALL_DIR/\" -DCURL_LIBRARY=\$CURL_HOME/lib/libcurl.a -DCURL_INCLUDE_DIR=\$CURL_HOME/include -DBUILD_SHARED_LIBS_DEFAULT=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_STATIC=ON -DGGML_BACKEND_DL=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS=\$BLAS_HOME/include/openblas -DBLAS_LIBRARIES=\$BLAS_HOME/lib/libopenblas.so -DLLAMA_BUILD_TESTS=ON ."
+export ZOPEN_CONFIGURE_OPTS="-B ../build --install-prefix \"\$ZOPEN_INSTALL_DIR/\" -DCMAKE_C_FLAGS=\"-fzvector -m64 -march=z15 -lmass.arch13\" -DCMAKE_C_STANDARD=11 -DCMAKE_C_STANDARD_REQUIRED=ON -DCMAKE_C_EXTENSIONS=OFF -DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_SSL=ON -DOPENSSL_ROOT_DIR=\$OPENSSL_HOME -DCURL_LIBRARY=\$CURL_HOME/lib/libcurl.a -DCURL_INCLUDE_DIR=\$CURL_HOME/include -DBUILD_SHARED_LIBS_DEFAULT=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_STATIC=ON -DGGML_BACKEND_DL=OFF -DGGML_OPENBLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS=/usr/lpp/cbclib/include/openblas -DBLAS_LIBRARIES=/usr/lpp/cbclib/lib/libopenblas.so -DLLAMA_BUILD_TESTS=ON -DLLAMA_CURL=ON ."
 
 export ZOPEN_MAKE="cmake"
 export ZOPEN_MAKE_OPTS="--build ../build --parallel \$ZOPEN_NUM_JOBS --config Release"
@@ -39,10 +41,10 @@ zopen_check_results()
 dir="$1"
 pfx="$2"
 chk="$1/$2_check.log"
-
+
 if [[ -f "$chk" ]]; then
-total=$(grep -cE "Test #[0-9]+" "$chk")
-failed=$(grep -cE "Failed|Subprocess aborted" "$chk")
+total=$(grep -cE "Test +#" "$chk")
+failed=$(grep -cE "\*\*\*Failed|Subprocess aborted\*\*\*" "$chk")
 skipped=$(grep -c "Skipped" "$chk")
 passed=$((total - failed - skipped))
 else
@@ -57,15 +59,32 @@ zopen_check_results()
 echo "actualPassed:$passed"
 echo "actualSkipped:$skipped"
 echo "totalTests:$total"
-echo "expectedFailures:0"
+echo "expectedFailures:3"
 echo "expectedTotalTests:$total"
 }
 
-zopen_append_to_env()
+zopen_pre_check()
 {
-# echo envars outside of PATH, MANPATH, LIBPATH
+# unset SSL_CERT_FILE
+unset SSL_CERT_PATH
+export SSL_CERT_PATH=$(curl-config --ca)
+export CFLAGS="$CFLAGS -march=z15 -mzvector"
+export CPPFLAGS="$CPPFLAGS -march=z15"
 }
 
+# zopen_append_to_env()
+# {
+# export SSL_CERT_PATH=$(curl-config --ca)
+# # echo envars outside of PATH, MANPATH, LIBPATH
+# }
+
+# zopen_append_to_zoslib_env()
+# {
+# cat<<EOF
+# SSL_CERT_PATH|set|$(curl-config --ca)
+# EOF
+# }
+
 zopen_append_to_setup()
 {
 # echo commands that will run when installing via setup.sh
@@ -76,4 +95,4 @@ zopen_get_version()
 # Modify to echo the version of your tool/library
 # Rather than hardcoding the version, obtain the version by running the tool/library
 echo "1.0.0"
-}
+}

examples/frontend/src/components/ChatMode.js

Lines changed: 1 addition & 1 deletion
@@ -139,7 +139,7 @@ const ChatMode = ({ conversationStarted, setConversationStarted }) => {
 onClick={handleChatSubmit}
 disabled={isTyping}
 >
-➤
+➤
 </button>
 </div>
 </div>

patches/CMakeLists.txt.patch

Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
+diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt
+index 66a5ad8d..f71c7dec 100644
+--- a/ggml/src/ggml-cpu/CMakeLists.txt
++++ b/ggml/src/ggml-cpu/CMakeLists.txt
+@@ -51,7 +51,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
+
+ target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
+ target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)
+-
++ list(APPEND ARCH_FLAGS -fzvector -m64 -march=z15)
+ if (APPLE AND GGML_ACCELERATE)
+ find_library(ACCELERATE_FRAMEWORK Accelerate)
+ if (ACCELERATE_FRAMEWORK)
+@@ -94,7 +94,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
+ target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_HBM)
+
+ target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind)
+- endif()
++ endif()
+
+ if (GGML_SYSTEM_ARCH STREQUAL "ARM")
+ message(STATUS "ARM detected")
+@@ -463,11 +463,10 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
+ message(WARNING "Unknown target. If you are compiling for z14 and earlier, you might have to add -DGGML_VXE=OFF.")
+ list(APPEND ARCH_FLAGS -march=native -mtune=native)
+ endif()
+-
+ if (GGML_VXE)
+ message(STATUS "VX/VXE/VXE2 enabled")
+ list(APPEND ARCH_FLAGS -mvx -mzvector)
+- list(APPEND ARCH_DEFINITIONS GGML_VXE)
++ list(APPEND ARCH_DEFINITIONS GGML_VXE)
+ endif()
+
+ if (GGML_NNPA)
+@@ -480,6 +479,17 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
+ else()
+ message(WARNING "Unknown CPU architecture. Falling back to generic implementations.")
+ list(APPEND ARCH_FLAGS -DGGML_CPU_GENERIC)
++ list(APPEND ARCH_DEFINITIONS GGML_VXE)
++ message(STATUS "Added GGML_VXE Flag")
++ if(NOT DEFINED TARGET_ARCH)
++ set(TARGET_ARCH 13)
++ endif()
++ if(TARGET_ARCH GREATER 13)
++ target_compile_options(${GGML_CPU_NAME} PRIVATE "-qarch=${TARGET_ARCH}")
++ endif()
++ target_include_directories(${GGML_CPU_NAME} PRIVATE /usr/include)
++ target_link_libraries(${GGML_CPU_NAME} PRIVATE "/usr/lpp/cbclib/lib/libmass.arch${TARGET_ARCH}.a")
++ message(STATUS "Found MASS: /usr/lpp/cbclib/lib/libmass.arch${TARGET_ARCH}.a")
+ endif()
+
+ if (GGML_CPU_REPACK)
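The new CMake patch hard-wires the z/Architecture SIMD flags (-fzvector, -m64, -march=z15), keeps the GGML_VXE define even on the generic fallback path, and links the IBM MASS library (libmass.arch${TARGET_ARCH}.a). As a rough illustration of what those flags unlock, here is a minimal stand-alone sketch of z vector intrinsics; it is not part of this commit, and it assumes a z13-or-later machine and a compiler that accepts -mzvector (or -fzvector on z/OS) and ships <vecintrin.h>.

// Illustrative only -- not code from this commit.
// Assumed build command: clang++ -march=z15 -mzvector simd_sketch.cpp
#include <vecintrin.h>   // z/Architecture vector built-ins
#include <cstdio>
#include <cstring>

int main() {
    // Two 128-bit vector registers, four 32-bit floats each.
    __vector float a = {1.0f, 2.0f, 3.0f, 4.0f};
    __vector float b = {10.0f, 20.0f, 30.0f, 40.0f};

    // Fused multiply-add across all four lanes: c = a * b + b.
    __vector float c = vec_madd(a, b, b);

    float out[4];
    std::memcpy(out, &c, sizeof(out));   // copy the lanes back to scalar memory
    for (int i = 0; i < 4; ++i) {
        std::printf("lane %d: %.1f\n", i, out[i]);
    }
    return 0;
}

This is the kind of per-lane arithmetic the GGML_VXE code paths in ggml rely on; the MASS archive linked above supplies tuned math routines on top of it.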

patches/arg.cpp.patch

Lines changed: 13 additions & 3 deletions
@@ -1,5 +1,5 @@
 diff --git a/common/arg.cpp b/common/arg.cpp
-index 40af7e57..46fec792 100644
+index 06005359..28220aac 100644
 --- a/common/arg.cpp
 +++ b/common/arg.cpp
 @@ -36,6 +36,9 @@
@@ -10,7 +10,7 @@ index 40af7e57..46fec792 100644
 +# include <cstdlib>
 +# endif
 #endif
-
+
 using json = nlohmann::ordered_json;
 @@ -195,6 +198,8 @@ bool common_has_curl() {
 # endif
@@ -30,7 +30,17 @@ index 40af7e57..46fec792 100644
 http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
 // Check if hf-token or bearer-token was specified
 if (!bearer_token.empty()) {
-@@ -569,6 +574,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
+@@ -506,7 +511,8 @@ static bool common_download_model(
+ struct gguf_init_params gguf_params = {
+ /*.no_alloc = */ true,
+ /*.ctx = */ NULL,
+- };
++ /* .allow_byteswapping = */ true,
++ };
+ auto * ctx_gguf = gguf_init_from_file(model.path.c_str(), gguf_params);
+ if (!ctx_gguf) {
+ LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, model.path.c_str());
+@@ -569,6 +575,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
 curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
 curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L);
 curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
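This patch, like the clip.cpp, common.cpp and example patches that follow, applies the same one-line change at each gguf_init_from_file() call site: the gguf_init_params aggregate gains an allow_byteswapping member so the loader may byte-swap little-endian GGUF data on big-endian z/OS. Below is a minimal sketch of the resulting call-site pattern; it assumes the patched gguf.h from this port, where allow_byteswapping is the last field of gguf_init_params (upstream llama.cpp has only no_alloc and ctx), and "model.gguf" is just a placeholder path.

// Sketch of the call-site pattern these patches introduce (assumes the
// patched gguf.h from this port; not upstream API).
#include "ggml.h"
#include "gguf.h"
#include <cstdio>

static struct gguf_context * load_meta(const char * fname, struct ggml_context ** meta) {
    struct gguf_init_params params = {
        /*.no_alloc           = */ true,   // metadata only, no tensor data allocation
        /*.ctx                = */ meta,
        /*.allow_byteswapping = */ true,   // let the loader byte-swap little-endian files on z/OS
    };
    return gguf_init_from_file(fname, params);
}

int main() {
    struct ggml_context * meta = NULL;
    struct gguf_context * ctx = load_meta("model.gguf", &meta);
    if (!ctx) {
        std::fprintf(stderr, "failed to load model.gguf\n");
        return 1;
    }
    std::printf("GGUF loaded, %lld key-value pairs\n", (long long) gguf_get_n_kv(ctx));
    gguf_free(ctx);
    ggml_free(meta);
    return 0;
}

Because the new field is appended last, the existing three-value aggregate initializers keep compiling once the extra line is added, which is exactly what each patch does.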

patches/clip.cpp.patch

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
+index a4b62f9a..2d89d4b1 100644
+--- a/tools/mtmd/clip.cpp
++++ b/tools/mtmd/clip.cpp
+@@ -2028,6 +2028,7 @@ struct clip_model_loader {
+ struct gguf_init_params params = {
+ /*.no_alloc = */ true,
+ /*.ctx = */ &meta,
++ /*.allow_byteswapping = */ true,
+ };
+
+ ctx_gguf = gguf_context_ptr(gguf_init_from_file(fname, params));

patches/common.cpp.patch

Lines changed: 10 additions & 2 deletions
@@ -1,8 +1,8 @@
 diff --git a/common/common.cpp b/common/common.cpp
-index 4cc40ed..234ad95 100644
+index d8c4d988..00aa7d43 100644
 --- a/common/common.cpp
 +++ b/common/common.cpp
-@@ -851,7 +851,7 @@ std::string fs_get_cache_directory() {
+@@ -874,7 +874,7 @@ std::string fs_get_cache_directory() {
 if (getenv("LLAMA_CACHE")) {
 cache_directory = std::getenv("LLAMA_CACHE");
 } else {
@@ -11,3 +11,11 @@ index 4cc40ed..234ad95 100644
 if (std::getenv("XDG_CACHE_HOME")) {
 cache_directory = std::getenv("XDG_CACHE_HOME");
 } else {
+@@ -1436,6 +1436,7 @@ static common_control_vector_data common_control_vector_load_one(const common_co
+ struct gguf_init_params meta_gguf_params = {
+ /* .no_alloc = */ false,
+ /* .ctx = */ &ctx,
++ /* .allow_byteswapping = */ true,
+ };
+ struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
+ if (!ctx_gguf) {
Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+index bdf0eed2..b36170bc 100644
+--- a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
++++ b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+@@ -535,6 +535,7 @@ static void load_vocab(const char * filename, const Config * config, struct my_l
+ struct gguf_init_params params = {
+ /*.no_alloc = */ false,
+ /*.ctx = */ &ctx_data,
++ /*.allow_byteswapping = */ true,
+ };
+
+ struct gguf_context * ctx = gguf_init_from_file(filename, params);

patches/examples_gguf.cpp.patch

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp
+index f31989c8..94ac3e5a 100644
+--- a/examples/gguf/gguf.cpp
++++ b/examples/gguf/gguf.cpp
+@@ -87,6 +87,7 @@ static bool gguf_ex_read_0(const std::string & fname) {
+ struct gguf_init_params params = {
+ /*.no_alloc = */ false,
+ /*.ctx = */ NULL,
++ /*.allow_byteswapping = */ true,
+ };
+
+ struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);

patches/export-lora.cpp.patch

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+diff --git a/tools/export-lora/export-lora.cpp b/tools/export-lora/export-lora.cpp
+index f038019b..bf750ab9 100644
+--- a/tools/export-lora/export-lora.cpp
++++ b/tools/export-lora/export-lora.cpp
+@@ -50,6 +50,7 @@ static struct gguf_context * load_gguf(std::string & fname, struct ggml_context
+ struct gguf_init_params params = {
+ /*.no_alloc = */ true,
+ /*.ctx = */ ctx_ggml,
++ /*.allow_byteswapping = */ true,
+ };
+ struct gguf_context * ctx_gguf = gguf_init_from_file(fname.c_str(), params);
+ if (!ctx_gguf) {
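Taken together, the gguf patches in this commit all exist for the same reason: GGUF model files are normally written little-endian, while z/OS on IBM Z is big-endian, so every multi-byte field has to be byte-swapped as it is read. The sketch below is illustrative only (none of it is code from this commit); it just demonstrates the raw conversion that the allow_byteswapping flag authorizes the loader to perform automatically.

// Illustrative only -- not code from this commit.
#include <cstdint>
#include <cstdio>
#include <cstring>

static bool host_is_big_endian() {
    const uint16_t probe = 1;
    unsigned char first;
    std::memcpy(&first, &probe, sizeof(first));
    return first == 0;   // the high byte comes first on big-endian hosts such as z/OS
}

static uint32_t bswap32(uint32_t v) {
    return ((v & 0x000000FFu) << 24) |
           ((v & 0x0000FF00u) <<  8) |
           ((v & 0x00FF0000u) >>  8) |
           ((v & 0xFF000000u) >> 24);
}

int main() {
    // A little-endian uint32 as it would sit in a GGUF file (value 3, e.g. a version field).
    const unsigned char file_bytes[4] = {0x03, 0x00, 0x00, 0x00};

    uint32_t raw;
    std::memcpy(&raw, file_bytes, sizeof(raw));

    // On a little-endian host raw is already 3; on big-endian z/OS it comes out
    // as 0x03000000 and must be swapped before use.
    const uint32_t value = host_is_big_endian() ? bswap32(raw) : raw;
    std::printf("decoded value: %u\n", value);
    return 0;
}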
