Skip to content

Commit 5d337b7

Browse files
committed
Implemented fixes suggested in PR review
1 parent b52f31b commit 5d337b7

File tree

3 files changed

+16
-64
lines changed

3 files changed

+16
-64
lines changed

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
# llama.cpp
44

5-
Enable AI inferencing on z/os
5+
Enable AI inferencing on z/OS
66

77
# Installation and Usage
88

@@ -27,8 +27,9 @@ See the [zopen porting guide](https://zopen.community/#/Guides/Porting) for more
2727

2828
# Documentation
2929

30-
3130
# Troubleshooting
3231

32+
If the build fails with an error in the `ggml-cpu.cpp` file (possibly related to pthread), run `zopen upgrade zoslib -y` and then try building again.
33+
3334
# Contributing
34-
Contributions are welcome! Please follow the [zopen contribution guidelines](https://github.com/zopencommunity/meta/blob/main/CONTRIBUTING.md).
35+
Contributions are welcome! Please follow the [zopen contribution guidelines](https://github.com/zopencommunity/meta/blob/main/CONTRIBUTING.md).

buildenv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ export ZOPEN_DEV_TAG="b6027"
77
export ZOPEN_NAME="llamacpp-master"
88
export ZOPEN_RUNTIME_DEPS="ncurses"
99

10-
export ZOPEN_SKIP_ZOSLIB_ENV_HOOK=1
10+
# export ZOPEN_SKIP_ZOSLIB_ENV_HOOK=1
1111

1212
rm -f "llama"
1313
ln -s "llama.cpp" "llama"

patches/repack.cpp.patch

Lines changed: 11 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,69 +1,20 @@
11
diff --git a/ggml/src/ggml-cpu/repack.cpp b/ggml/src/ggml-cpu/repack.cpp
2-
index 74c1c029..134b6daa 100644
2+
index 74c1c02..604bd17 100644
33
--- a/ggml/src/ggml-cpu/repack.cpp
44
+++ b/ggml/src/ggml-cpu/repack.cpp
5-
@@ -1424,35 +1424,37 @@ static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(cons
5+
@@ -1424,6 +1424,7 @@ static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(cons
66
// instance for IQ4
77
static const ggml::cpu::repack::tensor_traits<block_iq4_nl, 4, 4, GGML_TYPE_Q8_0> iq4_nl_4x4_q8_0;
88

9-
- if (cur->type == GGML_TYPE_Q4_0) {
10-
- if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
11-
- if (cur->ne[1] % 8 == 0) {
12-
- return &q4_0_8x8_q8_0;
13-
- }
14-
- }
15-
- if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
16-
- if (cur->ne[1] % 4 == 0) {
17-
- return &q4_0_4x8_q8_0;
18-
- }
19-
- }
20-
- if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
21-
- if (cur->ne[1] % 4 == 0) {
22-
- return &q4_0_4x4_q8_0;
23-
- }
24-
- }
25-
- } else if (cur->type == GGML_TYPE_Q4_K) {
26-
- if (ggml_cpu_has_avx2()) {
27-
- if (cur->ne[1] % 8 == 0) {
28-
- return &q4_K_8x8_q8_K;
29-
- }
30-
- }
31-
- } else if (cur->type == GGML_TYPE_IQ4_NL) {
32-
- if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
33-
- if (cur->ne[1] % 4 == 0) {
34-
- return &iq4_nl_4x4_q8_0;
35-
- }
36-
- }
37-
- }
38-
+ // if (cur->type == GGML_TYPE_Q4_0) {
39-
+ // if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
40-
+ // if (cur->ne[1] % 8 == 0) {
41-
+ // return &q4_0_8x8_q8_0;
42-
+ // }
43-
+ // }
44-
+ // if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
45-
+ // if (cur->ne[1] % 4 == 0) {
46-
+ // return &q4_0_4x8_q8_0;
47-
+ // }
48-
+ // }
49-
+ // if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
50-
+ // if (cur->ne[1] % 4 == 0) {
51-
+ // return &q4_0_4x4_q8_0;
52-
+ // }
53-
+ // }
54-
+ // } else if (cur->type == GGML_TYPE_Q4_K) {
55-
+ // if (ggml_cpu_has_avx2()) {
56-
+ // if (cur->ne[1] % 8 == 0) {
57-
+ // return &q4_K_8x8_q8_K;
58-
+ // }
59-
+ // }
60-
+ // } else if (cur->type == GGML_TYPE_IQ4_NL) {
61-
+ // if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
62-
+ // if (cur->ne[1] % 4 == 0) {
63-
+ // return &iq4_nl_4x4_q8_0;
64-
+ // }
65-
+ // }
66-
+ // }
9+
+#ifndef __MVS__
10+
if (cur->type == GGML_TYPE_Q4_0) {
11+
if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
12+
if (cur->ne[1] % 8 == 0) {
13+
@@ -1453,6 +1454,9 @@ static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(cons
14+
}
15+
}
16+
}
17+
+#endif
6718
+
6819
+ GGML_LOG_DEBUG("REPACK DEBUG: %s: Repacking disabled for tensor '%s' (type %s)\n", __func__, ggml_get_name(cur) ? ggml_get_name(cur) : "unnamed", ggml_type_name(cur->type));
6920

0 commit comments

Comments
 (0)