
Commit fa97aad

Merge pull request #3 from PESU-IBM-GRM-zOS-AI-Inferencing/main
Port llama.cpp to z/OS
2 parents 4ad096e + 7e4329d commit fa97aad


54 files changed: +20025 -402 lines

.gitattributes

Lines changed: 4 additions & 3 deletions

@@ -1,5 +1,6 @@
 * text working-tree-encoding=ISO8859-1
-*.pem text working-tree-encoding=UTF-8
-*.png binary
-*.gif binary
+*.pem text working-tree-encoding=UTF-8
+*.png -text
+*.gif -text
+*.mp4 -text
 *.pax.Z binary

README.md

Lines changed: 5 additions & 4 deletions

@@ -1,6 +1,8 @@
-# Llamacpp
+[![Automatic version updates](https://github.com/zopencommunity/llama.cppport/actions/workflows/bump.yml/badge.svg)](https://github.com/ZOSOpenTools/llama.cppport/actions/workflows/bump.yml)
 
-A C++ library for writing high-performance network applications
+# llama.cpp
+
+Enable AI inferencing on z/os
 
 # Installation and Usage
 

@@ -27,7 +29,6 @@ See the [zopen porting guide](https://zopen.community/#/Guides/Porting) for more
 
 
 # Troubleshooting
-TBD
 
 # Contributing
-Contributions are welcome! Please follow the [zopen contribution guidelines](https://github.com/zopencommunity/meta/blob/main/CONTRIBUTING.md).
+Contributions are welcome! Please follow the [zopen contribution guidelines](https://github.com/zopencommunity/meta/blob/main/CONTRIBUTING.md).

buildenv

Lines changed: 34 additions & 18 deletions

@@ -1,21 +1,23 @@
 export ZOPEN_BUILD_LINE="DEV"
-export ZOPEN_STABLE_DEPS="zoslib make cmake"
+export ZOPEN_STABLE_DEPS="zoslib make cmake curl"
 export ZOPEN_DEV_URL="https://github.com/ggerganov/llama.cpp.git"
-export ZOPEN_DEV_DEPS="zoslib make cmake"
+export ZOPEN_DEV_DEPS="zoslib make cmake curl openssl libssh2 zlib libpsl"
 export ZOPEN_CATEGORIES="ai"
-export ZOPEN_DEV_TAG="master-9e232f0"
-export ZOPEN_BUILD_LINE="DEV"
+export ZOPEN_DEV_TAG="master"
 export ZOPEN_NAME="llamacpp-master"
 export ZOPEN_RUNTIME_DEPS="ncurses"
 
-rm -f "llama"
-ln -s "llama.cpp" "llama"
-ln -s "llama.cpp" $ZOPEN_NAME
+# rm -f "llama"
+# ln -s "llama.cpp" "llama"
+# ln -s "llama.cpp" $ZOPEN_NAME
 
 export ZOPEN_COMP="CLANG"
+# set env variables
+# export CURL_HOME="/data/zopen/usr/local/zopen/curl/curl"
+# export BLAS_HOME="/usr/lpp/cbclib"
 
 export ZOPEN_CONFIGURE="cmake"
-export ZOPEN_CONFIGURE_OPTS="-B ../build --install-prefix \"\$ZOPEN_INSTALL_DIR/\" ."
+export ZOPEN_CONFIGURE_OPTS="-B ../build --install-prefix \"\$ZOPEN_INSTALL_DIR/\" -DCURL_LIBRARY=\$CURL_HOME/lib/libcurl.a -DCURL_INCLUDE_DIR=\$CURL_HOME/include -DBUILD_SHARED_LIBS_DEFAULT=OFF -DBUILD_SHARED_LIBS=OFF -DGGML_STATIC=ON -DGGML_BACKEND_DL=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS=\$BLAS_HOME/include/openblas -DBLAS_LIBRARIES=\$BLAS_HOME/lib/libopenblas.so -DLLAMA_BUILD_TESTS=ON ."
 
 export ZOPEN_MAKE="cmake"
 export ZOPEN_MAKE_OPTS="--build ../build --parallel \$ZOPEN_NUM_JOBS --config Release"

@@ -24,25 +26,39 @@ export ZOPEN_MAKE_MINIMAL=Y
 export ZOPEN_INSTALL="cmake"
 export ZOPEN_INSTALL_OPTS="--install ../build"
 
-# Needs more testing
-if false; then
-export ZOPEN_EXTRA_CPPFLAGS="-DGGML_USE_OPENBLAS -I /home/itodoro/projects/openblas/openblas/include/openblas -mvx -mzvector -march=z15"
-export ZOPEN_EXTRA_LIBS="/home/itodoro/projects/openblas/openblas/libopenblas.x"
-fi
+export ZOPEN_CHECK="ctest"
+export ZOPEN_CHECK_OPTS="--test-dir ../build --output-on-failure"
 
-export ZOPEN_CHECK="skip"
+# if false; then
+# export ZOPEN_EXTRA_CPPFLAGS="-DGGML_USE_OPENBLAS -I /home/itodoro/projects/openblas/openblas/include/openblas -mvx -mzvector -march=z15"
+# export ZOPEN_EXTRA_LIBS="/home/itodoro/projects/openblas/openblas/libopenblas.x"
+# fi
 
 zopen_check_results()
 {
 dir="$1"
 pfx="$2"
 chk="$1/$2_check.log"
 
+if [[ -f "$chk" ]]; then
+total=$(grep -cE "Test #[0-9]+" "$chk")
+failed=$(grep -cE "Failed|Subprocess aborted" "$chk")
+skipped=$(grep -c "Skipped" "$chk")
+passed=$((total - failed - skipped))
+else
+total=0
+passed=0
+failed=0
+skipped=0
+fi
+
 # Echo the following information to gauge build health
-echo "actualFailures:0"
-echo "totalTests:1"
+echo "actualFailures:$failed"
+echo "actualPassed:$passed"
+echo "actualSkipped:$skipped"
+echo "totalTests:$total"
 echo "expectedFailures:0"
-echo "expectedTotalTests:1"
+echo "expectedTotalTests:$total"
 }
 
 zopen_append_to_env()

@@ -60,4 +76,4 @@ zopen_get_version()
 # Modify to echo the version of your tool/library
 # Rather than hardcoding the version, obtain the version by running the tool/library
 echo "1.0.0"
-}
+}
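
For reference, a minimal sketch of how a buildenv like this is typically exercised, assuming the standard zopen porting workflow from the guide linked in the README and the CURL_HOME/BLAS_HOME locations hinted at in the commented-out lines above. Paths and steps are illustrative, not part of this commit:

```bash
# Illustrative only: build the port locally with the zopen CLI.
# CURL_HOME/BLAS_HOME mirror the commented-out hints in buildenv; adjust for
# your system. ZOPEN_CONFIGURE_OPTS passes them on to cmake.
export CURL_HOME="/data/zopen/usr/local/zopen/curl/curl"
export BLAS_HOME="/usr/lpp/cbclib"

git clone https://github.com/zopencommunity/llamacppport.git
cd llamacppport
zopen build   # runs the cmake configure/build/install steps and the ctest-based ZOPEN_CHECK
```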

cicd-stable.groovy

Lines changed: 14 additions & 0 deletions

@@ -0,0 +1,14 @@
+node('linux')
+{
+stage ('Poll') {
+checkout([
+$class: 'GitSCM',
+branches: [[name: '*/main']],
+doGenerateSubmoduleConfigurations: false,
+extensions: [],
+userRemoteConfigs: [[url: 'https://github.com/zopencommunity/llamacppport.git']]])
+}
+stage('Build') {
+build job: 'Port-Pipeline', parameters: [string(name: 'PORT_GITHUB_REPO', value: 'https://github.com/zopencommunity/llamacppport.git'), string(name: 'PORT_DESCRIPTION', value: 'Enable AI inferencing on z/os' ), string(name: 'BUILD_LINE', value: 'STABLE') ]
+}
+}

cicd.groovy

Lines changed: 2 additions & 2 deletions

@@ -9,6 +9,6 @@ node('linux')
 userRemoteConfigs: [[url: 'https://github.com/zopencommunity/llamacppport.git']]])
 }
 stage('Build') {
-build job: 'Port-Pipeline', parameters: [string(name: 'PORT_GITHUB_REPO', value: 'https://github.com/zopencommunity/llamacppport.git'), string(name: 'PORT_DESCRIPTION', value: "Port of Facebook's LLaMA model in C/C++" )]
+build job: 'Port-Pipeline', parameters: [string(name: 'PORT_GITHUB_REPO', value: 'https://github.com/zopencommunity/llamacppport.git'), string(name: 'PORT_DESCRIPTION', value: 'Enable AI inferencing on z/os' ), string(name: 'BUILD_LINE', value: 'DEV') ]
 }
-}
+}

examples/README.md

Lines changed: 99 additions & 0 deletions

@@ -0,0 +1,99 @@
+# zOpen Chat
+
+## Description
+**zOpen Chat** is a web-based interface that enables natural language interaction with AI models (like LLaMA 3.2 and Granite 3), designed to explore and work with open-source tools in the z/OS ecosystem.
+
+This project features:
+- A **web-based interface** for natural interactions.
+- Integration with **llamacpp** backend for inferencing.
+- Built-in tools to **search GitHub repositories** relevant to z/OS and files present in the **local file system**.
+
+## Use cases covered
+1. **Chat**: A conversational Q&A system where users can ask questions and receive clear, concise answers.
+2. **Explain Code**: Request short, contextual explanations for specific code files or components.
+3. **Generate Tests**: Ask the system to generate unit tests or test cases for specific files using natural language prompts.
+
+The files can be extracted from the repositories in `zopencommunity` or the `local file system`.
+> For usage examples and UI walkthroughs, see `docs/WEBUI.md`
+<!-- > Video Demonstration in `docs/videos/Final Use Cases Demo.mp4` -->
+
+## Prerequisites
+
+Before running zOpen Chat, ensure that the following are set up:
+- **llamacpp**: from the llamacpp port of z/OS. [Repository Link](https://github.com/zopencommunity/llamacppport)
+- **Node.js (LTS)**: [Download Node.js supported by z/OS](https://www.ibm.com/products/sdk-nodejs-compiler-zos)
+- **npm**: Comes with Node.js
+
+## Workflow
+
+The **Model Context Protocol (MCP)** is implemented here to orchestrate the routing in this system. It handles user inputs, tool management, and communication between the client interface and the underlying LLM infrastructure.
+
+- The client UI allows users to interact with the system via options like:
+  - Chat
+  - Explain Code
+  - Generate Test Cases
+
+- These requests are sent to the MCP Server, which serves as the orchestrator.
+
+- The MCP Server forwards the request to a Llama Server hosting the Large Language Model (LLM).
+
+- The LLM processes the input and returns a response to the MCP Server.
+
+- The response is routed back to the client UI, completing the workflow loop.
+
+> Folder structure details are explained in `docs/STRUCTURE.md`
+
+![Workflow](docs/images/workflow.png)
+
+## Setup
+
+### 1. Clone the Repository
+
+```bash
+git clone https://github.com/zopencommunity/llamacppport.git
+cd examples
+```
+
+### 2. Run LLaMA Inference Server
+Before starting any other servers, make sure the `llama-server` is running in the background.
+```bash
+llama-server -m /data/work/ai/models/granite-3.0-1b-a400m-instruct-be.Q4_K_M.gguf \
+  --host 127.0.0.1 \
+  --port 21099 \
+  --no-mmap \
+  --threads 1 \
+  -v
+```
+This command starts the LLaMA inference engine. Let it run in the background.
+
+### 3. Python Backend
+Create a virtual environment and activate it
+```bash
+python -m venv venv
+source venv/bin/activate
+```
+
+Install the dependencies
+``` bash
+cd backend
+pip install -r requirements.txt
+```
+
+Once the dependencies are installed run the Flask app
+```bash
+python app.py
+```
+This will start the Flask server on `http://127.0.0.1:21098`. To change the port, update the relevant port configuration in the `frontend/src/config.js` code.
+
+### 4. Frontend (Website)
+Install the node modules
+```bash
+cd frontend
+npm install
+```
+
+Start the website
+```bash
+npm start
+```
+The website can be accessed by going to `http://127.0.0.1:21097` on your web browser!
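
As a quick sanity check for the workflow described in the README above, the `llama-server` started in step 2 can be queried directly over HTTP before wiring up the backend and frontend. The `/health` and `/completion` routes and the JSON fields below come from upstream llama.cpp and may vary between versions; the prompt text is illustrative:

```bash
# Illustrative only: talk to the llama.cpp server from step 2 directly.
curl http://127.0.0.1:21099/health          # should report the server status

curl --request POST \
  --url http://127.0.0.1:21099/completion \
  --header "Content-Type: application/json" \
  --data '{"prompt": "What does the zopen community provide?", "n_predict": 64}'
```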

examples/backend/access_github.py

Lines changed: 31 additions & 0 deletions

@@ -0,0 +1,31 @@
+import requests
+import base64
+import dotenv
+import os
+import re
+
+dotenv.load_dotenv()
+GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
+if not GITHUB_TOKEN:
+    raise Exception("Set your GitHub Token from https://github.com/settings/tokens")
+
+def parse_input(prompt):
+    match = re.search("explain\s+(.*?)\s+(from|of|in)\s+([^\s]+)", prompt)
+    if match:
+        file, _, repo = match.groups()
+        return file.strip(), repo.strip()
+
+    return "Malformed prompt received. Please give the correct prompt."
+
+def get_file_info(file, repo, org='zopencommunity'):
+    api_url = f"https://api.github.com/repos/{org}/{repo}/contents/{file}"
+    headers = {"Authentication": GITHUB_TOKEN}
+    try:
+        res = requests.get(api_url, headers=headers)
+        res.raise_for_status()
+        data= res.json()
+        if data.get("encoding") == "base64":
+            return base64.b64decode(data["content"]).decode('utf-8', errors='ignore')
+        return data.get("content","")
+    except Exception as e:
+        return f"Exception: Could not fetch {file} from {repo} \n{e}"
