Commit 3fb72a6

ci: add test binaries workflow
1 parent 7c3fd44 commit 3fb72a6

File tree

.github/workflows/test-binaries.yml

1 file changed: +230 -0

Lines changed: 230 additions & 0 deletions
@@ -0,0 +1,230 @@
name: Test Binaries

on:
  pull_request:
    branches:
      - dev
    paths:
      - '.github/workflows/test-binaries.yml'
      - '.github/workflows/menlo-build.yml'
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to test (e.g., b5509, b5857)'
        required: false
        default: 'latest'
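
# For a manual run, the workflow_dispatch input above can be passed through the
# GitHub CLI (a sketch; assumes this file lands as test-binaries.yml on dev):
#   gh workflow run test-binaries.yml --ref dev -f version=b5857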

jobs:
  test-binaries:
    runs-on: ${{ matrix.runs-on }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: "linux"
            name: "noavx-x64"
            runs-on: "ubuntu-20.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-noavx-x64"
          - os: "linux"
            name: "avx-x64"
            runs-on: "ubuntu-20.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-avx-x64"
          - os: "linux"
            name: "avx512-x64"
            runs-on: "ubuntu-20.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-avx512-x64"
          - os: "linux"
            name: "vulkan-x64"
            runs-on: "ubuntu-22.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-vulkan-x64"
          - os: "macos"
            name: "x64"
            runs-on: "macos-selfhosted-12"
            binary-name: "llama-server"
            artifact-name: "llama-macos-x64"
          - os: "macos"
            name: "arm64"
            runs-on: "macos-selfhosted-12-arm64"
            binary-name: "llama-server"
            artifact-name: "llama-macos-arm64"
          - os: "win"
            name: "avx2-x64"
            runs-on: "windows-cuda-11-7"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-avx2-x64"
          - os: "win"
            name: "noavx-x64"
            runs-on: "windows-cuda-11-7"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-noavx-x64"
          - os: "win"
            name: "avx-x64"
            runs-on: "windows-cuda-12-0"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-avx-x64"
          - os: "win"
            name: "avx512-x64"
            runs-on: "windows-cuda-12-0"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-avx512-x64"
          - os: "win"
            name: "vulkan-x64"
            runs-on: "windows-cuda-11-7"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-vulkan-x64"
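
    # Each include entry above becomes one job, so a run fans out into eleven
    # jobs. One way to list them for a given run (run id is a placeholder):
    #   gh run view <run-id> --json jobs --jq '.jobs[].name'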

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Download latest artifacts
        uses: actions/download-artifact@v4
        with:
          name: ${{ matrix.artifact-name }}
          path: ./artifacts

      - name: Extract artifacts
        run: |
          # Find the tar.gz file and extract it
          find ./artifacts -name "*.tar.gz" -exec tar -xzf {} \;

          # Move the extracted directory to llama/
          find . -maxdepth 1 -type d -name "llama-*" -exec mv {} llama \;
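
      # The extract step assumes each artifact carries one tarball holding a
      # single llama-*/ directory with binaries under build/bin/ (the chmod
      # path below depends on that layout). To inspect a tarball locally
      # (artifact filename is illustrative):
      #   tar -tzf ./artifacts/llama-linux-avx-x64.tar.gz | head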

      - name: Make binary executable (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          chmod +x ./llama/build/bin/${{ matrix.binary-name }}

      - name: Download test model
        run: |
          mkdir -p models
          curl -L -o models/Lucy-Q4_0.gguf "https://huggingface.co/Menlo/Lucy-gguf/resolve/main/Lucy-Q4_0.gguf"

      - name: Test server startup (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          echo "Testing ${{ matrix.binary-name }} startup..."
          timeout 30s ./llama/build/bin/${{ matrix.binary-name }} --model models/Lucy-Q4_0.gguf --port 8080 --host 0.0.0.0 &
          SERVER_PID=$!

          # Wait a bit for the server to start
          sleep 10

          # Test if the server is responding
          if curl -s http://localhost:8080/health > /dev/null; then
            echo "✅ Server started successfully and is responding"
            kill $SERVER_PID
            exit 0
          else
            echo "❌ Server failed to start or respond"
            kill $SERVER_PID 2>/dev/null || true
            exit 1
          fi

      - name: Test inference (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          echo "Testing inference with ${{ matrix.binary-name }}..."

          # Start server
          ./llama/build/bin/${{ matrix.binary-name }} --model models/Lucy-Q4_0.gguf --port 8080 --host 0.0.0.0 &
          SERVER_PID=$!

          # Wait for the server to start
          sleep 15

          # Test inference
          curl -X POST http://localhost:8080/completion \
            -H "Content-Type: application/json" \
            -d '{
              "prompt": "Hello, how are you?",
              "n_predict": 10,
              "temperature": 0.7,
              "stop": ["\n", "User:", "Assistant:"]
            }' > response.json

          # Check if we got a valid response
          if [ -s response.json ] && grep -q "content" response.json; then
            echo "✅ Inference test passed"
            cat response.json
            kill $SERVER_PID
            exit 0
          else
            echo "❌ Inference test failed"
            cat response.json
            kill $SERVER_PID 2>/dev/null || true
            exit 1
          fi
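
      # A stricter pass/fail check than the grep above, if jq is available
      # (assumption: the /completion response carries a top-level "content"
      # field, as stock llama.cpp servers return):
      #   jq -e '.content | length > 0' response.json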

      - name: Test server startup (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: |
          Write-Host "Testing ${{ matrix.binary-name }} startup..."

          # Start server in background
          Start-Process -FilePath ".\llama\build\bin\${{ matrix.binary-name }}" -ArgumentList "--model", "models\Lucy-Q4_0.gguf", "--port", "8080", "--host", "0.0.0.0" -WindowStyle Hidden

          # Wait for the server to start
          Start-Sleep -Seconds 10

          # Test if the server is responding
          try {
            $response = Invoke-RestMethod -Uri "http://localhost:8080/health" -Method Get
            Write-Host "✅ Server started successfully and is responding"
            Stop-Process -Name "llama-server" -Force -ErrorAction SilentlyContinue
            exit 0
          } catch {
            Write-Host "❌ Server failed to start or respond"
            Stop-Process -Name "llama-server" -Force -ErrorAction SilentlyContinue
            exit 1
          }

      - name: Test inference (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: |
          Write-Host "Testing inference with ${{ matrix.binary-name }}..."

          # Start server in background
          Start-Process -FilePath ".\llama\build\bin\${{ matrix.binary-name }}" -ArgumentList "--model", "models\Lucy-Q4_0.gguf", "--port", "8080", "--host", "0.0.0.0" -WindowStyle Hidden

          # Wait for the server to start
          Start-Sleep -Seconds 15

          # Test inference
          $body = @{
            prompt = "Hello, how are you?"
            n_predict = 10
            temperature = 0.7
            stop = @("\n", "User:", "Assistant:")
          } | ConvertTo-Json

          try {
            $response = Invoke-RestMethod -Uri "http://localhost:8080/completion" -Method Post -Body $body -ContentType "application/json"
            Write-Host "✅ Inference test passed"
            $response | ConvertTo-Json -Depth 10
            Stop-Process -Name "llama-server" -Force -ErrorAction SilentlyContinue
            exit 0
          } catch {
            Write-Host "❌ Inference test failed"
            Write-Host $_.Exception.Message
            Stop-Process -Name "llama-server" -Force -ErrorAction SilentlyContinue
            exit 1
          }
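
      # Note: Stop-Process -Name stops every llama-server process on the
      # runner, which is coarser than the PID-based kill in the Linux steps.
      # A sketch of a more targeted variant, using the process object that
      # Start-Process -PassThru returns:
      #   $proc = Start-Process -FilePath ".\llama\build\bin\llama-server.exe" -ArgumentList "..." -PassThru
      #   Stop-Process -Id $proc.Id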

      - name: Upload test results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: test-results-${{ matrix.os }}-${{ matrix.name }}
          path: |
            response.json
            *.log
          retention-days: 1
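
After a run, the per-matrix result artifacts can be pulled with the GitHub CLI
(run id and artifact name are placeholders):

    gh run download <run-id> --name test-results-linux-avx-x64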
