name: Test Binaries

on:
  pull_request:
    branches:
      - dev
    paths:
      - '.github/workflows/test-binaries.yml'
      - '.github/workflows/menlo-build.yml'
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to test (e.g., b5509, b5857)'
        required: false
        default: 'latest'

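# One job per published build variant; the matrix pins each variant to a runner
# that can execute the resulting binary (the Vulkan build, for instance, runs on ubuntu-22.04).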
jobs:
  test-binaries:
    runs-on: ${{ matrix.runs-on }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: "linux"
            name: "noavx-x64"
            runs-on: "ubuntu-20.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-noavx-x64"
          - os: "linux"
            name: "avx-x64"
            runs-on: "ubuntu-20.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-avx-x64"
          - os: "linux"
            name: "avx512-x64"
            runs-on: "ubuntu-20.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-avx512-x64"
          - os: "linux"
            name: "vulkan-x64"
            runs-on: "ubuntu-22.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-vulkan-x64"
          - os: "macos"
            name: "x64"
            runs-on: "macos-selfhosted-12"
            binary-name: "llama-server"
            artifact-name: "llama-macos-x64"
          - os: "macos"
            name: "arm64"
            runs-on: "macos-selfhosted-12-arm64"
            binary-name: "llama-server"
            artifact-name: "llama-macos-arm64"
          - os: "win"
            name: "avx2-x64"
            runs-on: "windows-cuda-11-7"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-avx2-x64"
          - os: "win"
            name: "noavx-x64"
            runs-on: "windows-cuda-11-7"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-noavx-x64"
          - os: "win"
            name: "avx-x64"
            runs-on: "windows-cuda-12-0"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-avx-x64"
          - os: "win"
            name: "avx512-x64"
            runs-on: "windows-cuda-12-0"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-avx512-x64"
          - os: "win"
            name: "vulkan-x64"
            runs-on: "windows-cuda-11-7"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-vulkan-x64"

    steps:
      - name: Checkout
        uses: actions/checkout@v3

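      # Assumes the matching build workflow (menlo-build.yml) has already published
      # this artifact for the current run.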
      - name: Download latest artifacts
        uses: actions/download-artifact@v4
        with:
          name: ${{ matrix.artifact-name }}
          path: ./artifacts

      - name: Extract artifacts
        run: |
          # Find the tar.gz file and extract it
          find ./artifacts -name "*.tar.gz" -exec tar -xzf {} \;

          # Move the extracted directory to llama/
          find . -maxdepth 1 -type d -name "llama-*" -exec mv {} llama \;

      - name: Make binary executable (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          chmod +x ./llama/build/bin/${{ matrix.binary-name }}

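      # Fetch the quantized GGUF model used as the payload for the smoke tests below.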
      - name: Download test model
        run: |
          mkdir -p models
          curl -L -o models/Lucy-Q4_0.gguf "https://huggingface.co/Menlo/Lucy-gguf/resolve/main/Lucy-Q4_0.gguf"

      - name: Test server startup (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          echo "Testing ${{ matrix.binary-name }} startup..."
          timeout 30s ./llama/build/bin/${{ matrix.binary-name }} --model models/Lucy-Q4_0.gguf --port 8080 --host 0.0.0.0 &
          SERVER_PID=$!

          # Give the server time to load the model
          sleep 10

          # Test if the server is responding
          if curl -s http://localhost:8080/health > /dev/null; then
            echo "✅ Server started successfully and is responding"
            kill $SERVER_PID
            exit 0
          else
            echo "❌ Server failed to start or respond"
            kill $SERVER_PID 2>/dev/null || true
            exit 1
          fi

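      # The startup test killed its server instance, so the inference test starts a fresh one.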
      - name: Test inference (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          echo "Testing inference with ${{ matrix.binary-name }}..."

          # Start server
          ./llama/build/bin/${{ matrix.binary-name }} --model models/Lucy-Q4_0.gguf --port 8080 --host 0.0.0.0 &
          SERVER_PID=$!

          # Wait for the server to load the model
          sleep 15

          # Test inference
          curl -X POST http://localhost:8080/completion \
            -H "Content-Type: application/json" \
            -d '{
              "prompt": "Hello, how are you?",
              "n_predict": 10,
              "temperature": 0.7,
              "stop": ["\n", "User:", "Assistant:"]
            }' > response.json

          # Check if we got a valid response
          if [ -s response.json ] && grep -q "content" response.json; then
            echo "✅ Inference test passed"
            cat response.json
            kill $SERVER_PID
            exit 0
          else
            echo "❌ Inference test failed"
            cat response.json
            kill $SERVER_PID 2>/dev/null || true
            exit 1
          fi

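      # Windows variants: Start-Process does not capture the server PID here, so
      # cleanup uses Stop-Process with the image name (llama-server) instead.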
      - name: Test server startup (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: |
          Write-Host "Testing ${{ matrix.binary-name }} startup..."

          # Start server in background
          Start-Process -FilePath ".\llama\build\bin\${{ matrix.binary-name }}" -ArgumentList "--model", "models\Lucy-Q4_0.gguf", "--port", "8080", "--host", "0.0.0.0" -WindowStyle Hidden

          # Wait for the server to load the model
          Start-Sleep -Seconds 10

          # Test if the server is responding
          try {
            $response = Invoke-RestMethod -Uri "http://localhost:8080/health" -Method Get
            Write-Host "✅ Server started successfully and is responding"
            Stop-Process -Name "llama-server" -Force -ErrorAction SilentlyContinue
            exit 0
          } catch {
            Write-Host "❌ Server failed to start or respond"
            Stop-Process -Name "llama-server" -Force -ErrorAction SilentlyContinue
            exit 1
          }

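      # As on Linux/macOS, a fresh server instance is started for the inference check.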
      - name: Test inference (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: |
          Write-Host "Testing inference with ${{ matrix.binary-name }}..."

          # Start server in background
          Start-Process -FilePath ".\llama\build\bin\${{ matrix.binary-name }}" -ArgumentList "--model", "models\Lucy-Q4_0.gguf", "--port", "8080", "--host", "0.0.0.0" -WindowStyle Hidden

          # Wait for the server to load the model
          Start-Sleep -Seconds 15

          # Test inference (use `n so the stop token serializes as a real newline, matching the Linux test)
          $body = @{
            prompt = "Hello, how are you?"
            n_predict = 10
            temperature = 0.7
            stop = @("`n", "User:", "Assistant:")
          } | ConvertTo-Json

          try {
            $response = Invoke-RestMethod -Uri "http://localhost:8080/completion" -Method Post -Body $body -ContentType "application/json"
            Write-Host "✅ Inference test passed"
            $response | ConvertTo-Json -Depth 10
            Stop-Process -Name "llama-server" -Force -ErrorAction SilentlyContinue
            exit 0
          } catch {
            Write-Host "❌ Inference test failed"
            Write-Host $_.Exception.Message
            Stop-Process -Name "llama-server" -Force -ErrorAction SilentlyContinue
            exit 1
          }

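      # response.json comes from the inference steps; if: always() ensures failed runs still upload it.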
      - name: Upload test results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: test-results-${{ matrix.os }}-${{ matrix.name }}
          path: |
            response.json
            *.log
          retention-days: 1