fix: remove non-ASCII characters #1

Workflow file for this run

name: Test Binaries
on:
  pull_request:
    branches:
      - dev
    paths:
      - '.github/workflows/test-binaries.yml'
      - '.github/workflows/menlo-build.yml'
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to test (e.g., b5509, b5857)'
        required: false
        default: 'latest'

jobs:
  test-binaries:
    runs-on: ${{ matrix.runs-on }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: "linux"
            name: "noavx-x64"
            runs-on: "ubuntu-20.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-noavx-x64"
          - os: "linux"
            name: "avx-x64"
            runs-on: "ubuntu-20.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-avx-x64"
          - os: "linux"
            name: "avx512-x64"
            runs-on: "ubuntu-20.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-avx512-x64"
          - os: "linux"
            name: "vulkan-x64"
            runs-on: "ubuntu-22.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-vulkan-x64"
          - os: "macos"
            name: "x64"
            runs-on: "macos-selfhosted-12"
            binary-name: "llama-server"
            artifact-name: "llama-macos-x64"
          - os: "macos"
            name: "arm64"
            runs-on: "macos-selfhosted-12-arm64"
            binary-name: "llama-server"
            artifact-name: "llama-macos-arm64"
          - os: "win"
            name: "avx2-x64"
            runs-on: "windows-cuda-11-7"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-avx2-x64"
          - os: "win"
            name: "noavx-x64"
            runs-on: "windows-cuda-11-7"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-noavx-x64"
          - os: "win"
            name: "avx-x64"
            runs-on: "windows-cuda-12-0"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-avx-x64"
          - os: "win"
            name: "avx512-x64"
            runs-on: "windows-cuda-12-0"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-avx512-x64"
          - os: "win"
            name: "vulkan-x64"
            runs-on: "windows-cuda-11-7"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-vulkan-x64"
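    # Each include entry pairs one prebuilt llama-server artifact with a runner
    # able to exercise it; the macos-selfhosted-* and windows-cuda-* labels are
    # assumed to be self-hosted runners registered for this repository.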
    steps:
      - name: Checkout
        uses: actions/checkout@v3
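      # Note: actions/download-artifact@v4 only sees artifacts uploaded by the
      # current workflow run; pulling artifacts produced by menlo-build.yml in
      # a different run would additionally need run-id and github-token inputs.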
      - name: Download latest artifacts
        uses: actions/download-artifact@v4
        with:
          name: ${{ matrix.artifact-name }}
          path: ./artifacts
      - name: Extract artifacts
        run: |
          # Find the tar.gz file and extract it
          find ./artifacts -name "*.tar.gz" -exec tar -xzf {} \;
          # Move the extracted directory to llama/
          find . -maxdepth 1 -type d -name "llama-*" -exec mv {} llama \;
      - name: Make binary executable (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          chmod +x ./llama/build/bin/${{ matrix.binary-name }}
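      # Lucy-Q4_0.gguf is used purely as a small smoke-test model; any GGUF the
      # runner can hold in memory would serve the same purpose here.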
      - name: Download test model
        run: |
          mkdir -p models
          curl -L -o models/Lucy-Q4_0.gguf "https://huggingface.co/Menlo/Lucy-gguf/resolve/main/Lucy-Q4_0.gguf"
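      # llama-server answers GET /health with 503 while the model is still
      # loading and 200 once it is ready, so a single probe after a fixed sleep
      # can be flaky on slow runners. A sketch of a more patient readiness
      # check (same endpoint; the loop bounds are arbitrary):
      #   for i in $(seq 1 30); do
      #     curl -sf http://localhost:8080/health && break
      #     sleep 1
      #   done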
      - name: Test server startup (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          echo "Testing ${{ matrix.binary-name }} startup..."
          timeout 30s ./llama/build/bin/${{ matrix.binary-name }} --model models/Lucy-Q4_0.gguf --port 8080 --host 0.0.0.0 &
          SERVER_PID=$!
          # Wait a bit for server to start
          sleep 10
          # Test if server is responding
          if curl -s http://localhost:8080/health > /dev/null; then
            echo "[PASSED] Server started successfully and is responding"
            kill $SERVER_PID
            exit 0
          else
            echo "[FAILED] Server failed to start or respond"
            kill $SERVER_PID 2>/dev/null || true
            exit 1
          fi
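      # POST /completion takes prompt, n_predict, temperature and stop in the
      # request body; a successful reply is JSON whose fields include (roughly)
      #   {"content":"...","stop":true,"tokens_predicted":10,...}
      # which is why the step below greps response.json for "content".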
      - name: Test inference (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          echo "Testing inference with ${{ matrix.binary-name }}..."
          # Start server
          ./llama/build/bin/${{ matrix.binary-name }} --model models/Lucy-Q4_0.gguf --port 8080 --host 0.0.0.0 &
          SERVER_PID=$!
          # Wait for server to start
          sleep 15
          # Test inference
          curl -X POST http://localhost:8080/completion \
            -H "Content-Type: application/json" \
            -d '{
              "prompt": "Hello, how are you?",
              "n_predict": 10,
              "temperature": 0.7,
              "stop": ["\n", "User:", "Assistant:"]
            }' > response.json
          # Check if we got a valid response
          if [ -s response.json ] && grep -q "content" response.json; then
            echo "[PASSED] Inference test passed"
            cat response.json
            kill $SERVER_PID
            exit 0
          else
            echo "[FAILED] Inference test failed"
            cat response.json
            kill $SERVER_PID 2>/dev/null || true
            exit 1
          fi
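      # Stop-Process -Name in the Windows steps terminates every
      # llama-server.exe on the host. On a shared self-hosted runner, capturing
      # the exact process would be safer, e.g. (sketch):
      #   $proc = Start-Process -FilePath ".\llama\build\bin\llama-server.exe" -ArgumentList ... -PassThru
      #   Stop-Process -Id $proc.Id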
      - name: Test server startup (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: |
          Write-Host "Testing ${{ matrix.binary-name }} startup..."
          # Start server in background
          Start-Process -FilePath ".\llama\build\bin\${{ matrix.binary-name }}" -ArgumentList "--model", "models\Lucy-Q4_0.gguf", "--port", "8080", "--host", "0.0.0.0" -WindowStyle Hidden
          # Wait for server to start
          Start-Sleep -Seconds 10
          # Test if server is responding
          try {
            $response = Invoke-RestMethod -Uri "http://localhost:8080/health" -Method Get
            Write-Host "[PASSED] Server started successfully and is responding"
            Stop-Process -Name "llama-server" -Force -ErrorAction SilentlyContinue
            exit 0
          } catch {
            Write-Host "[FAILED] Server failed to start or respond"
            Stop-Process -Name "llama-server" -Force -ErrorAction SilentlyContinue
            exit 1
          }
      - name: Test inference (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: |
          Write-Host "Testing inference with ${{ matrix.binary-name }}..."
          # Start server in background
          Start-Process -FilePath ".\llama\build\bin\${{ matrix.binary-name }}" -ArgumentList "--model", "models\Lucy-Q4_0.gguf", "--port", "8080", "--host", "0.0.0.0" -WindowStyle Hidden
          # Wait for server to start
          Start-Sleep -Seconds 15
          # Test inference ("`n" is PowerShell's newline escape; "\n" would be
          # serialized by ConvertTo-Json as a literal backslash-n)
          $body = @{
            prompt = "Hello, how are you?"
            n_predict = 10
            temperature = 0.7
            stop = @("`n", "User:", "Assistant:")
          } | ConvertTo-Json
          try {
            $response = Invoke-RestMethod -Uri "http://localhost:8080/completion" -Method Post -Body $body -ContentType "application/json"
            Write-Host "[PASSED] Inference test passed"
            $response | ConvertTo-Json -Depth 10
            Stop-Process -Name "llama-server" -Force -ErrorAction SilentlyContinue
            exit 0
          } catch {
            Write-Host "[FAILED] Inference test failed"
            Write-Host $_.Exception.Message
            Stop-Process -Name "llama-server" -Force -ErrorAction SilentlyContinue
            exit 1
          }
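      # Only the Linux/macOS inference step writes response.json; on Windows
      # this upload will usually find just *.log files, and upload-artifact@v4
      # only warns (its if-no-files-found default) when nothing matches.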
      - name: Upload test results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: test-results-${{ matrix.os }}-${{ matrix.name }}
          path: |
            response.json
            *.log
          retention-days: 1