ci: add test binaries workflow #3
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Test workflow: downloads prebuilt llama-server release binaries for each
# CPU-only platform/feature combination, then verifies startup and a small
# inference round-trip against a tiny GGUF model. CUDA builds are excluded
# (no GPU hardware on the runners).
name: Test Binaries

on:
  pull_request:
    branches:
      - dev
    paths:
      - '.github/workflows/test-binaries.yml'
      - '.github/workflows/menlo-build.yml'
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to test'
        required: false
        default: 'b5857'

env:
  # Honor the workflow_dispatch input; fall back to the pinned default for
  # pull_request-triggered runs. (Previously the input was declared but never
  # read — TEST_VERSION was hardcoded, making manual dispatch useless.)
  TEST_VERSION: ${{ github.event.inputs.version || 'b5857' }}

jobs:
  test-binaries:
    runs-on: ${{ matrix.runs-on }}
    timeout-minutes: 30
    strategy:
      fail-fast: false
      matrix:
        include:
          # NOTE(review): GitHub-hosted "ubuntu-20.04" runner labels have been
          # retired upstream — confirm these labels still resolve (e.g. via a
          # self-hosted pool) or migrate to ubuntu-22.04.
          - os: "linux"
            name: "noavx-x64"
            runs-on: "ubuntu-20.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-noavx-x64"
          - os: "linux"
            name: "avx-x64"
            runs-on: "ubuntu-20.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-avx-x64"
          - os: "linux"
            name: "avx512-x64"
            runs-on: "ubuntu-20.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-avx512-x64"
          - os: "linux"
            name: "vulkan-x64"
            runs-on: "ubuntu-22.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-vulkan-x64"
          - os: "macos"
            name: "x64"
            runs-on: "macos-selfhosted-12"
            binary-name: "llama-server"
            artifact-name: "llama-macos-x64"
          - os: "macos"
            name: "arm64"
            runs-on: "macos-selfhosted-12-arm64"
            binary-name: "llama-server"
            artifact-name: "llama-macos-arm64"
          - os: "win"
            name: "noavx-x64"
            runs-on: "windows-latest"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-noavx-x64"
          - os: "win"
            name: "avx-x64"
            runs-on: "windows-latest"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-avx-x64"
          - os: "win"
            name: "avx2-x64"
            runs-on: "windows-latest"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-avx2-x64"
          - os: "win"
            name: "avx512-x64"
            runs-on: "windows-latest"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-avx512-x64"
          - os: "win"
            name: "vulkan-x64"
            runs-on: "windows-latest"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-vulkan-x64"
    steps:
      - name: Checkout
        # v4 for consistency with upload-artifact@v4 below (v3 runs on a
        # deprecated Node runtime).
        uses: actions/checkout@v4

      - name: Install jq (macOS)
        if: runner.os == 'macOS'
        run: |
          if ! command -v jq &> /dev/null; then
            echo "Installing jq..."
            brew install jq
          else
            echo "jq already installed"
          fi

      - name: Show testing version
        run: |
          echo "Testing version: ${{ env.TEST_VERSION }}"
          echo "This will download binaries from release: ${{ env.TEST_VERSION }}"

      - name: Download release binaries (Linux/macOS)
        if: runner.os != 'Windows'
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Download the specific release binary for this matrix combination.
          RELEASE_TAG="${{ env.TEST_VERSION }}"
          ASSET_NAME="llama-${RELEASE_TAG}-bin-${{ matrix.os }}-${{ matrix.name }}.tar.gz"
          echo "Downloading asset: $ASSET_NAME"
          # Resolve the asset's download URL from the release metadata.
          DOWNLOAD_URL=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \
            "https://api.github.com/repos/${{ github.repository }}/releases/tags/$RELEASE_TAG" | \
            jq -r --arg asset_name "$ASSET_NAME" '.assets[] | select(.name == $asset_name) | .browser_download_url')
          if [ "$DOWNLOAD_URL" = "null" ] || [ -z "$DOWNLOAD_URL" ]; then
            echo "Asset $ASSET_NAME not found in release $RELEASE_TAG"
            echo "Available assets:"
            curl -s -H "Authorization: token $GITHUB_TOKEN" \
              "https://api.github.com/repos/${{ github.repository }}/releases/tags/$RELEASE_TAG" | \
              jq -r '.assets[].name'
            exit 1
          fi
          mkdir -p artifacts
          curl -L -H "Authorization: token $GITHUB_TOKEN" \
            -o "artifacts/binary.tar.gz" \
            "$DOWNLOAD_URL"
          echo "Downloaded binary successfully"
          ls -la artifacts/

      - name: Download release binaries (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Download the specific release binary for this matrix combination.
          $releaseTag = "${{ env.TEST_VERSION }}"
          $assetName = "llama-$releaseTag-bin-${{ matrix.os }}-${{ matrix.name }}.tar.gz"
          Write-Host "Downloading asset: $assetName"
          $headers = @{
            'Authorization' = "token $env:GITHUB_TOKEN"
            'Accept' = 'application/vnd.github.v3+json'
          }
          $releaseUrl = "https://api.github.com/repos/${{ github.repository }}/releases/tags/$releaseTag"
          $release = Invoke-RestMethod -Uri $releaseUrl -Headers $headers
          # Locate the matching asset in the release.
          $asset = $release.assets | Where-Object { $_.name -eq $assetName }
          if (-not $asset) {
            Write-Host "Asset $assetName not found in release $releaseTag"
            Write-Host "Available assets:"
            $release.assets | ForEach-Object { Write-Host $_.name }
            exit 1
          }
          New-Item -ItemType Directory -Force -Path "artifacts"
          Invoke-WebRequest -Uri $asset.browser_download_url -OutFile "artifacts\binary.tar.gz" -Headers $headers
          Write-Host "Downloaded binary successfully"
          Get-ChildItem -Path "artifacts"

      - name: Extract artifacts (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          echo "Extracting binary for ${{ matrix.artifact-name }}..."
          cd artifacts
          tar -xzf binary.tar.gz
          cd ..
          # Show what we extracted for debugging.
          ls -la ./
          find . -name "*llama*" -type d | head -5
          # Normalize to a llama/ directory regardless of archive layout.
          if [ -d "llama" ]; then
            echo "Found llama directory"
          else
            find . -maxdepth 2 -type d -name "*llama*" -exec mv {} llama \; || true
            # Fallback: re-extract directly into llama/ if the archive had a
            # flat layout.
            if [ ! -d "llama" ]; then
              mkdir -p llama
              find artifacts/ -name "*.tar.gz" -exec tar -xzf {} -C llama \;
            fi
          fi
          # Verify where the binary ended up.
          find . -name "${{ matrix.binary-name }}" -type f | head -5

      - name: Extract artifacts (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: |
          Write-Host "Extracting binary for ${{ matrix.artifact-name }}..."
          Set-Location artifacts
          tar -xzf binary.tar.gz
          Set-Location ..
          # Show what we extracted for debugging.
          Get-ChildItem -Recurse | Where-Object {$_.Name -like "*llama*"} | Select-Object -First 10
          $binaryPath = Get-ChildItem -Recurse -Name "${{ matrix.binary-name }}" | Select-Object -First 1
          if ($binaryPath) {
            Write-Host "Found binary at: $binaryPath"
          } else {
            Write-Host "Binary not found, listing all .exe files:"
            Get-ChildItem -Recurse -Filter "*.exe" | Select-Object -First 5
          }

      - name: Make binary executable (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          # Locate the actual binary wherever extraction put it.
          BINARY_PATH=$(find . -name "${{ matrix.binary-name }}" -type f | head -1)
          if [ -n "$BINARY_PATH" ]; then
            chmod +x "$BINARY_PATH"
            echo "Made executable: $BINARY_PATH"
            # Symlink into a consistent path used by the later test steps.
            mkdir -p llama/build/bin
            ln -sf "$(realpath "$BINARY_PATH")" "llama/build/bin/${{ matrix.binary-name }}"
          else
            echo "Binary not found!"
            echo "Available files:"
            find . -type f -name "*server*" | head -10
            exit 1
          fi

      - name: Setup binary path (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: |
          # Locate the actual binary wherever extraction put it.
          $binaryPath = Get-ChildItem -Recurse -Name "${{ matrix.binary-name }}" | Select-Object -First 1
          if ($binaryPath) {
            # Copy into a consistent path used by the later test steps.
            New-Item -ItemType Directory -Force -Path "llama\build\bin"
            Copy-Item $binaryPath "llama\build\bin\${{ matrix.binary-name }}"
            Write-Host "Binary copied to: llama\build\bin\${{ matrix.binary-name }}"
          } else {
            Write-Host "Binary not found!"
            Write-Host "Available files:"
            Get-ChildItem -Recurse -Filter "*server*" | Select-Object -First 10
            exit 1
          }

      - name: Download test model
        run: |
          mkdir -p models
          curl -L -o models/Lucy-Q4_0.gguf "https://huggingface.co/Menlo/Lucy-gguf/resolve/main/Lucy-Q4_0.gguf"

      - name: Verify binary (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          echo "Testing binary basic functionality..."
          ./llama/build/bin/${{ matrix.binary-name }} --version || echo "Version check completed"
          echo "Available arguments:"
          ./llama/build/bin/${{ matrix.binary-name }} --help || echo "Help check completed"

      - name: Verify binary (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: |
          Write-Host "Testing binary basic functionality..."
          try {
            .\llama\build\bin\${{ matrix.binary-name }} --version
          } catch {
            Write-Host "Version check completed with exit code: $LASTEXITCODE"
          }
          Write-Host "Available arguments:"
          try {
            .\llama\build\bin\${{ matrix.binary-name }} --help
          } catch {
            Write-Host "Help check completed with exit code: $LASTEXITCODE"
          }

      - name: Test server startup (Linux/macOS)
        if: runner.os != 'Windows'
        timeout-minutes: 5
        run: |
          echo "Testing ${{ matrix.binary-name }} server startup..."
          # llama-server is itself the HTTP server, so the plain invocation is
          # the expected format. (Previously a bogus "--server" flag was tried
          # first, and a fallback passed "-p 8080" — but -p is the *prompt*
          # flag in llama.cpp, so that fallback could never bind a port.)
          ./llama/build/bin/${{ matrix.binary-name }} \
            --model models/Lucy-Q4_0.gguf \
            --port 8080 --host 127.0.0.1 \
            --n-gpu-layers 0 \
            --ctx-size 512 &
          SERVER_PID=$!
          echo "Server PID: $SERVER_PID"
          # Give the process a moment, then check it is still alive.
          sleep 3
          if ! kill -0 "$SERVER_PID" 2>/dev/null; then
            echo "Server process died, trying legacy --server format..."
            # NOTE(review): kept as a fallback for older builds that may have
            # shipped a combined binary — confirm whether any tested release
            # still needs this.
            ./llama/build/bin/${{ matrix.binary-name }} \
              --model models/Lucy-Q4_0.gguf \
              --server --port 8080 --host 127.0.0.1 \
              --n-gpu-layers 0 \
              --ctx-size 512 &
            SERVER_PID=$!
            sleep 3
          fi
          # Poll the health endpoint until the model finishes loading.
          for i in {1..30}; do
            if curl -s http://127.0.0.1:8080/health > /dev/null 2>&1; then
              echo "[PASSED] Server started successfully and is responding"
              kill "$SERVER_PID" 2>/dev/null || true
              exit 0
            fi
            echo "Attempt $i/30 - waiting for server..."
            sleep 2
          done
          echo "[FAILED] Server failed to start or respond within timeout"
          kill "$SERVER_PID" 2>/dev/null || true
          exit 1

      - name: Test inference (Linux/macOS)
        if: runner.os != 'Windows'
        timeout-minutes: 5
        run: |
          echo "Testing inference with ${{ matrix.binary-name }}..."
          # Plain invocation — llama-server is the HTTP server binary.
          ./llama/build/bin/${{ matrix.binary-name }} \
            --model models/Lucy-Q4_0.gguf \
            --port 8080 --host 127.0.0.1 \
            --n-gpu-layers 0 \
            --ctx-size 512 &
          SERVER_PID=$!
          # Wait for the server to become healthy.
          for i in {1..30}; do
            if curl -s http://127.0.0.1:8080/health > /dev/null 2>&1; then
              break
            fi
            sleep 2
          done
          # Small deterministic-ish completion request.
          curl -X POST http://127.0.0.1:8080/completion \
            -H "Content-Type: application/json" \
            -d '{
              "prompt": "Hello",
              "n_predict": 5,
              "temperature": 0.1
            }' > response.json
          # Accept any of the known response shapes.
          if [ -s response.json ] && (grep -q "content" response.json || grep -q "choices" response.json || grep -q "text" response.json); then
            echo "[PASSED] Inference test passed"
            cat response.json
            kill "$SERVER_PID" 2>/dev/null || true
            exit 0
          else
            echo "[FAILED] Inference test failed"
            echo "Response content:"
            cat response.json || echo "No response file"
            kill "$SERVER_PID" 2>/dev/null || true
            exit 1
          fi

      - name: Test server startup (Windows)
        if: runner.os == 'Windows'
        timeout-minutes: 5
        shell: pwsh
        run: |
          Write-Host "Testing ${{ matrix.binary-name }} server startup..."
          # Plain invocation (CPU mode) — llama-server is the HTTP server
          # binary; the previous "--server" flag is not a llama-server option.
          $process = Start-Process -FilePath ".\llama\build\bin\${{ matrix.binary-name }}" `
            -ArgumentList "--model", "models\Lucy-Q4_0.gguf", "--port", "8080", "--host", "127.0.0.1", "--n-gpu-layers", "0", "--ctx-size", "512" `
            -WindowStyle Hidden -PassThru
          Write-Host "Server PID: $($process.Id)"
          # Poll the health endpoint until the model finishes loading.
          for ($i = 1; $i -le 30; $i++) {
            try {
              $response = Invoke-RestMethod -Uri "http://127.0.0.1:8080/health" -Method Get -TimeoutSec 2
              Write-Host "[PASSED] Server started successfully and is responding"
              Stop-Process -Id $process.Id -Force -ErrorAction SilentlyContinue
              exit 0
            } catch {
              Write-Host "Attempt $i/30 - waiting for server..."
              Start-Sleep -Seconds 2
            }
          }
          Write-Host "[FAILED] Server failed to start or respond within timeout"
          Stop-Process -Id $process.Id -Force -ErrorAction SilentlyContinue
          exit 1

      - name: Test inference (Windows)
        if: runner.os == 'Windows'
        timeout-minutes: 5
        shell: pwsh
        run: |
          Write-Host "Testing inference with ${{ matrix.binary-name }}..."
          # Plain invocation — see startup step above for why no "--server".
          $process = Start-Process -FilePath ".\llama\build\bin\${{ matrix.binary-name }}" `
            -ArgumentList "--model", "models\Lucy-Q4_0.gguf", "--port", "8080", "--host", "127.0.0.1", "--n-gpu-layers", "0", "--ctx-size", "512" `
            -WindowStyle Hidden -PassThru
          # Wait for the server to become healthy.
          for ($i = 1; $i -le 30; $i++) {
            try {
              Invoke-RestMethod -Uri "http://127.0.0.1:8080/health" -Method Get -TimeoutSec 2 | Out-Null
              break
            } catch {
              Start-Sleep -Seconds 2
            }
          }
          # Small completion request.
          $body = @{
            prompt = "Hello"
            n_predict = 5
            temperature = 0.1
          } | ConvertTo-Json
          try {
            $response = Invoke-RestMethod -Uri "http://127.0.0.1:8080/completion" -Method Post -Body $body -ContentType "application/json"
            Write-Host "[PASSED] Inference test passed"
            $response | ConvertTo-Json -Depth 10
            Stop-Process -Id $process.Id -Force -ErrorAction SilentlyContinue
            exit 0
          } catch {
            Write-Host "[FAILED] Inference test failed"
            Write-Host $_.Exception.Message
            Stop-Process -Id $process.Id -Force -ErrorAction SilentlyContinue
            exit 1
          }

      - name: Upload test results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: test-results-${{ matrix.os }}-${{ matrix.name }}-${{ env.TEST_VERSION }}
          path: |
            response.json
            *.log
          retention-days: 1
          # Windows jobs produce no response.json/*.log; don't warn on them.
          if-no-files-found: ignore

  test-summary:
    needs: test-binaries
    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Test Summary
        run: |
          echo "## CPU Binary Test Results for ${{ env.TEST_VERSION }}" >> "$GITHUB_STEP_SUMMARY"
          echo "Tested CPU-only builds to avoid GPU dependency issues" >> "$GITHUB_STEP_SUMMARY"
          echo "**Version tested:** ${{ env.TEST_VERSION }}" >> "$GITHUB_STEP_SUMMARY"
          echo "" >> "$GITHUB_STEP_SUMMARY"
          if [ "${{ needs.test-binaries.result }}" = "success" ]; then
            echo "### [PASSED] All CPU binary tests passed!" >> "$GITHUB_STEP_SUMMARY"
            echo "- All binaries start successfully" >> "$GITHUB_STEP_SUMMARY"
            echo "- Model loading works correctly" >> "$GITHUB_STEP_SUMMARY"
            echo "- Inference API responds properly" >> "$GITHUB_STEP_SUMMARY"
          else
            echo "### [FAILED] Some CPU binary tests failed" >> "$GITHUB_STEP_SUMMARY"
            echo "Check individual job logs for details." >> "$GITHUB_STEP_SUMMARY"
          fi
          echo "" >> "$GITHUB_STEP_SUMMARY"
          echo "**Note:** CUDA builds are excluded from testing due to lack of GPU hardware." >> "$GITHUB_STEP_SUMMARY"