ci: add test binaries workflow #2
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Test Binaries

# Triggered on PRs to dev that touch the CI workflow files, or manually via
# workflow_dispatch with an optional release tag to test.
on:
  pull_request:
    branches:
      - dev
    paths:
      - '.github/workflows/test-binaries.yml'
      - '.github/workflows/menlo-build.yml'
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to test'
        required: false
        default: 'b5857'

env:
  # Release tag whose assets get downloaded and smoke-tested. Honor the
  # workflow_dispatch input when provided (it was previously declared but
  # ignored); fall back to the pinned tag for pull_request runs.
  TEST_VERSION: ${{ github.event.inputs.version || 'b5857' }}
jobs:
  # Downloads each per-platform release tarball and smoke-tests the bundled
  # llama-server binary (startup + one inference request), CPU-only.
  test-binaries:
    runs-on: ${{ matrix.runs-on }}
    timeout-minutes: 30
    strategy:
      # Keep the other matrix entries running when one binary fails
      fail-fast: false
      matrix:
        include:
          # Linux CPU variants.
          # NOTE(review): ubuntu-20.04 hosted runners are retired by GitHub and
          # jobs requesting them no longer schedule; bumped to ubuntu-22.04,
          # the oldest still-supported hosted image.
          # NOTE(review): there is no linux avx2-x64 entry although win has
          # one — confirm that is intentional.
          - os: "linux"
            name: "noavx-x64"
            runs-on: "ubuntu-22.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-noavx-x64"
          - os: "linux"
            name: "avx-x64"
            runs-on: "ubuntu-22.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-avx-x64"
          - os: "linux"
            name: "avx512-x64"
            runs-on: "ubuntu-22.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-avx512-x64"
          - os: "linux"
            name: "vulkan-x64"
            runs-on: "ubuntu-22.04"
            binary-name: "llama-server"
            artifact-name: "llama-linux-vulkan-x64"
          # macOS variants run on self-hosted runners.
          - os: "macos"
            name: "x64"
            runs-on: "macos-selfhosted-12"
            binary-name: "llama-server"
            artifact-name: "llama-macos-x64"
          - os: "macos"
            name: "arm64"
            runs-on: "macos-selfhosted-12-arm64"
            binary-name: "llama-server"
            artifact-name: "llama-macos-arm64"
          # Windows CPU variants.
          - os: "win"
            name: "noavx-x64"
            runs-on: "windows-latest"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-noavx-x64"
          - os: "win"
            name: "avx-x64"
            runs-on: "windows-latest"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-avx-x64"
          - os: "win"
            name: "avx2-x64"
            runs-on: "windows-latest"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-avx2-x64"
          - os: "win"
            name: "avx512-x64"
            runs-on: "windows-latest"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-avx512-x64"
          - os: "win"
            name: "vulkan-x64"
            runs-on: "windows-latest"
            binary-name: "llama-server.exe"
            artifact-name: "llama-win-vulkan-x64"
| steps: | |
| - name: Checkout | |
| uses: actions/checkout@v3 | |
| - name: Show testing version | |
| run: | | |
| echo "Testing hardcoded version: ${{ env.TEST_VERSION }}" | |
| echo "This will download binaries from release: ${{ env.TEST_VERSION }}" | |
| - name: Download release binaries | |
| env: | |
| GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| run: | | |
| # Download the specific release binary for this matrix combination | |
| RELEASE_TAG="${{ env.TEST_VERSION }}" | |
| ASSET_NAME="llama-${RELEASE_TAG}-bin-${{ matrix.os }}-${{ matrix.name }}.tar.gz" | |
| echo "Downloading asset: $ASSET_NAME" | |
| # Get download URL for the asset | |
| DOWNLOAD_URL=$(curl -s -H "Authorization: token $GITHUB_TOKEN" \ | |
| "https://api.github.com/repos/${{ github.repository }}/releases/tags/$RELEASE_TAG" | \ | |
| jq -r --arg asset_name "$ASSET_NAME" '.assets[] | select(.name == $asset_name) | .browser_download_url') | |
| if [ "$DOWNLOAD_URL" = "null" ] || [ -z "$DOWNLOAD_URL" ]; then | |
| echo "Asset $ASSET_NAME not found in release $RELEASE_TAG" | |
| echo "Available assets:" | |
| curl -s -H "Authorization: token $GITHUB_TOKEN" \ | |
| "https://api.github.com/repos/${{ github.repository }}/releases/tags/$RELEASE_TAG" | \ | |
| jq -r '.assets[].name' | |
| exit 1 | |
| fi | |
| # Download the binary | |
| mkdir -p artifacts | |
| curl -L -H "Authorization: token $GITHUB_TOKEN" \ | |
| -o "artifacts/binary.tar.gz" \ | |
| "$DOWNLOAD_URL" | |
| echo "Downloaded binary successfully" | |
| ls -la artifacts/ | |
      # Unpack the downloaded tarball and try to normalize the extracted tree
      # into a top-level llama/ directory so later steps have a stable path.
      # The fallback chain is order-dependent; left as-is.
      - name: Extract artifacts (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          echo "Extracting binary for ${{ matrix.artifact-name }}..."
          cd artifacts
          tar -xzf binary.tar.gz
          cd ..
          # List what we extracted (extraction happened inside artifacts/)
          ls -la ./
          find . -name "*llama*" -type d
          # Find and create standardized structure
          if [ -d "llama" ]; then
            echo "Found llama directory"
          else
            # Move extracted directory to llama/.
            # NOTE(review): if several "*llama*" dirs match, each -exec mv
            # targets the same llama/ destination and later ones nest inside
            # it — confirm release tarballs contain a single top-level dir.
            find . -maxdepth 2 -type d -name "*llama*" -exec mv {} llama \; || true
            # Alternative: if extraction creates different structure,
            # re-extract the tarball directly into llama/
            if [ ! -d "llama" ]; then
              mkdir -p llama
              find artifacts/ -name "*.tar.gz" -exec tar -xzf {} -C llama \;
            fi
          fi
          # Verify binary location (diagnostic only; a later step hard-fails
          # if the binary is truly missing)
          find . -name "${{ matrix.binary-name }}" -type f | head -5
      # Windows counterpart: extract with the tar bundled on windows-latest
      # and report where (or whether) the server binary landed. This step is
      # diagnostic; the next Windows step does the hard failure.
      - name: Extract artifacts (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: |
          Write-Host "Extracting binary for ${{ matrix.artifact-name }}..."
          # Extract using tar (available on windows-latest runners)
          Set-Location artifacts
          tar -xzf binary.tar.gz
          Set-Location ..
          # List what we have
          Get-ChildItem -Recurse | Where-Object {$_.Name -like "*llama*"} | Select-Object -First 10
          # Find the binary (-Name returns a relative path string)
          $binaryPath = Get-ChildItem -Recurse -Name "${{ matrix.binary-name }}" | Select-Object -First 1
          if ($binaryPath) {
            Write-Host "Found binary at: $binaryPath"
          } else {
            Write-Host "Binary not found, listing all .exe files:"
            Get-ChildItem -Recurse -Filter "*.exe" | Select-Object -First 5
          }
| - name: Make binary executable (Linux/macOS) | |
| if: runner.os != 'Windows' | |
| run: | | |
| # Find the actual binary location | |
| BINARY_PATH=$(find . -name "${{ matrix.binary-name }}" -type f | head -1) | |
| if [ -n "$BINARY_PATH" ]; then | |
| chmod +x "$BINARY_PATH" | |
| echo "Made executable: $BINARY_PATH" | |
| # Create symlink for consistent path | |
| mkdir -p llama/build/bin | |
| ln -sf "$(realpath $BINARY_PATH)" llama/build/bin/${{ matrix.binary-name }} | |
| else | |
| echo "Binary not found!" | |
| echo "Available files:" | |
| find . -type f -name "*server*" | head -10 | |
| exit 1 | |
| fi | |
      # Windows counterpart of the path-normalization step: copy (not symlink)
      # the found binary into llama\build\bin\<binary-name>, or fail the job.
      - name: Setup binary path (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: |
          # Find the actual binary (-Name yields a path relative to the cwd,
          # which Copy-Item resolves from the same cwd)
          $binaryPath = Get-ChildItem -Recurse -Name "${{ matrix.binary-name }}" | Select-Object -First 1
          if ($binaryPath) {
            # Create standardized directory structure
            New-Item -ItemType Directory -Force -Path "llama\build\bin"
            Copy-Item $binaryPath "llama\build\bin\${{ matrix.binary-name }}"
            Write-Host "Binary copied to: llama\build\bin\${{ matrix.binary-name }}"
          } else {
            Write-Host "Binary not found!"
            Write-Host "Available files:"
            Get-ChildItem -Recurse -Filter "*server*" | Select-Object -First 10
            exit 1
          }
| - name: Download test model | |
| run: | | |
| mkdir -p models | |
| curl -L -o models/Lucy-Q4_0.gguf "https://huggingface.co/Menlo/Lucy-gguf/resolve/main/Lucy-Q4_0.gguf" | |
      # Sanity-check that the binary runs at all. The `|| echo` fallbacks make
      # this step deliberately non-fatal: a non-zero exit from --version/--help
      # only prints a note instead of failing the job.
      - name: Verify binary (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          echo "Testing binary basic functionality..."
          ./llama/build/bin/${{ matrix.binary-name }} --version || echo "Version check completed"
          ./llama/build/bin/${{ matrix.binary-name }} --help || echo "Help check completed"
      # Windows sanity check, intended to be non-fatal like the Unix variant.
      # NOTE(review): try/catch does not catch a native exe's non-zero exit
      # code (only PowerShell errors), and with pwsh's default error handling
      # a non-zero $LASTEXITCODE here can still fail the step — verify.
      - name: Verify binary (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: |
          Write-Host "Testing binary basic functionality..."
          try {
            .\llama\build\bin\${{ matrix.binary-name }} --version
          } catch {
            Write-Host "Version check completed with exit code: $LASTEXITCODE"
          }
          try {
            .\llama\build\bin\${{ matrix.binary-name }} --help
          } catch {
            Write-Host "Help check completed with exit code: $LASTEXITCODE"
          }
| - name: Test server startup (Linux/macOS) | |
| if: runner.os != 'Windows' | |
| timeout-minutes: 5 | |
| run: | | |
| echo "Testing ${{ matrix.binary-name }} server startup..." | |
| # Force CPU mode for all builds | |
| ./llama/build/bin/${{ matrix.binary-name }} \ | |
| --model models/Lucy-Q4_0.gguf \ | |
| --server --port 8080 --host 127.0.0.1 \ | |
| --n-gpu-layers 0 \ | |
| --ctx-size 512 & | |
| SERVER_PID=$! | |
| echo "Server PID: $SERVER_PID" | |
| # Wait for server to start with better error handling | |
| for i in {1..30}; do | |
| if curl -s http://127.0.0.1:8080/health > /dev/null 2>&1; then | |
| echo "[PASSED] Server started successfully and is responding" | |
| kill $SERVER_PID 2>/dev/null || true | |
| exit 0 | |
| fi | |
| echo "Attempt $i/30 - waiting for server..." | |
| sleep 2 | |
| done | |
| echo "[FAILED] Server failed to start or respond within timeout" | |
| kill $SERVER_PID 2>/dev/null || true | |
| exit 1 | |
      # End-to-end inference check: start the server again, wait for /health,
      # POST a tiny completion request, and pass if the JSON response contains
      # a "content" or "choices" field. The server is killed on every path.
      # NOTE(review): if the health poll never succeeds, the loop just falls
      # through and the curl POST fails, so the step still fails — but with a
      # less specific message than the startup test.
      - name: Test inference (Linux/macOS)
        if: runner.os != 'Windows'
        timeout-minutes: 5
        run: |
          echo "Testing inference with ${{ matrix.binary-name }}..."
          # Start server
          ./llama/build/bin/${{ matrix.binary-name }} \
            --model models/Lucy-Q4_0.gguf \
            --server --port 8080 --host 127.0.0.1 \
            --n-gpu-layers 0 \
            --ctx-size 512 &
          SERVER_PID=$!
          # Wait for server to start (up to 30 x 2 s)
          for i in {1..30}; do
            if curl -s http://127.0.0.1:8080/health > /dev/null 2>&1; then
              break
            fi
            sleep 2
          done
          # Test inference with shorter response (n_predict=5 keeps it fast)
          curl -X POST http://127.0.0.1:8080/completion \
            -H "Content-Type: application/json" \
            -d '{
              "prompt": "Hello",
              "n_predict": 5,
              "temperature": 0.1
            }' > response.json
          # Check response: non-empty file containing a known response field
          if [ -s response.json ] && (grep -q "content" response.json || grep -q "choices" response.json); then
            echo "[PASSED] Inference test passed"
            cat response.json
            kill $SERVER_PID 2>/dev/null || true
            exit 0
          else
            echo "[FAILED] Inference test failed"
            echo "Response content:"
            cat response.json || echo "No response file"
            kill $SERVER_PID 2>/dev/null || true
            exit 1
          fi
      # Windows startup check: launch the server as a hidden background
      # process and poll /health for up to 30 x 2 s via Invoke-RestMethod.
      # Stop-Process cleans up on both the pass and timeout paths.
      - name: Test server startup (Windows)
        if: runner.os == 'Windows'
        timeout-minutes: 5
        shell: pwsh
        run: |
          Write-Host "Testing ${{ matrix.binary-name }} server startup..."
          # Start server with CPU mode (--n-gpu-layers 0)
          $process = Start-Process -FilePath ".\llama\build\bin\${{ matrix.binary-name }}" `
            -ArgumentList "--model", "models\Lucy-Q4_0.gguf", "--server", "--port", "8080", "--host", "127.0.0.1", "--n-gpu-layers", "0", "--ctx-size", "512" `
            -WindowStyle Hidden -PassThru
          Write-Host "Server PID: $($process.Id)"
          # Wait for server to start; a failed request throws into the catch,
          # which sleeps and retries
          for ($i = 1; $i -le 30; $i++) {
            try {
              $response = Invoke-RestMethod -Uri "http://127.0.0.1:8080/health" -Method Get -TimeoutSec 2
              Write-Host "[PASSED] Server started successfully and is responding"
              Stop-Process -Id $process.Id -Force -ErrorAction SilentlyContinue
              exit 0
            } catch {
              Write-Host "Attempt $i/30 - waiting for server..."
              Start-Sleep -Seconds 2
            }
          }
          Write-Host "[FAILED] Server failed to start or respond within timeout"
          Stop-Process -Id $process.Id -Force -ErrorAction SilentlyContinue
          exit 1
      # Windows end-to-end inference check: relaunch the server, wait for
      # /health, POST a tiny completion request, and pass if the request
      # succeeds. The server process is stopped on both paths.
      # NOTE(review): unlike the Unix variant, success is "request did not
      # throw" — the response body is printed but not inspected for a
      # content/choices field.
      - name: Test inference (Windows)
        if: runner.os == 'Windows'
        timeout-minutes: 5
        shell: pwsh
        run: |
          Write-Host "Testing inference with ${{ matrix.binary-name }}..."
          # Start server
          $process = Start-Process -FilePath ".\llama\build\bin\${{ matrix.binary-name }}" `
            -ArgumentList "--model", "models\Lucy-Q4_0.gguf", "--server", "--port", "8080", "--host", "127.0.0.1", "--n-gpu-layers", "0", "--ctx-size", "512" `
            -WindowStyle Hidden -PassThru
          # Wait for server to start (up to 30 x 2 s); break on first success
          for ($i = 1; $i -le 30; $i++) {
            try {
              Invoke-RestMethod -Uri "http://127.0.0.1:8080/health" -Method Get -TimeoutSec 2 | Out-Null
              break
            } catch {
              Start-Sleep -Seconds 2
            }
          }
          # Test inference (n_predict=5 keeps it fast)
          $body = @{
            prompt = "Hello"
            n_predict = 5
            temperature = 0.1
          } | ConvertTo-Json
          try {
            $response = Invoke-RestMethod -Uri "http://127.0.0.1:8080/completion" -Method Post -Body $body -ContentType "application/json"
            Write-Host "[PASSED] Inference test passed"
            $response | ConvertTo-Json -Depth 10
            Stop-Process -Id $process.Id -Force -ErrorAction SilentlyContinue
            exit 0
          } catch {
            Write-Host "[FAILED] Inference test failed"
            Write-Host $_.Exception.Message
            Stop-Process -Id $process.Id -Force -ErrorAction SilentlyContinue
            exit 1
          }
      # Always publish the inference response and any logs — even when a
      # previous step failed — so broken matrix entries can be debugged from
      # the artifact. Short retention since these are throwaway diagnostics.
      - name: Upload test results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: test-results-${{ matrix.os }}-${{ matrix.name }}-${{ env.TEST_VERSION }}
          path: |
            response.json
            *.log
          retention-days: 1
  # Aggregation job: writes a human-readable pass/fail summary to the
  # workflow run's summary page. `if: always()` ensures it runs even when
  # matrix entries failed; `needs.test-binaries.result` is "success" only if
  # every matrix entry passed.
  test-summary:
    needs: test-binaries
    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Test Summary
        run: |
          echo "## CPU Binary Test Results for ${{ env.TEST_VERSION }}" >> $GITHUB_STEP_SUMMARY
          echo "Tested CPU-only builds to avoid GPU dependency issues" >> $GITHUB_STEP_SUMMARY
          echo "**Version tested:** ${{ env.TEST_VERSION }}" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          if [ "${{ needs.test-binaries.result }}" = "success" ]; then
            echo "### [PASSED] All CPU binary tests passed!" >> $GITHUB_STEP_SUMMARY
            echo "- All binaries start successfully" >> $GITHUB_STEP_SUMMARY
            echo "- Model loading works correctly" >> $GITHUB_STEP_SUMMARY
            echo "- Inference API responds properly" >> $GITHUB_STEP_SUMMARY
          else
            echo "### [FAILED] Some CPU binary tests failed" >> $GITHUB_STEP_SUMMARY
            echo "Check individual job logs for details." >> $GITHUB_STEP_SUMMARY
          fi
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "**Note:** CUDA builds are excluded from testing due to lack of GPU hardware." >> $GITHUB_STEP_SUMMARY