fix: remove non-ASCII characters #1
Workflow file for this run
name: Test Binaries
on:
  pull_request:
    branches:
      - dev
    paths:
      - '.github/workflows/test-binaries.yml'
      - '.github/workflows/menlo-build.yml'
  workflow_dispatch:
    inputs:
      version:
        description: 'Version to test (e.g., b5509, b5857)'
        required: false
        default: 'latest'
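# Note: the optional `version` input is available to manual runs via
# github.event.inputs.version, but the job below is not parameterised by it;
# actions/download-artifact@v4 fetches the artifacts attached to the current run.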
jobs:
  test-binaries:
    runs-on: ${{ matrix.runs-on }}
    strategy:
      fail-fast: false
      matrix:
        include:
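          # Each entry pins one target: `os`/`name` label the build flavour, `runs-on`
          # selects the runner, `binary-name` is the server executable inside the
          # artifact, and `artifact-name` is the artifact to download for that target.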
| - os: "linux" | ||
| name: "noavx-x64" | ||
| runs-on: "ubuntu-20.04" | ||
| binary-name: "llama-server" | ||
| artifact-name: "llama-linux-noavx-x64" | ||
| - os: "linux" | ||
| name: "avx-x64" | ||
| runs-on: "ubuntu-20.04" | ||
| binary-name: "llama-server" | ||
| artifact-name: "llama-linux-avx-x64" | ||
| - os: "linux" | ||
| name: "avx512-x64" | ||
| runs-on: "ubuntu-20.04" | ||
| binary-name: "llama-server" | ||
| artifact-name: "llama-linux-avx512-x64" | ||
| - os: "linux" | ||
| name: "vulkan-x64" | ||
| runs-on: "ubuntu-22.04" | ||
| binary-name: "llama-server" | ||
| artifact-name: "llama-linux-vulkan-x64" | ||
| - os: "macos" | ||
| name: "x64" | ||
| runs-on: "macos-selfhosted-12" | ||
| binary-name: "llama-server" | ||
| artifact-name: "llama-macos-x64" | ||
| - os: "macos" | ||
| name: "arm64" | ||
| runs-on: "macos-selfhosted-12-arm64" | ||
| binary-name: "llama-server" | ||
| artifact-name: "llama-macos-arm64" | ||
| - os: "win" | ||
| name: "avx2-x64" | ||
| runs-on: "windows-cuda-11-7" | ||
| binary-name: "llama-server.exe" | ||
| artifact-name: "llama-win-avx2-x64" | ||
| - os: "win" | ||
| name: "noavx-x64" | ||
| runs-on: "windows-cuda-11-7" | ||
| binary-name: "llama-server.exe" | ||
| artifact-name: "llama-win-noavx-x64" | ||
| - os: "win" | ||
| name: "avx-x64" | ||
| runs-on: "windows-cuda-12-0" | ||
| binary-name: "llama-server.exe" | ||
| artifact-name: "llama-win-avx-x64" | ||
| - os: "win" | ||
| name: "avx512-x64" | ||
| runs-on: "windows-cuda-12-0" | ||
| binary-name: "llama-server.exe" | ||
| artifact-name: "llama-win-avx512-x64" | ||
| - os: "win" | ||
| name: "vulkan-x64" | ||
| runs-on: "windows-cuda-11-7" | ||
| binary-name: "llama-server.exe" | ||
| artifact-name: "llama-win-vulkan-x64" | ||
    steps:
      - name: Checkout
        uses: actions/checkout@v3
      - name: Download latest artifacts
        uses: actions/download-artifact@v4
        with:
          name: ${{ matrix.artifact-name }}
          path: ./artifacts
      - name: Extract artifacts
        shell: bash
        run: |
          # Find the tar.gz archive and extract it
          find ./artifacts -name "*.tar.gz" -exec tar -xzf {} \;
          # Move the extracted directory to llama/
          find . -maxdepth 1 -type d -name "llama-*" -exec mv {} llama \;
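      # After extraction the server binary is expected at ./llama/build/bin/<binary-name>,
      # which is the path used by every test step below.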
      - name: Make binary executable (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          chmod +x ./llama/build/bin/${{ matrix.binary-name }}
      - name: Download test model
        shell: bash
        run: |
          mkdir -p models
          curl -L -o models/Lucy-Q4_0.gguf "https://huggingface.co/Menlo/Lucy-gguf/resolve/main/Lucy-Q4_0.gguf"
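      # The same quantized GGUF model (models/Lucy-Q4_0.gguf) is reused by both the
      # startup and inference checks below.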
      - name: Test server startup (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          echo "Testing ${{ matrix.binary-name }} startup..."
          timeout 30s ./llama/build/bin/${{ matrix.binary-name }} --model models/Lucy-Q4_0.gguf --port 8080 --host 0.0.0.0 &
          SERVER_PID=$!
          # Wait a bit for the server to start
          sleep 10
          # Test if the server is responding
          if curl -s http://localhost:8080/health > /dev/null; then
            echo "[PASSED] Server started successfully and is responding"
            kill $SERVER_PID
            exit 0
          else
            echo "[FAILED] Server failed to start or respond"
            kill $SERVER_PID 2>/dev/null || true
            exit 1
          fi
      - name: Test inference (Linux/macOS)
        if: runner.os != 'Windows'
        run: |
          echo "Testing inference with ${{ matrix.binary-name }}..."
          # Start the server
          ./llama/build/bin/${{ matrix.binary-name }} --model models/Lucy-Q4_0.gguf --port 8080 --host 0.0.0.0 &
          SERVER_PID=$!
          # Wait for the server to start
          sleep 15
          # Run a single completion request
          curl -X POST http://localhost:8080/completion \
            -H "Content-Type: application/json" \
            -d '{
              "prompt": "Hello, how are you?",
              "n_predict": 10,
              "temperature": 0.7,
              "stop": ["\n", "User:", "Assistant:"]
            }' > response.json
          # Check if we got a valid response
          if [ -s response.json ] && grep -q "content" response.json; then
            echo "[PASSED] Inference test passed"
            cat response.json
            kill $SERVER_PID
            exit 0
          else
            echo "[FAILED] Inference test failed"
            cat response.json
            kill $SERVER_PID 2>/dev/null || true
            exit 1
          fi
      - name: Test server startup (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: |
          Write-Host "Testing ${{ matrix.binary-name }} startup..."
          # Start the server in the background
          Start-Process -FilePath ".\llama\build\bin\${{ matrix.binary-name }}" -ArgumentList "--model", "models\Lucy-Q4_0.gguf", "--port", "8080", "--host", "0.0.0.0" -WindowStyle Hidden
          # Wait for the server to start
          Start-Sleep -Seconds 10
          # Test if the server is responding
          try {
            $response = Invoke-RestMethod -Uri "http://localhost:8080/health" -Method Get
            Write-Host "[PASSED] Server started successfully and is responding"
            Stop-Process -Name "llama-server" -Force -ErrorAction SilentlyContinue
            exit 0
          } catch {
            Write-Host "[FAILED] Server failed to start or respond"
            Stop-Process -Name "llama-server" -Force -ErrorAction SilentlyContinue
            exit 1
          }
      - name: Test inference (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: |
          Write-Host "Testing inference with ${{ matrix.binary-name }}..."
          # Start the server in the background
          Start-Process -FilePath ".\llama\build\bin\${{ matrix.binary-name }}" -ArgumentList "--model", "models\Lucy-Q4_0.gguf", "--port", "8080", "--host", "0.0.0.0" -WindowStyle Hidden
          # Wait for the server to start
          Start-Sleep -Seconds 15
          # Build the completion request body
          $body = @{
            prompt = "Hello, how are you?"
            n_predict = 10
            temperature = 0.7
            stop = @("`n", "User:", "Assistant:")
          } | ConvertTo-Json
          try {
            $response = Invoke-RestMethod -Uri "http://localhost:8080/completion" -Method Post -Body $body -ContentType "application/json"
            Write-Host "[PASSED] Inference test passed"
            $response | ConvertTo-Json -Depth 10
            Stop-Process -Name "llama-server" -Force -ErrorAction SilentlyContinue
            exit 0
          } catch {
            Write-Host "[FAILED] Inference test failed"
            Write-Host $_.Exception.Message
            Stop-Process -Name "llama-server" -Force -ErrorAction SilentlyContinue
            exit 1
          }
      - name: Upload test results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: test-results-${{ matrix.os }}-${{ matrix.name }}
          path: |
            response.json
            *.log
          retention-days: 1
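For debugging a failing run locally, the same smoke test can be reproduced by hand. A minimal sketch, assuming the extracted binary and the downloaded model sit at the same paths the workflow uses:

    # Start the server in the background and give it time to load the model
    ./llama/build/bin/llama-server --model models/Lucy-Q4_0.gguf --host 0.0.0.0 --port 8080 &
    sleep 10

    # Health check, then a single short completion
    curl -s http://localhost:8080/health
    curl -s -X POST http://localhost:8080/completion \
      -H "Content-Type: application/json" \
      -d '{"prompt": "Hello, how are you?", "n_predict": 10}'

    # Stop the server
    kill $!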