vllm/.github/workflows/macos-smoke-test.yml at main · Pradyun92/vllm · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
name: macOS Apple Silicon Smoke Test

# Runs on every push to main; can also be started by hand from the Actions UI.
on:
  push:
    branches:
      - main
  workflow_dispatch:  # Manual trigger

# Least privilege for GITHUB_TOKEN: the steps in this workflow only read the
# repository (checkout); nothing here writes back to it.
permissions:
  contents: read

jobs:
  # Single job: installs and builds vLLM on a macOS runner, then starts
  # `vllm serve` and sends it a health check and one completion request.
  macos-m1-smoke-test:
    # NOTE(review): the job name implies Apple Silicon — confirm that
    # `macos-latest` currently maps to an arm64 runner image.
    runs-on: macos-latest
    # Upper bound for the whole job (setup, build, and smoke test together).
    timeout-minutes: 20

    steps:
      # Check out the repository so the requirements files and sources are
      # available to the following steps.
      - uses: actions/checkout@v4

      # Install uv, requesting Python 3.12 and enabling the action's cache.
      - uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          # Globs handed to the action for its cache handling; presumably a
          # change in any matching file invalidates the cache — confirm
          # against the setup-uv documentation.
          cache-dependency-glob: |
            requirements/**/*.txt
            pyproject.toml
          # Quoted so the version stays a string rather than a float.
          python-version: '3.12'

      - name: Create virtual environment
        # Creates a virtual environment with uv and appends the workspace's
        # .venv/bin directory to the GITHUB_PATH file, so later steps can
        # invoke `python` and `vllm` from that environment.
        run: |
          uv venv
          printf '%s\n' "$GITHUB_WORKSPACE/.venv/bin" >> "$GITHUB_PATH"

      - name: Install dependencies and build vLLM
        env:
          # Presumably read by the CMake-driven part of the editable build to
          # limit parallel compile jobs — confirm against the build setup.
          CMAKE_BUILD_PARALLEL_LEVEL: "4"
        run: |
          # Requirements from requirements/cpu.txt first, then the checkout
          # itself as an editable install.
          uv pip install \
            -r requirements/cpu.txt \
            --index-strategy unsafe-best-match
          uv pip install -e .

      # Sanity check: import vllm and torch, each in its own interpreter, and
      # print their versions to the job log. A failing import makes the
      # command exit non-zero.
      - name: Verify installation
        run: |
          python -c "import vllm; print(f'vLLM version: {vllm.__version__}')"
          python -c "import torch; print(f'PyTorch: {torch.__version__}')"

      # Starts `vllm serve` in the background, polls /health until it answers
      # successfully, then sends one health request and one completion
      # request. The server is stopped when the script exits, whether the
      # checks passed or failed.
      - name: Smoke test vllm serve
        timeout-minutes: 10
        run: |
          PORT=8000
          BASE_URL="http://localhost:${PORT}"
          # Readiness polling: MAX_ATTEMPTS probes, 2 seconds apart.
          MAX_ATTEMPTS=30

          # Start server in background
          vllm serve Qwen/Qwen3-0.6B \
            --max-model-len=2048 \
            --load-format=dummy \
            --enforce-eager \
            --port "$PORT" &

          SERVER_PID=$!

          # Stop the server on every exit path. GitHub's default bash step
          # shell exits on the first failing command, so without this trap a
          # failing `curl -f` below would skip the cleanup and leave the
          # background process running.
          cleanup() {
            kill "$SERVER_PID" 2>/dev/null || true
          }
          trap cleanup EXIT

          # Wait for server to start
          for ((i = 1; i <= MAX_ATTEMPTS; i++)); do
            # Fail fast when the server process is already gone instead of
            # polling a dead port until the attempts run out.
            if ! kill -0 "$SERVER_PID" 2>/dev/null; then
              echo "Server process exited before becoming healthy"
              exit 1
            fi
            # -f makes curl fail on HTTP error statuses, so only a successful
            # response counts as "started".
            if curl -sf "$BASE_URL/health" > /dev/null; then
              echo "Server started successfully"
              break
            fi
            if [ "$i" -eq "$MAX_ATTEMPTS" ]; then
              echo "Server failed to start"
              exit 1
            fi
            sleep 2
          done

          # Test health endpoint
          curl -f "$BASE_URL/health"

          # Test completion
          curl -f "$BASE_URL/v1/completions" \
            -H "Content-Type: application/json" \
            -d '{
              "model": "Qwen/Qwen3-0.6B",
              "prompt": "Hello",
              "max_tokens": 5
            }'

          # Cleanup happens in the EXIT trap registered above.