# Source: aiter/.github/workflows/vllm_benchmark.yaml
# at commit d0c313d78eb04b495f6d126a281fe9e29a8d2d89 (EmbeddedLLM/aiter on GitHub)
name: vLLM Benchmark

# Runs on pushes and pull requests targeting main, and on manual dispatch.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:

# One active run per workflow + ref. Superseded runs are cancelled everywhere
# except on main, where every run is allowed to finish.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

env:
  # NOTE(review): VLLM_BRANCH and VLLM_REPOSITORY_URL are not referenced by the
  # jobs visible in this file — confirm whether they are still needed.
  VLLM_BRANCH: "main"
  VLLM_REPOSITORY_URL: "https://github.com/vllm-project/vllm"
  # Base image that the commit under test is installed into.
  BASE_IMAGE: rocm/vllm-dev:nightly
  # Repository to clone inside the image build: the PR head repository when the
  # run was triggered by a pull request, otherwise the upstream repository.
  # NOTE(review): the fallback is hard-coded to ROCm/aiter; when this workflow
  # runs in another repository the pushed commit may not exist there — confirm.
  GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 'https://github.com/ROCm/aiter.git' }}
  # Commit to build and benchmark. pull_request.head.sha is only set for PR
  # events and head_commit.id only for push events; github.sha is the final
  # fallback so that workflow_dispatch runs do not end up with an empty value
  # (which would produce the invalid image tag "rocm/aiter-ci:").
  GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id || github.sha }}

jobs:
  check-signal:
    # Gate job: build_vllm_image lists it in `needs`, so the image is only
    # built after this job succeeds.
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        name: Checkout code

      # Runs the repository's check_signal.sh with the workflow token and the
      # triggering commit SHA exported into its environment.
      # NOTE(review): GITHUB_SHA is also a runner-provided default variable —
      # confirm whether this explicit assignment has any effect.
      - name: Download and check signal artifact
        env:
          GITHUB_SHA: ${{ github.sha }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: ./.github/scripts/check_signal.sh

  build_vllm_image:
    # Builds rocm/aiter-ci:<commit> by replacing the aiter package shipped in
    # BASE_IMAGE with a fresh build of the commit under test, then pushes it
    # for the benchmark job to pull.
    # Skipped for pull requests coming from forks (skip-job reports that case).
    if: ${{ !github.event.pull_request.head.repo.fork }}
    needs: [check-signal]
    runs-on: aiter-k8s-build

    steps:
      - name: Checkout aiter repo
        uses: actions/checkout@v4

      - name: Sync submodules
        run: |
          set -e
          git submodule sync
          git submodule update --init --recursive --depth 1 --jobs 4

      # The password is fed through stdin rather than `-p` so it never appears
      # in the process argument list or in a shell trace.
      - name: Docker login
        env:
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
        run: printf '%s' "${DOCKER_PASSWORD}" | docker login -u rocmshared --password-stdin

      - name: Download the vLLM base image
        run: |
          docker pull ${{ env.BASE_IMAGE }}

      # The heredoc delimiter is quoted so the shell copies the Dockerfile text
      # verbatim (no variable/backslash processing). The ${{ ... }} expressions
      # are substituted by GitHub Actions before the shell ever runs, so they
      # still resolve. Line continuations are therefore single backslashes.
      - name: Generate Dockerfile
        run: |
          cat <<'EOF' > Dockerfile.mod
          FROM ${{ env.BASE_IMAGE }}

          RUN echo "=== Aiter version BEFORE uninstall ===" && pip show aiter || true
          RUN pip uninstall -y aiter
          RUN pip config set global.default-timeout 60 \
              && pip config set global.retries 10
          RUN pip config set global.index-url https://ausartifactory.amd.com/artifactory/api/pypi/hw-cpe-prod-remote/simple
          RUN pip install --upgrade "pybind11>=3.0.1"
          RUN pip show pybind11

          RUN git clone ${{ env.GITHUB_REPO_URL }} /aiter && \
              cd /aiter && \
              git checkout ${{ env.GITHUB_COMMIT_SHA }} && \
              git submodule sync && git submodule update --init --recursive && \
              python3 setup.py develop

          RUN echo "=== Aiter version AFTER installation ===" && pip show aiter || true
          EOF

      - name: Show Dockerfile
        run: cat Dockerfile.mod

      # --no-cache forces every layer to be rebuilt for each run.
      - name: Build Docker image
        run: |
          IMAGE_TAG="rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}"
          docker build --network=host --no-cache -t "${IMAGE_TAG}" -f Dockerfile.mod .

      - name: Push Docker image
        run: |
          IMAGE_TAG="rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}"
          docker push "${IMAGE_TAG}"

      - name: Success message
        run: |
          echo "Successfully prepared image: rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}"

  vllm_benchmark:
    # Runs `vllm bench latency` inside the image produced by build_vllm_image,
    # once per (model, kv_cache_dtype) matrix combination.
    # Skipped for pull requests coming from forks (skip-job reports that case).
    if: ${{ !github.event.pull_request.head.repo.fork }}
    runs-on: aiter-8gpu-runner
    needs: build_vllm_image
    strategy:
      # Let the remaining combinations finish even if one of them fails.
      fail-fast: false
      matrix:
        model:
          - 'mistralai/Mixtral-8x7B-Instruct-v0.1'
          - 'deepseek-ai/DeepSeek-R1'
        kv_cache_dtype:
          - 'default_kvcache'
          - 'fp8_kvcache'
        exclude:
          - model: 'deepseek-ai/DeepSeek-R1'
            kv_cache_dtype: 'fp8_kvcache'
    env:
      # Unique per run, attempt and matrix entry, so the clean-up step can
      # address exactly the container this job started.
      CONTAINER_NAME: aiter-bench-${{ github.run_id }}-${{ github.run_attempt }}-${{ strategy.job-index }}

    steps:
      # The password is fed through stdin rather than `-p` so it never appears
      # in the process argument list or in a shell trace.
      - name: Docker login
        env:
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
        run: printf '%s' "${DOCKER_PASSWORD}" | docker login -u rocmshared --password-stdin

      - name: Download the vLLM image
        run: |
          docker pull rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}

      # HF_TOKEN is exported into the step environment and forwarded with a
      # value-less `-e HF_TOKEN`, so the secret is not part of the command line
      # that `set -x` echoes.
      - name: Run benchmarks
        env:
          MODEL: ${{ matrix.model }}
          KV_CACHE_DTYPE: ${{ matrix.kv_cache_dtype }}
          HF_TOKEN: ${{ secrets.HF_TOKEN_TEST }}
        run: |
          set -x -o pipefail
          echo "Starting benchmark for model: ${MODEL} with kv_cache_dtype: ${KV_CACHE_DTYPE}"

          # Log file name: the model id with every "/" replaced by "_".
          logFile="result_${MODEL//\//_}_kv_${KV_CACHE_DTYPE}.log"

          extraArgs=()
          if [[ "${MODEL}" == *DeepSeek* ]]; then
              extraArgs+=(--block-size 1)
          fi

          if [[ "${KV_CACHE_DTYPE}" == "fp8_kvcache" ]]; then
              extraArgs+=(--kv-cache-dtype fp8)
          fi

          docker run --rm --name "${CONTAINER_NAME}" \
              --device=/dev/kfd --device=/dev/dri --group-add video \
              --ulimit core=0:0 --ulimit memlock=-1:-1 --ulimit stack=67108864 --cap-add=SYS_PTRACE \
              --network=host --security-opt seccomp=unconfined --shm-size=16G \
              -e HF_TOKEN -e VLLM_ROCM_USE_AITER=1 \
              rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }} python -m vllm.entrypoints.cli.main bench latency \
              --model "${MODEL}" \
              --batch-size 123 --input-len 456 --output-len 78 \
              --num-iters-warmup 3 --num-iters 10 \
              -tp 8 --load-format dummy "${extraArgs[@]}" |& tee "${logFile}"
          # With pipefail set, a log without an "Avg latency:" line fails the step.
          grep "Avg latency:" "${logFile}" | awk '{print $3}'

      # Best-effort clean-up, also executed after failures and cancellations.
      # `docker rm -f` stops and removes the named container if `--rm` did not
      # get the chance to; the image is then removed from the runner.
      - name: Clean up
        if: always()
        run: |
          docker rm -f "${CONTAINER_NAME}" || true
          docker rmi rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }} || true

  skip-job:
    # Runs only when the pull request's head repository is a fork — the
    # condition under which the build and benchmark jobs are skipped — and
    # prints the reason.
    runs-on: ubuntu-latest
    if: github.event.pull_request.head.repo.fork
    steps:
      - name: Skip reason
        run: echo "It's a fork repository, skipping tests."