forked from ROCm/aiter
-
Notifications
You must be signed in to change notification settings - Fork 0
160 lines (132 loc) · 5.38 KB
/
vllm_benchmark.yaml
File metadata and controls
160 lines (132 loc) · 5.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# Builds an aiter-on-vLLM image for the commit under test and runs the vLLM
# latency benchmark against it.
name: vLLM Benchmark

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:

# One active run per workflow + ref. Runs on main are never cancelled; a newer
# run on any other ref cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

env:
  VLLM_BRANCH: "main"
  VLLM_REPOSITORY_URL: "https://github.com/vllm-project/vllm"
  # Base image the CI image is derived from.
  BASE_IMAGE: "rocm/vllm-dev:nightly"
  # Repository to clone inside the image: the PR head repository when the
  # event is a pull request, otherwise the upstream aiter repository.
  GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 'https://github.com/ROCm/aiter.git' }}
  # Commit to build and the tag of the resulting image. Resolution order:
  # PR head SHA (pull_request), pushed head commit (push), then github.sha.
  # The last fallback covers events such as workflow_dispatch, whose payload
  # has neither of the first two fields and would otherwise yield an empty
  # value (and therefore an invalid image tag and an empty git ref).
  GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id || github.sha }}
jobs:
  # Gate job: runs the repository's signal-check script; the image build
  # below depends on it.
  check-signal:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Download and check signal artifact
        run: ./.github/scripts/check_signal.sh
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # NOTE(review): GITHUB_SHA is also a default runner variable;
          # confirm the script really needs this explicit assignment.
          GITHUB_SHA: ${{ github.sha }}

  # Builds rocm/aiter-ci:<commit> on top of BASE_IMAGE with aiter reinstalled
  # from the commit under test, then pushes it for the benchmark job.
  # Skipped for pull requests coming from forks.
  build_vllm_image:
    if: ${{ !github.event.pull_request.head.repo.fork }}
    needs: [check-signal]
    runs-on: aiter-k8s-build
    steps:
      - name: Checkout aiter repo
        uses: actions/checkout@v4

      - name: Sync submodules
        run: |
          set -e
          git submodule sync
          git submodule update --init --recursive --depth 1 --jobs 4

      - name: Docker login
        env:
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
        # Feed the password on stdin so it never appears in the process
        # argument list or in the rendered script text.
        run: |
          printf '%s' "${DOCKER_PASSWORD}" | docker login -u rocmshared --password-stdin

      - name: Download the vLLM base image
        run: |
          docker pull ${{ env.BASE_IMAGE }}

      - name: Generate Dockerfile
        # The heredoc delimiter is unquoted, so the shell turns each "\\"
        # into a single "\" in Dockerfile.mod, i.e. a Dockerfile line
        # continuation.
        run: |
          cat <<EOF > Dockerfile.mod
          FROM ${{ env.BASE_IMAGE }}
          RUN echo "=== Aiter version BEFORE uninstall ===" && pip show aiter || true
          RUN pip uninstall -y aiter
          RUN pip config set global.default-timeout 60 \\
              && pip config set global.retries 10
          RUN pip config set global.index-url https://ausartifactory.amd.com/artifactory/api/pypi/hw-cpe-prod-remote/simple
          RUN pip install --upgrade "pybind11>=3.0.1"
          RUN pip show pybind11
          RUN git clone ${{ env.GITHUB_REPO_URL }} /aiter && \\
              cd /aiter && \\
              git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\
              git submodule sync && git submodule update --init --recursive && \\
              python3 setup.py develop
          RUN echo "=== Aiter version AFTER installation ===" && pip show aiter || true
          EOF

      - name: Show Dockerfile
        run: cat Dockerfile.mod

      - name: Build Docker image
        run: |
          IMAGE_TAG=rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}
          docker build --network=host --no-cache -t "${IMAGE_TAG}" -f Dockerfile.mod .

      - name: Push Docker image
        run: |
          IMAGE_TAG=rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}
          docker push "${IMAGE_TAG}"

      - name: Success message
        run: |
          echo "Successfully prepared image: rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}"

  # Runs `vllm bench latency` inside the freshly built image, once per
  # (model, kv_cache_dtype) combination that is not excluded below.
  vllm_benchmark:
    if: ${{ !github.event.pull_request.head.repo.fork }}
    runs-on: aiter-8gpu-runner
    needs: build_vllm_image
    strategy:
      fail-fast: false
      matrix:
        model:
          - 'mistralai/Mixtral-8x7B-Instruct-v0.1'
          - 'deepseek-ai/DeepSeek-R1'
        kv_cache_dtype:
          - 'default_kvcache'
          - 'fp8_kvcache'
        exclude:
          - model: 'deepseek-ai/DeepSeek-R1'
            kv_cache_dtype: 'fp8_kvcache'
    env:
      # Explicit container name, unique per run, attempt and matrix entry, so
      # the clean-up step has a real container to target.
      BENCH_CONTAINER: aiter-vllm-bench-${{ github.run_id }}-${{ github.run_attempt }}-${{ strategy.job-index }}
    steps:
      - name: Docker login
        env:
          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
        # Feed the password on stdin so it never appears in the process
        # argument list or in the rendered script text.
        run: |
          printf '%s' "${DOCKER_PASSWORD}" | docker login -u rocmshared --password-stdin

      - name: Download the vLLM image
        run: |
          docker pull rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}

      - name: Run benchmarks
        env:
          # Expressions are handed to the script as environment variables
          # rather than being spliced into the shell source.
          MODEL: ${{ matrix.model }}
          KV_CACHE_DTYPE: ${{ matrix.kv_cache_dtype }}
          HF_TOKEN: ${{ secrets.HF_TOKEN_TEST }}
          IMAGE: rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}
        run: |
          set -x -o pipefail
          echo "Starting benchmark for model: ${MODEL} with kv_cache_dtype: ${KV_CACHE_DTYPE}"
          # Replace every "/" in the model id so it is usable as a file name.
          logFile="result_${MODEL//\//_}_kv_${KV_CACHE_DTYPE}.log"
          # Per-combination extra CLI flags, kept as an array so each flag
          # stays a separate argument without relying on word splitting.
          extraArgs=()
          if [[ "${MODEL}" == *DeepSeek* ]]; then
            extraArgs+=(--block-size 1)
          fi
          if [[ "${KV_CACHE_DTYPE}" == "fp8_kvcache" ]]; then
            extraArgs+=(--kv-cache-dtype fp8)
          fi
          # "-e HF_TOKEN" (no value) forwards the variable from this step's
          # environment into the container.
          docker run --rm --name "${BENCH_CONTAINER}" \
            --device=/dev/kfd --device=/dev/dri --group-add video \
            --ulimit core=0:0 --ulimit memlock=-1:-1 --ulimit stack=67108864 --cap-add=SYS_PTRACE \
            --network=host --security-opt seccomp=unconfined --shm-size=16G \
            -e HF_TOKEN -e VLLM_ROCM_USE_AITER=1 \
            "${IMAGE}" python -m vllm.entrypoints.cli.main bench latency \
            --model "${MODEL}" \
            --batch-size 123 --input-len 456 --output-len 78 \
            --num-iters-warmup 3 --num-iters 10 \
            -tp 8 --load-format dummy "${extraArgs[@]}" |& tee "${logFile}"
          # Print only the numeric value of the "Avg latency:" line.
          grep "Avg latency:" "${logFile}" | awk '{print $3}'

      - name: Clean up
        if: always()
        # Best effort: the container is normally already gone because of
        # --rm, so failures here are deliberately ignored.
        run: |
          docker rm -f "${BENCH_CONTAINER}" || true
          docker rmi rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }} || true

  # Placeholder job that only reports why the jobs above were skipped for a
  # pull request opened from a fork.
  skip-job:
    if: ${{ github.event.pull_request.head.repo.fork }}
    runs-on: ubuntu-latest
    steps:
      - name: Skip reason
        run: echo "It's a fork repository, skipping tests."