Skip to content

Commit 0b07e84

Browse files
authored
Merge branch 'main' into enable_bitsandbytes_quant_rocm
2 parents 9b3214a + be263f7 commit 0b07e84

File tree

606 files changed

+30684
-12432
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

606 files changed

+30684
-12432
lines changed

.buildkite/release-pipeline.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ steps:
132132
queue: cpu_queue_postmerge
133133
commands:
134134
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
135-
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_CPU_AVX512BF16=true --build-arg VLLM_CPU_AVX512VNNI=true --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ."
135+
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_CPU_AVX512BF16=true --build-arg VLLM_CPU_AVX512VNNI=true --build-arg VLLM_CPU_AMXBF16=true --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ."
136136
- "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest"
137137
- "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)"
138138
env:

.buildkite/scripts/hardware_ci/run-amd-test.sh

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ while true; do
5959
fi
6060
done
6161

62-
echo "--- Pulling container"
62+
echo "--- Pulling container"
6363
image_name="rocm/vllm-ci:${BUILDKITE_COMMIT}"
6464
container_name="rocm_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)"
6565
docker pull "${image_name}"
@@ -78,17 +78,13 @@ HF_MOUNT="/root/.cache/huggingface"
7878
commands=$@
7979
echo "Commands:$commands"
8080

81-
if [[ $commands == *"pytest -v -s basic_correctness/test_basic_correctness.py"* ]]; then
82-
commands=${commands//"pytest -v -s basic_correctness/test_basic_correctness.py"/"VLLM_USE_TRITON_FLASH_ATTN=0 pytest -v -s basic_correctness/test_basic_correctness.py"}
83-
fi
81+
commands=${commands//"pytest -v -s basic_correctness/test_basic_correctness.py"/"pytest -v -s basic_correctness/test_basic_correctness.py"}
8482

8583
if [[ $commands == *"pytest -v -s models/test_registry.py"* ]]; then
8684
commands=${commands//"pytest -v -s models/test_registry.py"/"pytest -v -s models/test_registry.py -k 'not BambaForCausalLM and not GritLM and not Mamba2ForCausalLM and not Zamba2ForCausalLM'"}
8785
fi
8886

89-
if [[ $commands == *"pytest -v -s compile/test_basic_correctness.py"* ]]; then
90-
commands=${commands//"pytest -v -s compile/test_basic_correctness.py"/"VLLM_USE_TRITON_FLASH_ATTN=0 pytest -v -s compile/test_basic_correctness.py"}
91-
fi
87+
commands=${commands//"pytest -v -s compile/test_basic_correctness.py"/"pytest -v -s compile/test_basic_correctness.py"}
9288

9389
if [[ $commands == *"pytest -v -s lora"* ]]; then
9490
commands=${commands//"pytest -v -s lora"/"VLLM_ROCM_CUSTOM_PAGED_ATTN=0 pytest -v -s lora"}
@@ -181,13 +177,13 @@ if [[ -z "$render_gid" ]]; then
181177
exit 1
182178
fi
183179

184-
# Check whether the command contains the shard flag; if so, run all shards in parallel because the host has 8 GPUs.
180+
# Check whether the command contains the shard flag; if so, run all shards in parallel because the host has 8 GPUs.
185181
if [[ $commands == *"--shard-id="* ]]; then
186-
# assign job count as the number of shards used
187-
commands=${commands//"--num-shards= "/"--num-shards=${PARALLEL_JOB_COUNT} "}
182+
# assign job count as the number of shards used
183+
commands=$(echo "$commands" | sed -E "s/--num-shards[[:blank:]]*=[[:blank:]]*[0-9]*/--num-shards=${PARALLEL_JOB_COUNT} /g" | sed 's/ \\ / /g')
188184
for GPU in $(seq 0 $(($PARALLEL_JOB_COUNT-1))); do
189185
# assign shard-id for each shard
190-
commands_gpu=${commands//"--shard-id= "/"--shard-id=${GPU} "}
186+
commands_gpu=$(echo "$commands" | sed -E "s/--shard-id[[:blank:]]*=[[:blank:]]*[0-9]*/--shard-id=${GPU} /g" | sed 's/ \\ / /g')
191187
echo "Shard ${GPU} commands:$commands_gpu"
192188
echo "Render devices: $BUILDKITE_AGENT_META_DATA_RENDER_DEVICES"
193189
docker run \

.buildkite/scripts/hardware_ci/run-cpu-test.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ function cpu_tests() {
4949
# Run kernel tests
5050
docker exec cpu-test-"$NUMA_NODE" bash -c "
5151
set -e
52+
pytest -x -v -s tests/kernels/attention/test_cpu_attn.py
5253
pytest -x -v -s tests/kernels/test_onednn.py"
5354

5455
# Run basic model test
@@ -76,7 +77,7 @@ function cpu_tests() {
7677
# Run AWQ test
7778
# docker exec cpu-test-"$NUMA_NODE" bash -c "
7879
# set -e
79-
# VLLM_USE_V1=0 pytest -x -s -v \
80+
# pytest -x -s -v \
8081
# tests/quantization/test_ipex_quant.py"
8182

8283
# Run multi-lora tests
@@ -116,4 +117,4 @@ function cpu_tests() {
116117

117118
# All CPU tests are expected to finish in less than 40 mins.
118119
export -f cpu_tests
119-
timeout 2h bash -c "cpu_tests $CORE_RANGE $NUMA_NODE"
120+
timeout 2.5h bash -c "cpu_tests $CORE_RANGE $NUMA_NODE"

.buildkite/scripts/hardware_ci/run-xpu-test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,6 @@ docker run \
4646
pytest -v -s v1/worker --ignore=v1/worker/test_gpu_model_runner.py
4747
pytest -v -s v1/structured_output
4848
pytest -v -s v1/spec_decode --ignore=v1/spec_decode/test_max_len.py --ignore=v1/spec_decode/test_tree_attention.py --ignore=v1/spec_decode/test_speculators_eagle3.py
49-
pytest -v -s v1/kv_connector/unit --ignore=v1/kv_connector/unit/test_multi_connector.py --ignore=v1/kv_connector/unit/test_nixl_connector.py --ignore=v1/kv_connector/unit/test_shared_storage_connector.py
49+
pytest -v -s v1/kv_connector/unit --ignore=v1/kv_connector/unit/test_multi_connector.py --ignore=v1/kv_connector/unit/test_nixl_connector.py --ignore=v1/kv_connector/unit/test_shared_storage_connector.py --ignore=v1/kv_connector/unit/test_lmcache_integration.py
5050
pytest -v -s v1/test_serial_utils.py
5151
'

0 commit comments

Comments
 (0)