testing vllm 0.11.2 #31

Workflow file for this run

name: PR - vLLM
on:
pull_request:
branches:
- main
paths:
- "docker/vllm/**"
permissions:
contents: read
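# Cancel any in-flight run for the same PR when a new commit is pushed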
concurrency:
group: pr-vllm-${{ github.event.pull_request.number }}
cancel-in-progress: true
jobs:
check-changes:
runs-on: ubuntu-latest
outputs:
vllm-ec2: ${{ steps.changes.outputs.vllm-ec2 }}
vllm-rayserve-ec2: ${{ steps.changes.outputs.vllm-rayserve-ec2 }}
steps:
- uses: actions/checkout@v5
- uses: actions/setup-python@v6
with:
python-version: "3.12"
- uses: pre-commit/action@v3.0.1
with:
extra_args: --all-files
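# Map changed paths to per-image flags; the build jobs below key off these outputs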
- name: Detect file changes
id: changes
uses: dorny/paths-filter@v3
with:
filters: |
vllm-ec2:
- "docker/vllm/Dockerfile"
vllm-rayserve-ec2:
- "docker/vllm/Dockerfile.rayserve"
# Test the upstream vLLM image; not gated on check-changes outputs, so it runs on every PR that touches docker/vllm/**
vllm-upstream-sagemaker_standards-test:
needs: [check-changes]
runs-on:
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
fleet:x86-g6xl-runner
steps:
- name: Checkout DLC source
uses: actions/checkout@v5
- name: Pull image
run: |
docker pull docker.io/vllm/vllm-openai:v0.11.1
- name: Checkout vLLM Tests
uses: actions/checkout@v5
with:
repository: vllm-project/vllm
ref: v0.11.1
path: vllm_source
- name: Start container
run: |
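# Run detached so later steps can `docker exec` into the container; mount the
# runner's HF and vLLM caches to avoid re-downloading model weights on every run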
CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-v ${HOME}/.cache/vllm:/root/.cache/vllm \
-v ./vllm_source:/workdir --workdir /workdir \
-e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
docker.io/vllm/vllm-openai:v0.11.1)
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
- name: Setup for vLLM Test
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
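# --no-upgrade layers the test-only dependencies on top of the image without
# disturbing its pinned torch/vllm versions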
uv pip install --no-upgrade --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
uv pip install --no-upgrade --system pytest pytest-asyncio
uv pip install --no-upgrade --system -e tests/vllm_test_utils
uv pip install --no-upgrade --system hf_transfer
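# Move the checkout out of the import path so tests run against the installed vllm package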
mkdir src
mv vllm src/vllm
'
- name: Run vLLM Tests
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
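# Sanity-check that the GPU is visible inside the container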
nvidia-smi
# Test LoRA adapter loading/unloading via SageMaker endpoints
pytest tests/entrypoints/sagemaker/test_sagemaker_lora_adapters.py -v
# Test stateful session management
pytest tests/entrypoints/sagemaker/test_sagemaker_stateful_sessions.py -v
# Test SageMaker custom middleware
pytest tests/entrypoints/sagemaker/test_sagemaker_middleware_integration.py -v
# Test SageMaker endpoint overrides
pytest tests/entrypoints/sagemaker/test_sagemaker_handler_overrides.py -v
# Then run the related OpenAI entrypoint tests
# Test LoRA adapter loading/unloading via original OpenAI API server endpoints
pytest tests/entrypoints/openai/test_lora_adapters.py -v
# Test OpenAI API server's regular invocations endpoint
pytest -v \
tests/entrypoints/openai/test_chat.py \
tests/entrypoints/pooling/openai/test_classification.py \
tests/entrypoints/pooling/openai/test_embedding.py \
tests/entrypoints/pooling/openai/test_pooling.py \
tests/entrypoints/pooling/openai/test_rerank.py \
tests/entrypoints/pooling/openai/test_score.py \
-k "test_invocations"
'
- name: Cleanup container and images
if: always()
run: |
docker rm -f ${CONTAINER_ID} || true
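# Reclaim disk on the shared runner; images newer than 24h are kept as cache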
docker image prune -a --force --filter "until=24h"
docker system df
# vLLM jobs
build-vllm-image:
needs: [check-changes]
if: needs.check-changes.outputs.vllm-ec2 == 'true'
runs-on:
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
fleet:x86-build-runner
outputs:
image-uri: ${{ steps.image-uri-build.outputs.IMAGE_URI }}
steps:
- uses: actions/checkout@v5
- run: .github/scripts/runner_setup.sh
- run: .github/scripts/buildkitd.sh
- name: ECR login
uses: ./.github/actions/ecr-authenticate
with:
aws_region: ${{ vars.AWS_REGION }}
aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
- name: Resolve image URI for build
id: image-uri-build
run: |
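# The tag encodes framework, Python, and CUDA versions plus the PR number so concurrent PRs don't collide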
IMAGE_URI=${{ vars.AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/ci:vllm-0.11.0-gpu-py312-cu128-ubuntu22.04-ec2-pr-${{ github.event.pull_request.number }}
echo "Image URI to build: ${IMAGE_URI}"
echo "IMAGE_URI=${IMAGE_URI}" >> ${GITHUB_ENV}
echo "IMAGE_URI=${IMAGE_URI}" >> ${GITHUB_OUTPUT}
- name: Build image
run: |
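# --cache-to=type=inline embeds cache metadata in the pushed image, so the image
# built by the previous run of this PR doubles as the remote layer cache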
docker buildx build --progress plain \
--build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
--cache-to=type=inline \
--cache-from=type=registry,ref=${IMAGE_URI} \
--tag ${IMAGE_URI} \
--target vllm-ec2 \
-f docker/vllm/Dockerfile .
- name: Container push
run: |
docker push ${IMAGE_URI}
docker rmi ${IMAGE_URI}
vllm-regression-test:
needs: [build-vllm-image]
if: needs.build-vllm-image.result == 'success'
runs-on:
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
fleet:x86-g6xl-runner
steps:
- name: Checkout DLC source
uses: actions/checkout@v5
- name: Container pull
uses: ./.github/actions/ecr-authenticate
with:
aws_region: ${{ vars.AWS_REGION }}
aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
image_uri: ${{ needs.build-vllm-image.outputs.image-uri }}
- name: Checkout vLLM tests
uses: actions/checkout@v5
with:
repository: vllm-project/vllm
ref: v0.11.1
path: vllm_source
- name: Start container
run: |
CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-v ${HOME}/.cache/vllm:/root/.cache/vllm \
-v ./vllm_source:/workdir --workdir /workdir \
-e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
${{ needs.build-vllm-image.outputs.image-uri }})
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
- name: Setup for vLLM tests
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
uv pip install --no-upgrade --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
uv pip install --no-upgrade --system pytest pytest-asyncio
uv pip install --no-upgrade --system -e tests/vllm_test_utils
uv pip install --no-upgrade --system hf_transfer
mkdir src
mv vllm src/vllm
'
- name: Run vLLM tests
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
nvidia-smi
# Regression Test # 7min
cd /workdir/tests
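# modelscope is needed by the regression suite but is not preinstalled in the image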
uv pip install --system modelscope
pytest -v -s test_regression.py
'
- name: Cleanup container and images
if: always()
uses: ./.github/actions/container-cleanup
with:
container_id: ${{ env.CONTAINER_ID }}
vllm-cuda-test:
needs: [build-vllm-image]
if: needs.build-vllm-image.result == 'success'
runs-on:
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
fleet:x86-g6xl-runner
steps:
- name: Checkout DLC source
uses: actions/checkout@v5
- name: Container pull
uses: ./.github/actions/ecr-authenticate
with:
aws_region: ${{ vars.AWS_REGION }}
aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
image_uri: ${{ needs.build-vllm-image.outputs.image-uri }}
- name: Checkout vLLM tests
uses: actions/checkout@v5
with:
repository: vllm-project/vllm
ref: v0.11.1
path: vllm_source
- name: Start container
run: |
CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-v ${HOME}/.cache/vllm:/root/.cache/vllm \
-v ./vllm_source:/workdir --workdir /workdir \
-e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
${{ needs.build-vllm-image.outputs.image-uri }})
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
- name: Setup for vLLM tests
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
uv pip install --no-upgrade --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
uv pip install --no-upgrade --system pytest pytest-asyncio
uv pip install --no-upgrade --system -e tests/vllm_test_utils
uv pip install --no-upgrade --system hf_transfer
mkdir src
mv vllm src/vllm
'
- name: Run vLLM tests
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
nvidia-smi
# Platform Tests (CUDA) # 4min
cd /workdir/tests
pytest -v -s cuda/test_cuda_context.py
'
- name: Cleanup container and images
if: always()
uses: ./.github/actions/container-cleanup
with:
container_id: ${{ env.CONTAINER_ID }}
vllm-sagemaker_standards-test:
needs: [build-vllm-image]
if: needs.build-vllm-image.result == 'success'
runs-on:
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
fleet:x86-g6xl-runner
steps:
- name: Checkout DLC source
uses: actions/checkout@v5
- name: Container pull
uses: ./.github/actions/ecr-authenticate
with:
aws_region: ${{ vars.AWS_REGION }}
aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
image_uri: ${{ needs.build-vllm-image.outputs.image-uri }}
- name: Checkout vLLM tests
uses: actions/checkout@v5
with:
repository: vllm-project/vllm
ref: v0.11.1
path: vllm_source
- name: Start container
run: |
CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-v ${HOME}/.cache/vllm:/root/.cache/vllm \
-v ./vllm_source:/workdir --workdir /workdir \
-e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
${{ needs.build-vllm-image.outputs.image-uri }})
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
- name: Setup for vLLM tests
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
uv pip install --no-upgrade --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
uv pip install --no-upgrade --system pytest pytest-asyncio
uv pip install --no-upgrade --system -e tests/vllm_test_utils
uv pip install --no-upgrade --system hf_transfer
mkdir src
mv vllm src/vllm
'
- name: Run vLLM tests
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
nvidia-smi
# Test LoRA adapter loading/unloading via SageMaker endpoints
pytest tests/entrypoints/sagemaker/test_sagemaker_lora_adapters.py -v
# Test stateful session management
pytest tests/entrypoints/sagemaker/test_sagemaker_stateful_sessions.py -v
# Test SageMaker custom middleware
pytest tests/entrypoints/sagemaker/test_sagemaker_middleware_integration.py -v
# Test SageMaker endpoint overrides
pytest tests/entrypoints/sagemaker/test_sagemaker_handler_overrides.py -v
# Then run the related OpenAI entrypoint tests
# Test LoRA adapter loading/unloading via original OpenAI API server endpoints
pytest tests/entrypoints/openai/test_lora_adapters.py -v
# Test OpenAI API server's regular invocations endpoint
pytest -v \
tests/entrypoints/openai/test_chat.py \
tests/entrypoints/pooling/openai/test_classification.py \
tests/entrypoints/pooling/openai/test_embedding.py \
tests/entrypoints/pooling/openai/test_pooling.py \
tests/entrypoints/pooling/openai/test_rerank.py \
tests/entrypoints/pooling/openai/test_score.py \
-k "test_invocations"
'
- name: Cleanup container and images
if: always()
uses: ./.github/actions/container-cleanup
with:
container_id: ${{ env.CONTAINER_ID }}
# vLLM RayServe jobs
build-rayserve-image:
needs: [check-changes]
if: needs.check-changes.outputs.vllm-rayserve-ec2 == 'true'
runs-on:
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
fleet:x86-build-runner
outputs:
image-uri: ${{ steps.image-uri-build.outputs.IMAGE_URI }}
steps:
- uses: actions/checkout@v5
- run: .github/scripts/runner_setup.sh
- run: .github/scripts/buildkitd.sh
- name: ECR login
run: |
aws ecr get-login-password --region ${{ vars.AWS_REGION }} | docker login --username AWS --password-stdin ${{ vars.AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com
- name: Resolve image URI for build
id: image-uri-build
run: |
IMAGE_URI=${{ vars.AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }}
echo "Image URI to build: ${IMAGE_URI}"
echo "IMAGE_URI=${IMAGE_URI}" >> ${GITHUB_ENV}
echo "IMAGE_URI=${IMAGE_URI}" >> ${GITHUB_OUTPUT}
- name: Build image
run: |
docker buildx build --progress plain \
--build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
--cache-to=type=inline \
--cache-from=type=registry,ref=${IMAGE_URI} \
--tag ${IMAGE_URI} \
--target vllm-rayserve-ec2 \
-f docker/vllm/Dockerfile.rayserve .
- name: Container push
run: |
docker push ${IMAGE_URI}
docker rmi ${IMAGE_URI}
rayserve-regression-test:
needs: [build-rayserve-image]
if: needs.build-rayserve-image.result == 'success'
runs-on:
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
fleet:x86-g6xl-runner
steps:
- name: Checkout DLC source
uses: actions/checkout@v5
- name: Container pull
uses: ./.github/actions/ecr-authenticate
with:
aws_region: ${{ vars.AWS_REGION }}
aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
image_uri: ${{ needs.build-rayserve-image.outputs.image-uri }}
- name: Checkout vLLM tests
uses: actions/checkout@v5
with:
repository: vllm-project/vllm
ref: v0.10.2
path: vllm_source
- name: Start container
run: |
CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-v ${HOME}/.cache/vllm:/root/.cache/vllm \
-v ./vllm_source:/workdir --workdir /workdir \
-e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
${{ needs.build-rayserve-image.outputs.image-uri }})
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
- name: Setup for vLLM tests
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
uv pip install --no-upgrade --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
uv pip install --no-upgrade --system pytest pytest-asyncio
uv pip install --no-upgrade --system -e tests/vllm_test_utils
uv pip install --no-upgrade --system hf_transfer
mkdir src
mv vllm src/vllm
'
- name: Run vLLM tests
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
nvidia-smi
# Regression Test # 7min
cd /workdir/tests
uv pip install --system modelscope
pytest -v -s test_regression.py
'
- name: Cleanup container and images
if: always()
uses: ./.github/actions/container-cleanup
with:
container_id: ${{ env.CONTAINER_ID }}
rayserve-cuda-test:
needs: [build-rayserve-image]
if: needs.build-rayserve-image.result == 'success'
runs-on:
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
fleet:x86-g6xl-runner
steps:
- name: Checkout DLC source
uses: actions/checkout@v5
- name: Container pull
uses: ./.github/actions/ecr-authenticate
with:
aws_region: ${{ vars.AWS_REGION }}
aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
image_uri: ${{ needs.build-rayserve-image.outputs.image-uri }}
- name: Checkout vLLM tests
uses: actions/checkout@v5
with:
repository: vllm-project/vllm
ref: v0.10.2
path: vllm_source
- name: Start container
run: |
CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-v ${HOME}/.cache/vllm:/root/.cache/vllm \
-v ./vllm_source:/workdir --workdir /workdir \
-e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
${{ needs.build-rayserve-image.outputs.image-uri }})
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
- name: Setup for vLLM tests
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
uv pip install --no-upgrade --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
uv pip install --no-upgrade --system pytest pytest-asyncio
uv pip install --no-upgrade --system -e tests/vllm_test_utils
uv pip install --no-upgrade --system hf_transfer
mkdir src
mv vllm src/vllm
'
- name: Run vLLM tests
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
nvidia-smi
# Platform Tests (CUDA) # 4min
cd /workdir/tests
pytest -v -s cuda/test_cuda_context.py
'
- name: Cleanup container and images
if: always()
uses: ./.github/actions/container-cleanup
with:
container_id: ${{ env.CONTAINER_ID }}
rayserve-example-test:
needs: [build-rayserve-image]
if: needs.build-rayserve-image.result == 'success'
runs-on:
- codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
fleet:x86-g6xl-runner
steps:
- name: Checkout DLC source
uses: actions/checkout@v5
- name: Container pull
uses: ./.github/actions/ecr-authenticate
with:
aws_region: ${{ vars.AWS_REGION }}
aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
image_uri: ${{ needs.build-rayserve-image.outputs.image-uri }}
- name: Checkout vLLM tests
uses: actions/checkout@v5
with:
repository: vllm-project/vllm
ref: v0.10.2
path: vllm_source
- name: Start container
run: |
CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
-v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
-v ${HOME}/.cache/vllm:/root/.cache/vllm \
-v ./vllm_source:/workdir --workdir /workdir \
-e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
${{ needs.build-rayserve-image.outputs.image-uri }})
echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
- name: Setup for vLLM tests
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
uv pip install --no-upgrade --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
uv pip install --no-upgrade --system pytest pytest-asyncio
uv pip install --no-upgrade --system -e tests/vllm_test_utils
uv pip install --no-upgrade --system hf_transfer
mkdir src
mv vllm src/vllm
'
- name: Run vLLM tests
run: |
docker exec ${CONTAINER_ID} sh -c '
set -eux
nvidia-smi
# Examples Test # 30min
cd /workdir/examples
pip install tensorizer # for tensorizer test
python3 offline_inference/basic/generate.py --model facebook/opt-125m
# python3 offline_inference/basic/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10
python3 offline_inference/basic/chat.py
python3 offline_inference/prefix_caching.py
python3 offline_inference/llm_engine_example.py
python3 offline_inference/audio_language.py --seed 0
python3 offline_inference/vision_language.py --seed 0
python3 offline_inference/vision_language_pooling.py --seed 0
python3 offline_inference/vision_language_multi_image.py --seed 0
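# VLLM_USE_V1=0 forces the legacy V0 engine for the tensorizer round-trip below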
VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
python3 offline_inference/basic/classify.py
python3 offline_inference/basic/embed.py
python3 offline_inference/basic/score.py
VLLM_USE_V1=0 python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2
'
- name: Cleanup container and images
if: always()
uses: ./.github/actions/container-cleanup
with:
container_id: ${{ env.CONTAINER_ID }}