testing vllm 0.11.2 #31
Workflow file for this run
name: PR - vLLM

on:
  pull_request:
    branches:
      - main
    paths:
      - "docker/vllm/**"

permissions:
  contents: read

concurrency:
  group: pr-vllm-${{ github.event.pull_request.number }}
  cancel-in-progress: true
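# One concurrency group per pull request: a new push cancels the previous
# in-flight run for that PR, so only the latest commit is tested.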

jobs:
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      vllm-ec2: ${{ steps.changes.outputs.vllm-ec2 }}
      vllm-rayserve-ec2: ${{ steps.changes.outputs.vllm-rayserve-ec2 }}
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: "3.12"
      - uses: pre-commit/action@v3.0.1
        with:
          extra_args: --all-files
      - name: Detect file changes
        id: changes
        uses: dorny/paths-filter@v3
        with:
          filters: |
            vllm-ec2:
              - "docker/vllm/Dockerfile"
            vllm-rayserve-ec2:
              - "docker/vllm/Dockerfile.rayserve"

  # test upstream image
  vllm-upstream-sagemaker_standards-test:
    needs: [check-changes]
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
      - fleet:x86-g6xl-runner
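    # fleet:... is a CodeBuild runner label override selecting the compute
    # fleet; g6xl is assumed to be a GPU fleet, since the steps below run
    # nvidia-smi and GPU tests.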
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Pull image
        run: |
          docker pull docker.io/vllm/vllm-openai:v0.11.1
      - name: Checkout vLLM Tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v0.11.1
          path: vllm_source
      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            docker.io/vllm/vllm-openai:v0.11.1)
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
      - name: Setup for vLLM Test
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --no-upgrade --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --no-upgrade --system pytest pytest-asyncio
            uv pip install --no-upgrade --system -e tests/vllm_test_utils
            uv pip install --no-upgrade --system hf_transfer
            mkdir src
            mv vllm src/vllm
          '
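      # Moving the vllm/ source tree into src/ presumably keeps pytest from
      # importing it out of the working directory, so the tests exercise the
      # vllm package installed in the image instead.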
      - name: Run vLLM Tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi
            # Test LoRA adapter loading/unloading via SageMaker endpoints
            pytest tests/entrypoints/sagemaker/test_sagemaker_lora_adapters.py -v
            # Test stateful session management
            pytest tests/entrypoints/sagemaker/test_sagemaker_stateful_sessions.py -v
            # Test SageMaker custom middleware
            pytest tests/entrypoints/sagemaker/test_sagemaker_middleware_integration.py -v
            # Test SageMaker endpoint overrides
            pytest tests/entrypoints/sagemaker/test_sagemaker_handler_overrides.py -v
            # Then run related OpenAI entrypoint tests:
            # Test LoRA adapter loading/unloading via original OpenAI API server endpoints
            pytest tests/entrypoints/openai/test_lora_adapters.py -v
            # Test the OpenAI API server's regular invocations endpoint
            pytest -v \
              tests/entrypoints/openai/test_chat.py \
              tests/entrypoints/pooling/openai/test_classification.py \
              tests/entrypoints/pooling/openai/test_embedding.py \
              tests/entrypoints/pooling/openai/test_pooling.py \
              tests/entrypoints/pooling/openai/test_rerank.py \
              tests/entrypoints/pooling/openai/test_score.py \
              -k "test_invocations"
          '
      - name: Cleanup container and images
        if: always()
        run: |
          docker rm -f ${CONTAINER_ID} || true
          docker image prune -a --force --filter "until=24h"
          docker system df

  # vLLM jobs
  build-vllm-image:
    needs: [check-changes]
    if: needs.check-changes.outputs.vllm-ec2 == 'true'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
      - fleet:x86-build-runner
    outputs:
      image-uri: ${{ steps.image-uri-build.outputs.IMAGE_URI }}
    steps:
      - uses: actions/checkout@v5
      - run: .github/scripts/runner_setup.sh
      - run: .github/scripts/buildkitd.sh
      - name: ECR login
        uses: ./.github/actions/ecr-authenticate
        with:
          aws_region: ${{ vars.AWS_REGION }}
          aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
      - name: Resolve image URI for build
        id: image-uri-build
        run: |
          IMAGE_URI=${{ vars.AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/ci:vllm-0.11.0-gpu-py312-cu128-ubuntu22.04-ec2-pr-${{ github.event.pull_request.number }}
          echo "Image URI to build: ${IMAGE_URI}"
          echo "IMAGE_URI=${IMAGE_URI}" >> ${GITHUB_ENV}
          echo "IMAGE_URI=${IMAGE_URI}" >> ${GITHUB_OUTPUT}
      - name: Build image
        run: |
          docker buildx build --progress plain \
            --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
            --cache-to=type=inline \
            --cache-from=type=registry,ref=${IMAGE_URI} \
            --tag ${IMAGE_URI} \
            --target vllm-ec2 \
            -f docker/vllm/Dockerfile .
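      # --cache-to=type=inline embeds the cache metadata in the pushed image
      # itself, so subsequent builds of this PR can reuse layers straight from
      # the registry via --cache-from.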
      - name: Container push
        run: |
          docker push ${IMAGE_URI}
          docker rmi ${IMAGE_URI}

  vllm-regression-test:
    needs: [build-vllm-image]
    if: needs.build-vllm-image.result == 'success'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
      - fleet:x86-g6xl-runner
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Container pull
        uses: ./.github/actions/ecr-authenticate
        with:
          aws_region: ${{ vars.AWS_REGION }}
          aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
          image_uri: ${{ needs.build-vllm-image.outputs.image-uri }}
      - name: Checkout vLLM tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v0.11.1
          path: vllm_source
      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${{ needs.build-vllm-image.outputs.image-uri }})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
      - name: Setup for vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --no-upgrade --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --no-upgrade --system pytest pytest-asyncio
            uv pip install --no-upgrade --system -e tests/vllm_test_utils
            uv pip install --no-upgrade --system hf_transfer
            mkdir src
            mv vllm src/vllm
          '
      - name: Run vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi
            # Regression Test # 7min
            cd /workdir/tests
            uv pip install --system modelscope
            pytest -v -s test_regression.py
          '
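      # modelscope is installed on the fly, presumably because
      # test_regression.py includes a ModelScope model-loading case that the
      # image's own dependencies do not cover.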
      - name: Cleanup container and images
        if: always()
        uses: ./.github/actions/container-cleanup
        with:
          container_id: ${{ env.CONTAINER_ID }}

  vllm-cuda-test:
    needs: [build-vllm-image]
    if: needs.build-vllm-image.result == 'success'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
      - fleet:x86-g6xl-runner
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Container pull
        uses: ./.github/actions/ecr-authenticate
        with:
          aws_region: ${{ vars.AWS_REGION }}
          aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
          image_uri: ${{ needs.build-vllm-image.outputs.image-uri }}
      - name: Checkout vLLM tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v0.11.1
          path: vllm_source
      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${{ needs.build-vllm-image.outputs.image-uri }})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
      - name: Setup for vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --no-upgrade --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --no-upgrade --system pytest pytest-asyncio
            uv pip install --no-upgrade --system -e tests/vllm_test_utils
            uv pip install --no-upgrade --system hf_transfer
            mkdir src
            mv vllm src/vllm
          '
      - name: Run vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi
            # Platform Tests (CUDA) # 4min
            cd /workdir/tests
            pytest -v -s cuda/test_cuda_context.py
          '
      - name: Cleanup container and images
        if: always()
        uses: ./.github/actions/container-cleanup
        with:
          container_id: ${{ env.CONTAINER_ID }}

  vllm-sagemaker_standards-test:
    needs: [build-vllm-image]
    if: needs.build-vllm-image.result == 'success'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
      - fleet:x86-g6xl-runner
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Container pull
        uses: ./.github/actions/ecr-authenticate
        with:
          aws_region: ${{ vars.AWS_REGION }}
          aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
          image_uri: ${{ needs.build-vllm-image.outputs.image-uri }}
      - name: Checkout vLLM tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v0.11.1
          path: vllm_source
      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${{ needs.build-vllm-image.outputs.image-uri }})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
      - name: Setup for vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --no-upgrade --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --no-upgrade --system pytest pytest-asyncio
            uv pip install --no-upgrade --system -e tests/vllm_test_utils
            uv pip install --no-upgrade --system hf_transfer
            mkdir src
            mv vllm src/vllm
          '
      - name: Run vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi
            # Test LoRA adapter loading/unloading via SageMaker endpoints
            pytest tests/entrypoints/sagemaker/test_sagemaker_lora_adapters.py -v
            # Test stateful session management
            pytest tests/entrypoints/sagemaker/test_sagemaker_stateful_sessions.py -v
            # Test SageMaker custom middleware
            pytest tests/entrypoints/sagemaker/test_sagemaker_middleware_integration.py -v
            # Test SageMaker endpoint overrides
            pytest tests/entrypoints/sagemaker/test_sagemaker_handler_overrides.py -v
            # Then run related OpenAI entrypoint tests:
            # Test LoRA adapter loading/unloading via original OpenAI API server endpoints
            pytest tests/entrypoints/openai/test_lora_adapters.py -v
            # Test the OpenAI API server's regular invocations endpoint
            pytest -v \
              tests/entrypoints/openai/test_chat.py \
              tests/entrypoints/pooling/openai/test_classification.py \
              tests/entrypoints/pooling/openai/test_embedding.py \
              tests/entrypoints/pooling/openai/test_pooling.py \
              tests/entrypoints/pooling/openai/test_rerank.py \
              tests/entrypoints/pooling/openai/test_score.py \
              -k "test_invocations"
          '
      - name: Cleanup container and images
        if: always()
        uses: ./.github/actions/container-cleanup
        with:
          container_id: ${{ env.CONTAINER_ID }}

  # vLLM RayServe jobs
  build-rayserve-image:
    needs: [check-changes]
    if: needs.check-changes.outputs.vllm-rayserve-ec2 == 'true'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
      - fleet:x86-build-runner
    outputs:
      image-uri: ${{ steps.image-uri-build.outputs.IMAGE_URI }}
    steps:
      - uses: actions/checkout@v5
      - run: .github/scripts/runner_setup.sh
      - run: .github/scripts/buildkitd.sh
      - name: ECR login
        run: |
          aws ecr get-login-password --region ${{ vars.AWS_REGION }} | docker login --username AWS --password-stdin ${{ vars.AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com
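      # Inline login here, unlike the other jobs, which use the
      # ./.github/actions/ecr-authenticate composite; that composite
      # presumably wraps this same get-login-password flow.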
      - name: Resolve image URI for build
        id: image-uri-build
        run: |
          IMAGE_URI=${{ vars.AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/ci:vllm-0.10.2-gpu-py312-cu128-ubuntu22.04-rayserve-ec2-pr-${{ github.event.pull_request.number }}
          echo "Image URI to build: ${IMAGE_URI}"
          echo "IMAGE_URI=${IMAGE_URI}" >> ${GITHUB_ENV}
          echo "IMAGE_URI=${IMAGE_URI}" >> ${GITHUB_OUTPUT}
      - name: Build image
        run: |
          docker buildx build --progress plain \
            --build-arg CACHE_REFRESH="$(date +"%Y-%m-%d")" \
            --cache-to=type=inline \
            --cache-from=type=registry,ref=${IMAGE_URI} \
            --tag ${IMAGE_URI} \
            --target vllm-rayserve-ec2 \
            -f docker/vllm/Dockerfile.rayserve .
      - name: Container push
        run: |
          docker push ${IMAGE_URI}
          docker rmi ${IMAGE_URI}

  rayserve-regression-test:
    needs: [build-rayserve-image]
    if: needs.build-rayserve-image.result == 'success'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
      - fleet:x86-g6xl-runner
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Container pull
        uses: ./.github/actions/ecr-authenticate
        with:
          aws_region: ${{ vars.AWS_REGION }}
          aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
          image_uri: ${{ needs.build-rayserve-image.outputs.image-uri }}
      - name: Checkout vLLM tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v0.10.2
          path: vllm_source
      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${{ needs.build-rayserve-image.outputs.image-uri }})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
      - name: Setup for vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --no-upgrade --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --no-upgrade --system pytest pytest-asyncio
            uv pip install --no-upgrade --system -e tests/vllm_test_utils
            uv pip install --no-upgrade --system hf_transfer
            mkdir src
            mv vllm src/vllm
          '
      - name: Run vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi
            # Regression Test # 7min
            cd /workdir/tests
            uv pip install --system modelscope
            pytest -v -s test_regression.py
          '
      - name: Cleanup container and images
        if: always()
        uses: ./.github/actions/container-cleanup
        with:
          container_id: ${{ env.CONTAINER_ID }}

  rayserve-cuda-test:
    needs: [build-rayserve-image]
    if: needs.build-rayserve-image.result == 'success'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
      - fleet:x86-g6xl-runner
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Container pull
        uses: ./.github/actions/ecr-authenticate
        with:
          aws_region: ${{ vars.AWS_REGION }}
          aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
          image_uri: ${{ needs.build-rayserve-image.outputs.image-uri }}
      - name: Checkout vLLM tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v0.10.2
          path: vllm_source
      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${{ needs.build-rayserve-image.outputs.image-uri }})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
      - name: Setup for vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --no-upgrade --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --no-upgrade --system pytest pytest-asyncio
            uv pip install --no-upgrade --system -e tests/vllm_test_utils
            uv pip install --no-upgrade --system hf_transfer
            mkdir src
            mv vllm src/vllm
          '
      - name: Run vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi
            # Platform Tests (CUDA) # 4min
            cd /workdir/tests
            pytest -v -s cuda/test_cuda_context.py
          '
      - name: Cleanup container and images
        if: always()
        uses: ./.github/actions/container-cleanup
        with:
          container_id: ${{ env.CONTAINER_ID }}

  rayserve-example-test:
    needs: [build-rayserve-image]
    if: needs.build-rayserve-image.result == 'success'
    runs-on:
      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
      - fleet:x86-g6xl-runner
    steps:
      - name: Checkout DLC source
        uses: actions/checkout@v5
      - name: Container pull
        uses: ./.github/actions/ecr-authenticate
        with:
          aws_region: ${{ vars.AWS_REGION }}
          aws_account_id: ${{ vars.AWS_ACCOUNT_ID }}
          image_uri: ${{ needs.build-rayserve-image.outputs.image-uri }}
      - name: Checkout vLLM tests
        uses: actions/checkout@v5
        with:
          repository: vllm-project/vllm
          ref: v0.10.2
          path: vllm_source
      - name: Start container
        run: |
          CONTAINER_ID=$(docker run -d -it --rm --gpus=all --entrypoint /bin/bash \
            -v ${HOME}/.cache/huggingface:/root/.cache/huggingface \
            -v ${HOME}/.cache/vllm:/root/.cache/vllm \
            -v ./vllm_source:/workdir --workdir /workdir \
            -e HF_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} \
            ${{ needs.build-rayserve-image.outputs.image-uri }})
          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
      - name: Setup for vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            uv pip install --no-upgrade --system -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto
            uv pip install --no-upgrade --system pytest pytest-asyncio
            uv pip install --no-upgrade --system -e tests/vllm_test_utils
            uv pip install --no-upgrade --system hf_transfer
            mkdir src
            mv vllm src/vllm
          '
      - name: Run vLLM tests
        run: |
          docker exec ${CONTAINER_ID} sh -c '
            set -eux
            nvidia-smi
            # Examples Test # 30min
            cd /workdir/examples
            pip install tensorizer # for tensorizer test
            python3 offline_inference/basic/generate.py --model facebook/opt-125m
            # python3 offline_inference/basic/generate.py --model meta-llama/Llama-2-13b-chat-hf --cpu-offload-gb 10
            python3 offline_inference/basic/chat.py
            python3 offline_inference/prefix_caching.py
            python3 offline_inference/llm_engine_example.py
            python3 offline_inference/audio_language.py --seed 0
            python3 offline_inference/vision_language.py --seed 0
            python3 offline_inference/vision_language_pooling.py --seed 0
            python3 offline_inference/vision_language_multi_image.py --seed 0
            VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
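            # The tensorizer line above round-trips facebook/opt-125m:
            # serialize writes model.tensors under /tmp, deserialize reloads
            # the model from those tensors. VLLM_USE_V1=0 presumably pins the
            # legacy engine that these two scripts still require.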
            python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
            python3 offline_inference/basic/classify.py
            python3 offline_inference/basic/embed.py
            python3 offline_inference/basic/score.py
            VLLM_USE_V1=0 python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2
          '
      - name: Cleanup container and images
        if: always()
        uses: ./.github/actions/container-cleanup
        with:
          container_id: ${{ env.CONTAINER_ID }}