Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 56 additions & 9 deletions buildkite/bootstrap.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

#!/bin/bash

set -euo pipefail
Expand Down Expand Up @@ -86,6 +85,8 @@ upload_pipeline() {
-D vllm_use_precompiled="$VLLM_USE_PRECOMPILED" \
-D cov_enabled="$COV_ENABLED" \
-D vllm_ci_branch="$VLLM_CI_BRANCH" \
-D skip_image_build="$SKIP_IMAGE_BUILD" \
-D docker_image_override="$DOCKER_IMAGE_OVERRIDE" \
| sed '/^[[:space:]]*$/d' \
> pipeline.yaml
)
Expand Down Expand Up @@ -163,9 +164,9 @@ ignore_patterns=(
"cmake/hipify.py"
"cmake/cpu_extension.cmake"
)

# Detect if there are critical changes matching patterns
CRITICAL_CHANGE_DETECTED=0
for file in $file_diff; do
# First check if file matches any pattern
matches_pattern=0
for pattern in "${patterns[@]}"; do
if [[ $file == $pattern* ]] || [[ $file == $pattern ]]; then
Expand All @@ -174,7 +175,6 @@ for file in $file_diff; do
fi
done

# If file matches pattern, check it's not in ignore patterns
if [[ $matches_pattern -eq 1 ]]; then
matches_ignore=0
for ignore in "${ignore_patterns[@]}"; do
Expand All @@ -185,25 +185,72 @@ for file in $file_diff; do
done

if [[ $matches_ignore -eq 0 ]]; then
RUN_ALL=1
echo "Found changes: $file. Run all tests"
CRITICAL_CHANGE_DETECTED=1
echo "Found critical changes: $file"
break
fi
fi
done

# Honour a RUN_ALL value supplied by the caller; fall back to 0 when it is
# unset or empty. A detected critical change always forces the full run.
: "${RUN_ALL:=0}"
if (( CRITICAL_CHANGE_DETECTED == 1 )); then
  RUN_ALL=1
  echo "RUN_ALL set due to critical changes"
fi

# Decide whether to use precompiled wheels
# Relies on existing patterns array as a basis.
if [[ -n "${VLLM_USE_PRECOMPILED:-}" ]]; then
echo "VLLM_USE_PRECOMPILED is already set to: $VLLM_USE_PRECOMPILED"
elif [[ $RUN_ALL -eq 1 ]]; then
elif [[ $CRITICAL_CHANGE_DETECTED -eq 1 || "${BUILDKITE_BRANCH}" == "main" ]]; then
export VLLM_USE_PRECOMPILED=0
echo "Detected critical changes, building wheels from source"
echo "Detected critical changes or main branch, building wheels from source"
else
export VLLM_USE_PRECOMPILED=1
echo "No critical changes, using precompiled wheels"
fi

# Decide whether the docker image build can be skipped (tests then pull an
# existing image and mount the checked-out code instead).
# A preset, non-empty SKIP_IMAGE_BUILD is kept as is. Otherwise the build is
# skipped only when precompiled wheels are in use and no critical change was
# detected.
if [[ -n "${SKIP_IMAGE_BUILD:-}" ]]; then
  echo "SKIP_IMAGE_BUILD is preset to: ${SKIP_IMAGE_BUILD}"
elif [[ "${VLLM_USE_PRECOMPILED:-}" == "1" ]] && [[ "$CRITICAL_CHANGE_DETECTED" -eq 0 ]]; then
  SKIP_IMAGE_BUILD=1
else
  SKIP_IMAGE_BUILD=0
fi

#######################################
# Pick the prebuilt image that tests run in when the image build is skipped.
# The image is looked up by the lowest common ancestor (LCA) commit between
# HEAD and origin/main.
# Globals:
#   SKIP_IMAGE_BUILD      (read; reset to 0 when the LCA image is missing)
#   VLLM_USE_PRECOMPILED  (reset to 0 when the LCA image is missing)
#   DOCKER_IMAGE_OVERRIDE (written; empty when no override applies)
#   LCA_COMMIT, IMAGE_TAG (written; kept global as in the flat script)
# Outputs:
#   Progress messages on stdout.
# Returns:
#   0 always; lookup failures select a fallback instead of aborting.
#######################################
resolve_docker_image_override() {
  DOCKER_IMAGE_OVERRIDE=""
  if [[ "${SKIP_IMAGE_BUILD:-0}" != "1" ]]; then
    return 0
  fi

  LCA_COMMIT=""
  if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
    # merge-base exits non-zero when origin/main is unavailable or shares no
    # history with HEAD (e.g. a shallow clone). Under `set -e` an unguarded
    # assignment would abort the script before the fallback below can run.
    LCA_COMMIT=$(git merge-base origin/main HEAD) || LCA_COMMIT=""
  fi

  if [[ -z "$LCA_COMMIT" ]]; then
    DOCKER_IMAGE_OVERRIDE="public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:latest"
    echo "Could not determine LCA commit, using latest Docker image: $DOCKER_IMAGE_OVERRIDE"
    return 0
  fi

  IMAGE_TAG="public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$LCA_COMMIT"
  echo "Checking for Docker image for LCA: $IMAGE_TAG"
  # Check if the image exists on the registry
  if docker manifest inspect "$IMAGE_TAG" >/dev/null 2>&1; then
    DOCKER_IMAGE_OVERRIDE="$IMAGE_TAG"
    echo "Using Docker image for LCA commit: $DOCKER_IMAGE_OVERRIDE"
  else
    # No image to reuse: go back to building the image and the wheels.
    echo "LCA image not found, falling back to build image"
    SKIP_IMAGE_BUILD=0
    VLLM_USE_PRECOMPILED=0
  fi
}

resolve_docker_image_override

echo "Final SKIP_IMAGE_BUILD=${SKIP_IMAGE_BUILD} (RUN_ALL=${RUN_ALL}, VLLM_USE_PRECOMPILED=${VLLM_USE_PRECOMPILED:-unset})"

################## end WIP #####################

LIST_FILE_DIFF=$(get_diff | tr ' ' '|')
if [[ $BUILDKITE_BRANCH == "main" ]]; then
Expand Down
180 changes: 135 additions & 45 deletions buildkite/test-template-ci.j2
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@
{% set docker_image_cu118 = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-cu118" %}
{% set docker_image_cpu = "public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo:$BUILDKITE_COMMIT-cpu" %}
{% endif %}
{% set skip_image_build = (skip_image_build | default("0")) %}
{% if skip_image_build == "1" and docker_image_override is defined and docker_image_override %}
{% set docker_image = docker_image_override %}
{% set docker_image_torch_nightly = docker_image_override %}
{% set docker_image_cu118 = docker_image_override %}
{% set docker_image_cpu = docker_image_override %}
{% endif %}
{% set docker_image_amd = "rocm/vllm-ci:$BUILDKITE_COMMIT" %}
{% set default_working_dir = "/vllm-workspace/tests" %}
{% set hf_home = "/root/.cache/huggingface" %}
Expand All @@ -30,12 +37,71 @@
{%- set tests_only = (tests_acc.only_tests and tests_acc.any) %}
{%- set changed_tests = tests_acc.changed %}

{% macro add_pytest_coverage(cmd, coverage_file) %}
{% if "pytest " in cmd %}
COVERAGE_FILE={{ coverage_file }} {{ cmd | replace("pytest ", "pytest --cov=vllm --cov-report= --cov-append --durations=0 ") }} || true
{% else %}
{{ cmd }}
{# --- helpers ------------------------------------------------------------ #}

{# clean_cmd(cmd): normalise one command string before it is emitted.
   Deletes every "space followed by backslash" sequence (presumably the
   residue of a shell line continuation in the step definition; confirm
   against the test pipeline YAML), trims leading and trailing whitespace,
   and marks the result as safe. The matched sequence is replaced with an
   empty string, not with a space, so any separating whitespace has to come
   from what follows the backslash in the input. #}
{% macro clean_cmd(cmd) -%}
{{- cmd
| replace(' \\', '') | trim | safe
-}}
{%- endmacro %}

{# emit_step_commands(step): render the shell command line for a step.
   - step.command truthy: emits that single command, passed through clean_cmd.
   - otherwise step.commands truthy: emits each entry through clean_cmd,
     joined with " && " (no trailing separator after the last entry).
   - neither set: emits a script line that prints an error to stderr and
     exits with status 2. #}
{% macro emit_step_commands(step) -%}
{%- if step.command -%}
{{ clean_cmd(step.command) }}
{%- elif step.commands -%}
{%- for c in step.commands -%}
{{ clean_cmd(c) }}{{ " && " if not loop.last else "" }}
{%- endfor -%}
{%- else -%}
echo "No command(s) defined for this step." >&2; exit 2
{%- endif -%}
{%- endmacro %}

{# add_pytest_coverage(cmd, coverage_file): emit cmd with coverage enabled.
   The command is first passed through clean_cmd. If the cleaned text
   contains "pytest " (with the trailing space), every such occurrence gets
   the --cov / --cov-report / --cov-append / --durations flags injected and
   the line is prefixed with COVERAGE_FILE set to coverage_file. The emitted
   line ends with "|| true", so the wrapped command always exits 0 and a
   failing pytest run does not fail the line. Commands without "pytest " are
   emitted cleaned but otherwise unchanged. #}
{% macro add_pytest_coverage(cmd, coverage_file) -%}
{%- set c = clean_cmd(cmd) -%}
{%- if "pytest " in c -%}
COVERAGE_FILE={{ coverage_file }} {{ c | replace("pytest ", "pytest --cov=vllm --cov-report= --cov-append --durations=0 ") }} || true
{%- else -%}
{{ c }}
{%- endif -%}
{%- endmacro %}

{# --- main macros --------------------------------------------------------
   vllm_checkoutoverlay_script(step, default_working_dir, skip_image_build,
                               fail_fast, cov_enabled)
   Renders the bash script used as the step command:
   1. Shell options: "set -xeuo pipefail" when fail_fast is the string
      "true", otherwise the same without -e.
   2. Echoes the skip_image_build value.
   3. Only when skip_image_build is the string "1": copies /workdir into
      /vllm-workspace, copies the vllm package over the installed one in the
      interpreter's purelib directory, then moves /vllm-workspace/vllm to
      /vllm-workspace/src/vllm. The doubled dollar sign before SITEPKG is
      presumably an escape so that pipeline-level variable interpolation
      leaves a literal dollar for the shell; confirm against the Buildkite
      interpolation rules.
   4. Runs nvidia-smi when it is on PATH (failures ignored), exports
      VLLM_LOGGING_LEVEL and VLLM_ALLOW_DEPRECATED_BEAM_SEARCH, and changes
      into step.working_dir or, when that is unset, default_working_dir.
   5. Emits the test commands through add_docker_pytest_coverage. #}

{% macro vllm_checkoutoverlay_script(step, default_working_dir, skip_image_build, fail_fast, cov_enabled) %}
{% if fail_fast == "true" -%}
set -xeuo pipefail
{%- else -%}
set -xuo pipefail
{%- endif %}

echo "SKIP_IMAGE_BUILD={{ skip_image_build }}"

{% if skip_image_build == "1" %}
# Copy in the code from the checkout to the workspace
rm -rf /vllm-workspace/vllm || true
cp -a /workdir/. /vllm-workspace/

# Overlay the pure-Python vllm into the install package dir
export SITEPKG="$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')"
cp -a /vllm-workspace/vllm/* "$$SITEPKG/vllm/"

# Restore src/ layout, as Dockerfile does. Hides code from tests, but allows setup.
rm -rf /vllm-workspace/src || true
mkdir -p /vllm-workspace/src
mv /vllm-workspace/vllm /vllm-workspace/src/vllm
{% endif %}

(command -v nvidia-smi >/dev/null && nvidia-smi || true)
export VLLM_LOGGING_LEVEL=DEBUG
export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1
cd {{ (step.working_dir or default_working_dir) | safe }}

# Run tests with intelligent targeting and coverage
{{ add_docker_pytest_coverage(step, cov_enabled) }}
{% endmacro %}

{% macro add_docker_pytest_coverage(step, cov_enabled) %}
Expand All @@ -47,53 +113,55 @@ COVERAGE_FILE={{ coverage_file }} {{ cmd | replace("pytest ", "pytest --cov=vllm
{# Intelligent test targeting: Build matched test targets for this step when only tests changed #}
{%- set match_ns = namespace(targets=[]) %}
{%- if tests_only and step.source_file_dependencies %}
{%- for dep in step.source_file_dependencies %}
{%- if dep[:6] == 'tests/' %}
{%- set dep_rel = dep[6:] %}
{# Handle deps that already end with '/' (e.g., tests/benchmarks/) #}
{%- if dep_rel[-1:] == '/' %}
{%- set dep_dir_prefix = dep_rel %}
{%- set dep_file_name = dep_rel[:-1] ~ '.py' %}
{%- else %}
{%- set dep_dir_prefix = dep_rel ~ '/' %}
{%- set dep_file_name = dep_rel ~ '.py' %}
{%- endif %}
{%- for t in changed_tests %}
{# Check if t starts with dep_dir_prefix (for directories) or equals dep_file_name (for files) #}
{%- set prefix_len = dep_dir_prefix | length %}
{%- set t_prefix = t[:prefix_len] %}
{%- set cond1 = (t | length >= prefix_len and t_prefix == dep_dir_prefix) %}
{%- set cond2 = (t == dep_file_name) %}
{%- if cond1 or cond2 %}
{%- set match_ns.targets = match_ns.targets + [t] %}
{%- endif %}
{%- endfor %}
{%- endif %}
{%- endfor %}
{%- for dep in step.source_file_dependencies %}
{%- if dep[:6] == 'tests/' %}
{%- set dep_rel = dep[6:] %}
{%- if dep_rel[-1:] == '/' %}
{%- set dep_dir_prefix = dep_rel %}
{%- set dep_file_name = dep_rel[:-1] ~ '.py' %}
{%- else %}
{%- set dep_dir_prefix = dep_rel ~ '/' %}
{%- set dep_file_name = dep_rel ~ '.py' %}
{%- endif %}
{%- for t in changed_tests %}
{%- set prefix_len = dep_dir_prefix | length %}
{%- set t_prefix = t[:prefix_len] %}
{%- set cond1 = (t | length >= prefix_len and t_prefix == dep_dir_prefix) %}
{%- set cond2 = (t == dep_file_name) %}
{%- if cond1 or cond2 %}
{%- set match_ns.targets = match_ns.targets + [t] %}
{%- endif %}
{%- endfor %}
{%- endif %}
{%- endfor %}
{%- endif %}
{%- set matched_targets = match_ns.targets %}

{# If we have matched targets, run only those specific tests #}
{% if matched_targets | length > 0 %}
pytest -v -s {{ matched_targets | join(' ') }}
{% else %}
{# Default behavior: preserve original commands with optional coverage injection #}
{% if cov_enabled %}
{% set ns = namespace(has_pytest=false) %}
{% if step.command %}
{% if "pytest " in step.command %}{% set ns.has_pytest = true %}{% endif %}
{{ add_pytest_coverage(step.command, coverage_file) }}
{% else %}
{% for cmd in step.commands %}
{% if "pytest " in cmd %}{% set ns.has_pytest = true %}{% endif %}
{{ add_pytest_coverage(cmd, coverage_file) }}{{ " && " if not loop.last else "" }}{% endfor %}
{% endif %}{% if ns.has_pytest %} && curl -sSL https://raw.githubusercontent.com/vllm-project/ci-infra/{{ vllm_ci_branch | default('main') }}/buildkite/scripts/upload_codecov.sh | bash -s -- \"{{ step.label }}\"{% endif %}
{% else %}
{{ step.command or (step.commands | join(' && ')) | safe }}
{% endif %}
{# Default behavior: preserve original commands with optional coverage injection, all cleaned #}
{% if cov_enabled %}
{% set ns = namespace(has_pytest=false) %}
{% if step.command %}
{% if "pytest " in clean_cmd(step.command) %}{% set ns.has_pytest = true %}{% endif %}
{{ add_pytest_coverage(step.command, coverage_file) }}
{% else %}
{%- for cmd in step.commands -%}
{#- Record pytest usage for each command. Without this, ns.has_pytest stays false for steps that define "commands", and the codecov upload guarded by ns.has_pytest is never emitted for them. -#}
{%- if "pytest " in clean_cmd(cmd) -%}{%- set ns.has_pytest = true -%}{%- endif -%}
{{ add_pytest_coverage(cmd, coverage_file) }}{{ " && " if not loop.last else "" }}
{%- endfor -%}
{% endif %}
{%- if ns.has_pytest -%}
&& curl -sSL https://raw.githubusercontent.com/vllm-project/ci-infra/{{ vllm_ci_branch | default('main') }}/buildkite/scripts/upload_codecov.sh | bash -s -- "{{ step.label }}"
{%- endif -%}
{% else %}
{{ emit_step_commands(step) }}
{% endif %}
{% endif %}
{% endmacro %}


{% macro render_cuda_config(step, image, default_working_dir, hf_home_fsx, hf_home, branch) %}
agents:
{% if step.label == "Documentation Build" %}
Expand Down Expand Up @@ -143,7 +211,11 @@ plugins:
{% if step.label == "Benchmarks" or step.mount_buildkite_agent or cov_enabled %}
mount-buildkite-agent: true
{% endif %}
command: ["bash", "{% if fail_fast == "true" %}-xce{% else %}-xc{% endif %}", "(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} && {{ add_docker_pytest_coverage(step, cov_enabled) }}"]
command:
- "/bin/bash"
- "-c"
- |
{{ vllm_checkoutoverlay_script(step,default_working_dir,skip_image_build,fail_fast,cov_enabled) | indent(12,true) }}
environment:
- VLLM_USAGE_SOURCE=ci-test
- NCCL_CUMEM_HOST_ENABLE=0
Expand All @@ -168,7 +240,11 @@ plugins:
always-pull: true
propagate-environment: true
gpus: all
command: ["bash", "{% if fail_fast == "true" %}-xce{% else %}-xc{% endif %}", "(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} && {{ add_docker_pytest_coverage(step, cov_enabled) }}"]
command:
- "/bin/bash"
- "-c"
- |
{{ vllm_checkoutoverlay_script(step,default_working_dir,skip_image_build,fail_fast,cov_enabled) | indent(12,true) }}
environment:
- VLLM_USAGE_SOURCE=ci-test
- NCCL_CUMEM_HOST_ENABLE=0
Expand All @@ -192,7 +268,11 @@ plugins:
propagate-environment: true
# gpus will be configured by BUILDKITE_PLUGIN_DOCKER_GPUS in per host environment variable.
# gpus: all
command: ["bash", "{% if fail_fast == "true" %}-xce{% else %}-xc{% endif %}", "(command nvidia-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} && {{ add_docker_pytest_coverage(step, cov_enabled) }}"]
command:
- "/bin/bash"
- "-c"
- |
{{ vllm_checkoutoverlay_script(step,default_working_dir,skip_image_build,fail_fast,cov_enabled) | indent(12,true) }}
environment:
- VLLM_USAGE_SOURCE=ci-test
- NCCL_CUMEM_HOST_ENABLE=0
Expand Down Expand Up @@ -285,6 +365,7 @@ plugins:


steps:
{% if skip_image_build != "1" %}
- label: ":docker: build image"
key: image-build
depends_on: ~
Expand Down Expand Up @@ -410,6 +491,7 @@ steps:
limit: 2
- exit_status: -10 # Agent was lost
limit: 2
{% endif %}

{% for step in steps %}
{% if step.fast_check_only != true %}
Expand Down Expand Up @@ -438,15 +520,23 @@ steps:

{% if ns.blocked == 1 or (step.optional and nightly != "1") %}
- block: "Run {{ step.label }}"
{% if skip_image_build != "1" %}
depends_on: image-build
{% else %}
depends_on: ~
{% endif %}
key: block-{{ step.label | replace(" ", "-") | lower | replace("(", "") | replace(")", "") | replace("%", "") | replace(",", "-") | replace("+", "-") }}
{% endif %}

- label: "{{ step.label }}"
{% if ns.blocked == 1 or (step.optional and nightly != "1") %}
depends_on: block-{{ step.label | replace(" ", "-") | lower | replace("(", "") | replace(")", "") | replace("%", "") | replace(",", "-") | replace("+", "-") }}
{% else %}
{% if skip_image_build != "1" %}
depends_on: {{ "image-build-cpu" if step.no_gpu else "image-build" }}
{% else %}
depends_on: ~
{% endif %}
{% endif %}
soft_fail: {{ step.soft_fail or false }}
{{ render_cuda_config(step, docker_image_cpu if step.no_gpu else docker_image, default_working_dir, hf_home_fsx, hf_home, branch) | indent(4, true) }}
Expand Down
Loading