diff --git a/scripts/ci_aws_bootstrap.sh b/scripts/ci_aws_bootstrap.sh index f3b40936..74a1298c 100644 --- a/scripts/ci_aws_bootstrap.sh +++ b/scripts/ci_aws_bootstrap.sh @@ -2,9 +2,24 @@ set -euo pipefail -if [[ -z "${RUN_ALL:-}" ]]; then - RUN_ALL=0 -fi +RUN_ALL=${RUN_ALL:-0} +VLLM_CI_BRANCH=${VLLM_CI_BRANCH:-main} + +generate_pipeline() { + python -m pip install "click==8.1.7" "pydantic==2.9.2" + + # Download necessary files + mkdir -p .buildkite/pipeline_generator + for FILE in pipeline_generator.py pipeline_generator_helper.py plugin.py step.py utils.py __init__.py; do + curl -o ".buildkite/pipeline_generator/$FILE" "https://raw.githubusercontent.com/vllm-project/buildkite-ci/$VLLM_CI_BRANCH/scripts/pipeline_generator/$FILE" + done + + # Generate and upload pipeline + cd .buildkite + python -m pipeline_generator.pipeline_generator --run_all="$RUN_ALL" --list_file_diff="$LIST_FILE_DIFF" + cat pipeline.yaml + buildkite-agent pipeline upload pipeline.yaml +} upload_pipeline() { echo "Uploading pipeline..." 
@@ -74,4 +89,9 @@ LIST_FILE_DIFF=$(get_diff | tr ' ' '|') if [[ $BUILDKITE_BRANCH == "main" ]]; then LIST_FILE_DIFF=$(get_diff_main | tr ' ' '|') fi -upload_pipeline + +if [[ $BUILDKITE_PIPELINE_SLUG == "fastcheck" ]]; then + upload_pipeline +else + generate_pipeline +fi diff --git a/scripts/pipeline_generator/pipeline_generator.py b/scripts/pipeline_generator/pipeline_generator.py index b1053b8a..06676950 100644 --- a/scripts/pipeline_generator/pipeline_generator.py +++ b/scripts/pipeline_generator/pipeline_generator.py @@ -1,9 +1,39 @@ +import yaml +import click +from typing import List, Dict, Union import os -import re -from typing import List, Optional - -from pydantic import BaseModel, field_validator +from pydantic import BaseModel +from .plugin import ( + get_kubernetes_plugin_config, + get_docker_plugin_config, +) +from .utils import ( + AgentQueue, + AMD_REPO, + TEST_PATH, + EXTERNAL_HARDWARE_TEST_PATH, + PIPELINE_FILE_PATH, + STEPS_TO_BLOCK, + VLLM_ECR_URL, + VLLM_ECR_REPO, + get_agent_queue, + get_full_test_command, + get_multi_node_test_command, +) +from .step import ( + TestStep, + BuildkiteStep, + BuildkiteBlockStep, + get_block_step, + get_step_key +) +from .pipeline_generator_helper import ( + step_should_run, + get_plugin_config, + create_buildkite_step, + get_build_commands, +) class PipelineGeneratorConfig: def __init__( @@ -45,11 +75,147 @@ def validate(self): if not os.path.isfile(self.external_hardware_test_path): raise FileNotFoundError(f"External hardware test file {self.external_hardware_test_path} not found") +def read_test_steps(self, file_path: str) -> List[TestStep]: + """Read test steps from test pipeline yaml and parse them into TestStep objects.""" + with open(file_path, "r") as f: + content = yaml.safe_load(f) + return [TestStep(**step) for step in content["steps"]] + +def write_buildkite_steps( + self, + buildkite_steps: List[Union[BuildkiteStep, BuildkiteBlockStep]], + output_file_path: str + ) -> None: + """Output the buildkite 
steps to the Buildkite pipeline yaml file.""" + buildkite_steps_dict = {"steps": [step.dict(exclude_none=True) for step in buildkite_steps]} + with open(output_file_path, "w") as f: + yaml.dump(buildkite_steps_dict, f, sort_keys=False) class PipelineGenerator: def __init__( self, config: PipelineGeneratorConfig ): - config.validate() self.config = config + + def generate_build_step(self) -> BuildkiteStep: + """Build the Docker image and push it to container registry.""" + build_commands = get_build_commands(self.config.container_registry, self.config.commit, self.config.container_image) + + return BuildkiteStep( + label=":docker: build image", + key="build", + agents={"queue": AgentQueue.AWS_CPU.value}, + env={"DOCKER_BUILDKIT": "1"}, + retry={ + "automatic": [ + {"exit_status": -1, "limit": 2}, + {"exit_status": -10, "limit": 2} + ] + }, + commands=build_commands, + depends_on=None, + ) + + def convert_test_step_to_buildkite_steps(self, step: TestStep) -> List[Union[BuildkiteStep, BuildkiteBlockStep]]: + """Process a test step and return its corresponding Buildkite steps (a gating block step is prepended when the step should not auto-run).""" + steps = [] + current_step = create_buildkite_step(step, self.config.container_image) + + if not step_should_run(step, self.config.run_all, self.config.list_file_diff): + block_step = get_block_step(step.label) + steps.append(block_step) + current_step.depends_on = block_step.key + + steps.append(current_step) + return steps + + def get_external_hardware_tests(self, test_steps: List[TestStep]) -> List[Union[BuildkiteStep, BuildkiteBlockStep]]: + """Process the external hardware tests from the yaml file and convert to Buildkite steps.""" + buildkite_steps = self._process_external_hardware_steps() + buildkite_steps.extend(self._mirror_amd_test_steps(test_steps)) + return buildkite_steps + + + def _process_external_hardware_steps(self) -> List[Union[BuildkiteStep, BuildkiteBlockStep]]: + with open(EXTERNAL_HARDWARE_TEST_PATH, "r") as f: + content = yaml.safe_load(f) + buildkite_steps = [] + 
amd_docker_image = f"{AMD_REPO}:{self.config.commit}" + for step in content["steps"]: + step["commands"] = [cmd.replace("DOCKER_IMAGE_AMD", amd_docker_image) for cmd in step["commands"]] + buildkite_step = BuildkiteStep(**step) + buildkite_step.depends_on = "bootstrap" + + # Add block step if step is in blocklist + if buildkite_step.key in STEPS_TO_BLOCK: + block_step = get_block_step(buildkite_step.label) + buildkite_steps.append(block_step) + buildkite_step.depends_on = block_step.key + buildkite_steps.append(buildkite_step) + return buildkite_steps + + def _mirror_amd_test_steps(self, test_steps: List[TestStep]) -> List[BuildkiteStep]: + mirrored_buildkite_steps = [] + for test_step in test_steps: + if test_step.mirror_hardwares and "amd" in test_step.mirror_hardwares: + test_commands = [test_step.command] if test_step.command else test_step.commands + amd_test_command = [ + "bash", + ".buildkite/run-amd-test.sh", + f"'{get_full_test_command(test_commands, test_step.working_dir)}'", + ] + mirrored_buildkite_step = BuildkiteStep( + label=f"AMD: {test_step.label}", + key=f"amd_{get_step_key(test_step.label)}", + depends_on="amd-build", + agents={"queue": AgentQueue.AMD_GPU.value}, + soft_fail=test_step.soft_fail, + env={"DOCKER_BUILDKIT": "1"}, + commands=[" ".join(amd_test_command)], + ) + mirrored_buildkite_steps.append(mirrored_buildkite_step) + return mirrored_buildkite_steps + + + def generate(self): + test_steps = self.read_test_steps(self.config.test_path) + buildkite_steps = [self.generate_build_step()] + + for test_step in test_steps: + test_buildkite_steps = self.convert_test_step_to_buildkite_steps(test_step) + buildkite_steps.extend(test_buildkite_steps) + buildkite_steps.extend(self.get_external_hardware_tests(test_steps)) + + self.write_buildkite_steps(buildkite_steps, self.config.pipeline_file_path) + + +@click.command() +@click.option("--run_all", type=str) +@click.option("--list_file_diff", type=str) +def main(run_all: str = "-1", list_file_diff: 
str = None): + list_file_diff = list_file_diff.split("|") if list_file_diff else [] + pipeline_generator_config = PipelineGeneratorConfig( + run_all=run_all == "1", + list_file_diff=list_file_diff, + container_registry=VLLM_ECR_URL, + container_registry_repo=VLLM_ECR_REPO, + commit=os.getenv("BUILDKITE_COMMIT"), + test_path=TEST_PATH, + external_hardware_test_path=EXTERNAL_HARDWARE_TEST_PATH, + pipeline_file_path=PIPELINE_FILE_PATH + ) + pipeline_generator = PipelineGenerator(pipeline_generator_config) + pipeline_generator.generate() + + +if __name__ == "__main__": + main() +import os +import re +from typing import List, Optional + +from pydantic import BaseModel, field_validator + + + diff --git a/scripts/pipeline_generator/pipeline_generator_helper.py b/scripts/pipeline_generator/pipeline_generator_helper.py new file mode 100644 index 00000000..035559e2 --- /dev/null +++ b/scripts/pipeline_generator/pipeline_generator_helper.py @@ -0,0 +1,84 @@ +from typing import List, Dict +from .plugin import get_kubernetes_plugin_config, get_docker_plugin_config +from .utils import get_agent_queue, get_full_test_command, get_multi_node_test_command, GPUType +from .step import BuildkiteStep, TestStep, get_step_key + +def step_should_run(step: TestStep, run_all: bool, list_file_diff: List[str]) -> bool: + """Determine whether the step should automatically run or not.""" + if step.optional: + return False + if not step.source_file_dependencies or run_all: + return True + return any(source_file in diff_file + for source_file in step.source_file_dependencies + for diff_file in list_file_diff) + +def get_plugin_config(step: TestStep, container_image: str) -> Dict: + """Returns the plugin configuration for the step.""" + test_step_commands = [step.command] if step.command else step.commands + test_bash_command = [ + "bash", + "-c", + get_full_test_command(test_step_commands, step.working_dir) + ] + if step.gpu == GPUType.A100: + return get_kubernetes_plugin_config( + 
container_image, + test_bash_command, + step.num_gpus + ) + return get_docker_plugin_config( + container_image, + test_bash_command, + step.no_gpu + ) + + +def create_buildkite_step(step: TestStep, container_image: str) -> BuildkiteStep: + """Convert TestStep into BuildkiteStep.""" + buildkite_step = BuildkiteStep( + label=step.label, + key=get_step_key(step.label), + parallelism=step.parallelism, + soft_fail=step.soft_fail, + plugins=[get_plugin_config(step, container_image)], + agents={"queue": get_agent_queue(step.no_gpu, step.gpu, step.num_gpus).value} + ) + # If test is multi-node, configure step to run with custom script + if step.num_nodes and step.num_nodes > 1: + buildkite_step.commands = [get_multi_node_test_command( + step.commands, + step.working_dir, + step.num_nodes, + step.num_gpus, + container_image + ) + ] + buildkite_step.plugins = None + return buildkite_step + + +def get_build_commands(container_registry: str, buildkite_commit: str, container_image: str) -> List[str]: + ecr_login_command = ( + "aws ecr-public get-login-password --region us-east-1 | " + f"docker login --username AWS --password-stdin {container_registry}" + ) + image_check_command = f"""#!/bin/bash +if [[ -z $(docker manifest inspect {container_image}) ]]; then +echo "Image not found, proceeding with build..." +else +echo "Image found" +exit 0 +fi +""" + docker_build_command = ( + f"docker build " + f"--build-arg max_jobs=64 " + f"--build-arg buildkite_commit={buildkite_commit} " + f"--build-arg USE_SCCACHE=1 " + f"--tag {container_image} " + f"--target test " + f"--progress plain ." 
+ ) + docker_push_command = f"docker push {container_image}" + return [ecr_login_command, image_check_command, docker_build_command, docker_push_command] \ No newline at end of file diff --git a/scripts/pipeline_generator/plugin.py b/scripts/pipeline_generator/plugin.py index 5219a498..3a79e0e7 100644 --- a/scripts/pipeline_generator/plugin.py +++ b/scripts/pipeline_generator/plugin.py @@ -95,6 +95,7 @@ class KubernetesPluginConfig(BaseModel): def get_kubernetes_plugin_config(container_image: str, test_bash_command: List[str], num_gpus: int) -> Dict: + test_bash_command[-1] = f'"{test_bash_command[-1]}"' pod_spec = KubernetesPodSpec( containers=[ KubernetesPodContainerConfig( diff --git a/scripts/pipeline_generator/step.py b/scripts/pipeline_generator/step.py index 8e73c45d..3f3cf48c 100644 --- a/scripts/pipeline_generator/step.py +++ b/scripts/pipeline_generator/step.py @@ -28,11 +28,11 @@ class TestStep(BaseModel): @classmethod def validate_and_convert_command(cls, values) -> Any: """ - Validate that either 'command' or 'commands' is defined. + Validate that either 'command' or 'commands' or 'plugins' is defined. If 'command' is defined, convert it to 'commands'. 
""" - if not values.get("command") and not values.get("commands"): - raise ValueError("Either 'command' or 'commands' must be defined.") + if not values.get("command") and not values.get("commands") and not values.get("plugins"): + raise ValueError("Either 'command' or 'commands' or 'plugins' must be defined.") if values.get("command") and values.get("commands"): raise ValueError("Only one of 'command' or 'commands' can be defined.") if values.get("command"): @@ -59,7 +59,7 @@ class BuildkiteStep(BaseModel): """This class represents a step in Buildkite format.""" label: str agents: Dict[str, AgentQueue] = {"queue": AgentQueue.AWS_CPU} - commands: List[str] + commands: Optional[List[str]] = None key: Optional[str] = None plugins: Optional[List[Dict]] = None parallelism: Optional[int] = None diff --git a/scripts/pipeline_generator/utils.py b/scripts/pipeline_generator/utils.py index dc1615ae..9c318956 100644 --- a/scripts/pipeline_generator/utils.py +++ b/scripts/pipeline_generator/utils.py @@ -5,13 +5,13 @@ HF_HOME = "/root/.cache/huggingface" DEFAULT_WORKING_DIR = "/vllm-workspace/tests" VLLM_ECR_URL = "public.ecr.aws/q9t5s3a7" -VLLM_ECR_REPO = f"{VLLM_ECR_URL}/vllm-ci-test-repo" +VLLM_ECR_REPO = "vllm-ci-test-repo" AMD_REPO = "rocm/vllm-ci" # File paths -TEST_PATH = ".buildkite/test-pipeline.yaml" -EXTERNAL_HARDWARE_TEST_PATH = ".buildkite/external-tests.yaml" -PIPELINE_FILE_PATH = ".buildkite/pipeline.yaml" +TEST_PATH = "./test-pipeline.yaml" +EXTERNAL_HARDWARE_TEST_PATH = "./external-tests.yaml" +PIPELINE_FILE_PATH = "./pipeline.yaml" MULTI_NODE_TEST_SCRIPT = ".buildkite/run-multi-node-test.sh" TEST_DEFAULT_COMMANDS = [ diff --git a/scripts/tests/pipeline_generator/test_pipeline_generator.py b/scripts/tests/pipeline_generator/test_pipeline_generator.py index 145d11ec..cdd92784 100644 --- a/scripts/tests/pipeline_generator/test_pipeline_generator.py +++ b/scripts/tests/pipeline_generator/test_pipeline_generator.py @@ -1,5 +1,368 @@ import pytest import sys +from 
unittest import mock + + +from scripts.pipeline_generator.pipeline_generator import PipelineGenerator +from scripts.pipeline_generator.step import TestStep, BuildkiteStep, BuildkiteBlockStep +from scripts.pipeline_generator.utils import ( + AgentQueue, + VLLM_ECR_REPO, + MULTI_NODE_TEST_SCRIPT, +) +from scripts.pipeline_generator.plugin import ( + DEFAULT_DOCKER_ENVIRONMENT_VARIBLES, + DEFAULT_DOCKER_VOLUMES, + DEFAULT_KUBERNETES_CONTAINER_VOLUME_MOUNTS, + DEFAULT_KUBERNETES_CONTAINER_ENVIRONMENT_VARIABLES, + DEFAULT_KUBERNETES_NODE_SELECTOR, + DEFAULT_KUBERNETES_POD_VOLUMES, +) + +TEST_COMMIT = "123456789abcdef123456789abcdef123456789a" +TEST_FILE_PATH = "scripts/tests/pipeline_generator/tests.yaml" + + +def get_test_pipeline_generator(): + pipeline_generator = PipelineGenerator(run_all=False, list_file_diff=[]) + pipeline_generator.commit = TEST_COMMIT + return pipeline_generator + + +def test_read_test_steps(): + pipeline_generator = get_test_pipeline_generator() + steps = pipeline_generator.read_test_steps(TEST_FILE_PATH) + assert len(steps) == 4 + for i in range(4): + assert steps[i].label == f"Test {i}" + assert steps[0].source_file_dependencies == ["dir1/", "dir2/file1"] + assert steps[0].commands == ["pytest -v -s a", "pytest -v -s b.py"] + assert steps[1].working_dir == "/tests" + assert steps[2].num_gpus == 2 + assert steps[2].num_nodes == 2 + assert steps[3].gpu == "a100" + assert steps[3].optional is True + + +@pytest.mark.parametrize( + ("test_step", "expected_plugin_config"), + [ + ( + TestStep( + label="Test 0", + source_file_dependencies=["dir1/", "dir2/file1"], + commands=["test command 1", "test command 2"], + ), + { + "docker#v5.2.0": { + "image": f"{VLLM_ECR_REPO}:{TEST_COMMIT}", + "always-pull": True, + "propagate-environment": True, + "gpus": "all", + "mount-buildkite-agent": False, + "command": [ + "bash", + "-c", + "(command nvidia-smi || true);\nexport VLLM_LOGGING_LEVEL=DEBUG;\nexport VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1;\ncd 
/vllm-workspace/tests;\ntest command 1;\ntest command 2", + ], + "environment": DEFAULT_DOCKER_ENVIRONMENT_VARIBLES, + "volumes": DEFAULT_DOCKER_VOLUMES, + } + }, + ), + ( + TestStep( + label="Test 1", + commands=["test command 1", "test command 2"], + gpu="a100", + num_gpus=4, + ), + { + "kubernetes": { + "podSpec": { + "containers": [ + { + "image": f"{VLLM_ECR_REPO}:{TEST_COMMIT}", + "command": [ + 'bash -c "(command nvidia-smi || true);\nexport VLLM_LOGGING_LEVEL=DEBUG;\nexport VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1;\ncd /vllm-workspace/tests;\ntest command 1;\ntest command 2"' + ], + "resources": {"limits": {"nvidia.com/gpu": 4}}, + "volumeMounts": DEFAULT_KUBERNETES_CONTAINER_VOLUME_MOUNTS, + "env": DEFAULT_KUBERNETES_CONTAINER_ENVIRONMENT_VARIABLES, + } + ], + "priorityClassName": "ci", + "nodeSelector": DEFAULT_KUBERNETES_NODE_SELECTOR, + "volumes": DEFAULT_KUBERNETES_POD_VOLUMES, + } + } + }, + ), + ], +) +def test_get_plugin_config(test_step, expected_plugin_config): + pipeline_generator = get_test_pipeline_generator() + plugin_config = pipeline_generator.get_plugin_config(test_step) + assert plugin_config == expected_plugin_config + + +@pytest.mark.parametrize( + ("test_step", "expected_buildkite_step"), + [ + ( + TestStep( + label="Test 0", + source_file_dependencies=["dir1/", "dir2/file1"], + commands=["test command 1", "test command 2"], + ), + BuildkiteStep( + label="Test 0", + key="test-0", + agents={"queue": AgentQueue.AWS_1xL4.value}, + plugins=[ + { + "docker#v5.2.0": { + "image": "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:123456789abcdef123456789abcdef123456789a", + "always-pull": True, + "propagate-environment": True, + "gpus": "all", + "mount-buildkite-agent": False, + "command": [ + "bash", + "-c", + "(command nvidia-smi || true);\nexport VLLM_LOGGING_LEVEL=DEBUG;\nexport VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1;\ncd /vllm-workspace/tests;\ntest command 1;\ntest command 2", + ], + "environment": DEFAULT_DOCKER_ENVIRONMENT_VARIBLES, + "volumes": 
DEFAULT_DOCKER_VOLUMES, + } + } + ], + ), + ), + # A100 test + ( + TestStep( + label="Test 1", + commands=["test command 1", "test command 2"], + gpu="a100", + num_gpus=4, + ), + BuildkiteStep( + label="Test 1", + key="test-1", + agents={"queue": AgentQueue.A100.value}, + plugins=[ + { + "kubernetes": { + "podSpec": { + "containers": [ + { + "image": f"{VLLM_ECR_REPO}:{TEST_COMMIT}", + "command": [ + 'bash -c "(command nvidia-smi || true);\nexport VLLM_LOGGING_LEVEL=DEBUG;\nexport VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1;\ncd /vllm-workspace/tests;\ntest command 1;\ntest command 2"' + ], + "resources": {"limits": {"nvidia.com/gpu": 4}}, + "volumeMounts": DEFAULT_KUBERNETES_CONTAINER_VOLUME_MOUNTS, + "env": DEFAULT_KUBERNETES_CONTAINER_ENVIRONMENT_VARIABLES, + } + ], + "priorityClassName": "ci", + "nodeSelector": DEFAULT_KUBERNETES_NODE_SELECTOR, + "volumes": DEFAULT_KUBERNETES_POD_VOLUMES, + } + } + }, + ], + ), + ), + # Multi node test + ( + TestStep( + label="Test 2", + num_gpus=2, + num_nodes=2, + commands=["test command 1", "test command 2"], + working_dir="/tests", + ), + BuildkiteStep( + label="Test 2", + key="test-2", + agents={"queue": AgentQueue.AWS_4xL4.value}, + commands=[ + f"{MULTI_NODE_TEST_SCRIPT} /tests 2 2 {VLLM_ECR_REPO}:{TEST_COMMIT} 'test command 1' 'test command 2'" + ], + ), + ), + ], +) +def test_create_buildkite_step(test_step, expected_buildkite_step): + pipeline_generator = get_test_pipeline_generator() + + buildkite_step = pipeline_generator.create_buildkite_step(test_step) + assert buildkite_step == expected_buildkite_step + + +@pytest.mark.parametrize( + ("test_step", "expected_value_without_runall", "expected_value_with_runall"), + [ + ( + TestStep( + label="Test 0", + source_file_dependencies=["dir1/", "dir2/file1"], + commands=["test command 1", "test command 2"], + ), + True, + True, + ), + ( + TestStep( + label="Test 0", + commands=["test command 1", "test command 2"], + ), + True, + True, + ), + ( + TestStep( + label="Test 0", + 
source_file_dependencies=["dir2/", "dir3/file1"], + commands=["test command 1", "test command 2"], + ), + False, + True, + ), + ( + TestStep( + label="Test 1", + commands=["test command 1", "test command 2"], + gpu="a100", + optional=True, + num_gpus=4, + ), + False, + False, + ), + ], +) +def test_step_should_run( + test_step, expected_value_without_runall, expected_value_with_runall +): + pipeline_generator = get_test_pipeline_generator() + pipeline_generator.list_file_diff = ["dir1/a.py", "dir3/file2.py"] + assert ( + pipeline_generator.step_should_run(test_step) == expected_value_without_runall + ) + + # With run_all + pipeline_generator.run_all = True + assert pipeline_generator.step_should_run(test_step) == expected_value_with_runall + + +@pytest.mark.parametrize( + ("test_step", "expected_buildkite_steps"), + [ + # Test always run so no block step + ( + TestStep( + label="Test 0", + commands=["test command 1", "test command 2"], + ), + [ + BuildkiteStep( + label="Test 0", + key="test-0", + agents={"queue": AgentQueue.AWS_1xL4.value}, + plugins=[ + { + "docker#v5.2.0": { + "image": "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:123456789abcdef123456789abcdef123456789a", + "always-pull": True, + "propagate-environment": True, + "gpus": "all", + "mount-buildkite-agent": False, + "command": [ + "bash", + "-c", + "(command nvidia-smi || true);\nexport VLLM_LOGGING_LEVEL=DEBUG;\nexport VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1;\ncd /vllm-workspace/tests;\ntest command 1;\ntest command 2", + ], + "environment": DEFAULT_DOCKER_ENVIRONMENT_VARIBLES, + "volumes": DEFAULT_DOCKER_VOLUMES, + } + } + ], + ), + ], + ), + # Test doesn't automatically run because dependencies are not matched -> with block step + ( + TestStep( + label="Test 0", + source_file_dependencies=["dir1/", "dir2/file1"], + commands=["test command 1", "test command 2"], + ), + [ + BuildkiteBlockStep(block="Run Test 0", key="block-test-0"), + BuildkiteStep( + label="Test 0", + key="test-0", + agents={"queue": 
AgentQueue.AWS_1xL4.value}, + depends_on="block-test-0", + plugins=[ + { + "docker#v5.2.0": { + "image": "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:123456789abcdef123456789abcdef123456789a", + "always-pull": True, + "propagate-environment": True, + "gpus": "all", + "mount-buildkite-agent": False, + "command": [ + "bash", + "-c", + "(command nvidia-smi || true);\nexport VLLM_LOGGING_LEVEL=DEBUG;\nexport VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1;\ncd /vllm-workspace/tests;\ntest command 1;\ntest command 2", + ], + "environment": DEFAULT_DOCKER_ENVIRONMENT_VARIBLES, + "volumes": DEFAULT_DOCKER_VOLUMES, + } + } + ], + ), + ], + ), + ], +) +def test_process_step(test_step, expected_buildkite_steps): + pipeline_generator = get_test_pipeline_generator() + buildkite_steps = pipeline_generator.process_step(test_step) + assert buildkite_steps == expected_buildkite_steps + + +def test_generate_build_step(): + pipeline_generator = get_test_pipeline_generator() + pipeline_generator.get_build_commands = mock.MagicMock( + return_value=["build command 1", "build command 2"] + ) + build_step = pipeline_generator.generate_build_step() + expected_build_step = BuildkiteStep( + label=":docker: build image", + key="build", + agents={"queue": AgentQueue.AWS_CPU.value}, + env={"DOCKER_BUILDKIT": "1"}, + retry={ + "automatic": [ + {"exit_status": -1, "limit": 2}, + {"exit_status": -10, "limit": 2}, + ] + }, + commands=["build command 1", "build command 2"], + depends_on=None, + ) + assert build_step == expected_build_step + + +if __name__ == "__main__": + sys.exit(pytest.main(["-v", __file__])) +import pytest +import sys import os import tempfile diff --git a/scripts/tests/pipeline_generator/test_plugin.py b/scripts/tests/pipeline_generator/test_plugin.py index 7e85ac00..4ced8ab6 100644 --- a/scripts/tests/pipeline_generator/test_plugin.py +++ b/scripts/tests/pipeline_generator/test_plugin.py @@ -11,7 +11,7 @@ def test_get_kubernetes_plugin_config(): docker_image_path = "test_image:latest" - 
test_bash_command = ["echo", "Hello, Kubernetes!"] + test_bash_command = ["bash", "-c", "echo A"] num_gpus = 1 expected_config = { @@ -20,7 +20,7 @@ def test_get_kubernetes_plugin_config(): "containers": [ { "image": docker_image_path, - "command": [" ".join(test_bash_command)], + "command": ['bash -c "echo A"'], "resources": {"limits": {"nvidia.com/gpu": num_gpus}}, "volumeMounts": [ {"name": "devshm", "mountPath": "/dev/shm"}, diff --git a/scripts/tests/pipeline_generator/tests.yaml b/scripts/tests/pipeline_generator/tests.yaml new file mode 100644 index 00000000..a98b1f6d --- /dev/null +++ b/scripts/tests/pipeline_generator/tests.yaml @@ -0,0 +1,25 @@ +steps: +- label: Test 0 + source_file_dependencies: + - dir1/ + - dir2/file1 + commands: + - pytest -v -s a + - pytest -v -s b.py +- label: Test 1 + working_dir: "/tests" + commands: + - pytest -v -s d +- label: Test 2 + num_gpus: 2 + num_nodes: 2 + commands: + - pytest -v -s e && pytest -v -s f + - pytest -v -s g +- label: Test 3 + working_dir: "/tests" + gpu: a100 + num_gpus: 4 + optional: true + commands: + - pytest -v -s d