Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions buildkite/test-template-ci.j2
Original file line number Diff line number Diff line change
Expand Up @@ -495,14 +495,14 @@ steps:

# AMD mirror steps: for every entry in steps whose mirror_hardwares contains
# mirror_hw, emit one Buildkite step that depends on amd-build and is routed
# to an AMD agent queue selected by exact match on the step label.
# NOTE(review): this text looks like a rendered diff whose +/- markers were
# lost, so several lines below appear as old/new pairs (two label lines and
# two variants of each queue condition) - confirm against the real template.
{% for step in steps %}
{% if step.mirror_hardwares and mirror_hw in step.mirror_hardwares %}
- label: "AMD MI300: {{ step.label }}"
- label: "AMD MI325: {{ step.label }}"
depends_on: amd-build
agents:
# Queue routing: first matching label list wins, falling through to amd_mi325_1.
# The numeric queue suffix presumably is the GPU count per agent - TODO confirm.
# In Jinja "and" binds tighter than "or", so the "step.label and" guard in the
# next two conditions only covers the "Benchmarks" comparison, not the others.
{% if step.label and step.label=="Benchmarks" or step.label=="Kernels Attention Test %N" or step.label=="LoRA Test %N" or step.label=="Kernels Quantization Test %N" %}
{% if step.label=="LoRA Test %N" or step.label=="Kernels Attention Test %N" or step.label=="Kernels Quantization Test %N" or step.label=="Kernels MoE Test %N" or step.label and step.label=="Benchmarks" or step.label=="Benchmarks CLI Test" or step.label=="Basic Models Tests (Extra Initialization) %N" or step.label=="Language Models Tests (Hybrid) %N" %}
queue: amd_mi325_8
# NOTE(review): the second condition below compares against
# "LoRA TP Test (Distributed)" twice; the repeated comparison is redundant.
{% elif step.label=="Distributed Tests (4 GPUs)" or step.label=="2 Node Tests (4 GPUs in total)" or step.label=="Multi-step Tests (4 GPUs)" or step.label=="Pipeline Parallelism Test" or step.label=="LoRA TP Test (Distributed)" %}
{% elif step.label=="Distributed Tests (4 GPUs)" or step.label=="EPLB Execution Test" or step.label=="2 Node Tests (4 GPUs in total)" or step.label=="Multi-step Tests (4 GPUs)" or step.label=="Pipeline Parallelism Test" or step.label=="LoRA TP Test (Distributed)" or step.label=="Pipeline + Context Parallelism Test" or step.label=="LoRA TP Test (Distributed)" %}
queue: amd_mi325_4
# The newer variant swaps "Distributed Comm Ops Test" for "Metrics, Tracing Test"
# in the 2-GPU list, so "Distributed Comm Ops Test" falls through to the default
# queue unless it is matched elsewhere - presumably intentional; verify.
{% elif step.label=="Distributed Comm Ops Test" or step.label=="Distributed Tests (2 GPUs)" or step.label=="Plugin Tests (2 GPUs)" or step.label=="Weight Loading Multiple GPU Test" or step.label=="Weight Loading Multiple GPU Test - Large Models" %}
{% elif step.label=="Metrics, Tracing Test" or step.label=="Distributed Tests (2 GPUs)" or step.label=="Plugin Tests (2 GPUs)" or step.label=="Weight Loading Multiple GPU Test" or step.label=="Weight Loading Multiple GPU Test - Large Models" %}
queue: amd_mi325_2
{% else %}
queue: amd_mi325_1
Expand All @@ -511,7 +511,11 @@ steps:
env:
DOCKER_BUILDKIT: "1"
priority: 100
# Only a step whose label equals the string below gets soft_fail: false; every
# other mirrored AMD step is rendered with soft_fail: true.
# NOTE(review): "Regresson Test" looks like a misspelling of "Regression Test".
# If no step carries this exact label, the soft_fail: false branch never
# renders and all AMD mirror steps stay soft-failing - confirm against the
# pipeline's step labels before relying on this gate.
{% if step.label and step.label=="Regresson Test" %}
soft_fail: false
{% else %}
soft_fail: true
{% endif %}
{% endif %}
{% endfor %}
{% for step in steps %}
Expand Down
4 changes: 2 additions & 2 deletions buildkite/test-template-fastcheck.j2
Original file line number Diff line number Diff line change
Expand Up @@ -346,14 +346,14 @@ steps:
# Fastcheck AMD mirror: for a step whose mirror_hardwares contains mirror_hw and
# whose label is exactly "Basic Correctness Test", emit a block step that
# depends on amd-build, followed by a command step that depends on that block.
# NOTE(review): this text looks like a rendered diff whose +/- markers were
# lost (two block lines, two queue lines), and the hunk continues past the
# visible end - confirm against the real template.
{% for step in steps %}
{% if step.mirror_hardwares and mirror_hw in step.mirror_hardwares %}
{% if step.label and step.label=="Basic Correctness Test" %}
- block: "Run AMD MI300: {{ step.label }} with {{mirror_hw}}"
- block: "Run AMD MI325: {{ step.label }} with {{mirror_hw}}"
# Key is "block-amd-" plus a slug of the label: spaces, commas and plus signs
# become "-", the text is lowercased, and parentheses and "%" are removed.
key: block-amd-{{ step.label | replace(" ", "-") | lower | replace("(", "") | replace(")", "") | replace("%", "") | replace(",", "-") | replace("+", "-") }}
depends_on: amd-build

# NOTE(review): the block title and the agent queue below were moved to MI325,
# but this label still reads "AMD MI300" - looks like it was missed in the
# rename; confirm and align the label with the hardware actually used.
- label: "AMD MI300: {{ step.label }} with {{mirror_hw}}"
# depends_on repeats the same slug expression as the block key above; the two
# must stay in sync or this step will reference a key that does not exist.
depends_on: block-amd-{{ step.label | replace(" ", "-") | lower | replace("(", "") | replace(")", "") | replace("%", "") | replace(",", "-") | replace("+", "-") }}
agents:
queue: amd_mi300_1
queue: amd_mi325_1
# Runs run-amd-test.sh with one quoted shell string: rocm-smi (failure ignored),
# an env export, a cd into the step's working_dir (or default_working_dir), then
# step.command or, when that is empty, step.commands joined with " && ".
# NOTE(review): Jinja filters bind tighter than "or", so the trailing safe
# filter only applies to the joined-commands operand, not to step.command -
# relevant only if autoescaping is enabled; verify.
command: bash .buildkite/scripts/hardware_ci/run-amd-test.sh "(command rocm-smi || true) && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" && ")) | safe }}"
env:
DOCKER_BUILDKIT: "1"
Expand Down