
Commit 743adaa

mgoin authored and epwalsh committed
[CI Fix] Pin deepep and pplx tags in tools/ep_kernels/, gate multigpu tests (vllm-project#23568)
Signed-off-by: mgoin <[email protected]>
1 parent e470152 commit 743adaa

File tree: 9 files changed, +40 −12 lines changed

.buildkite/test-pipeline.yaml

Lines changed: 1 addition & 0 deletions
@@ -390,6 +390,7 @@ steps:
   - csrc/moe/
   - tests/kernels/moe
   - vllm/model_executor/layers/fused_moe/
+  - vllm/distributed/device_communicators/
   commands:
   - pytest -v -s kernels/moe --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
   parallelism: 2

tests/distributed/test_comm_ops.py

Lines changed: 5 additions & 7 deletions
@@ -18,7 +18,8 @@
     tensor_model_parallel_all_reduce,
     tensor_model_parallel_reduce_scatter)
 
-from ..utils import init_test_distributed_environment, multi_process_parallel
+from ..utils import (init_test_distributed_environment, multi_gpu_test,
+                     multi_process_parallel)
 
 
 @ray.remote(num_gpus=1, max_calls=1)
@@ -226,8 +227,7 @@ def send_recv_test_worker(
     torch.testing.assert_close(test_tensor, recv_tensor)
 
 
-@pytest.mark.skipif(torch.cuda.device_count() < 2,
-                    reason="Need at least 2 GPUs to run the test.")
+@multi_gpu_test(num_gpus=2)
 @pytest.mark.parametrize("tp_size", [2])
 @pytest.mark.parametrize("test_target", [
     all_reduce_test_worker, all_gather_test_worker,
@@ -241,8 +241,7 @@ def test_multi_process_tensor_parallel(
     multi_process_parallel(monkeypatch, tp_size, 1, test_target)
 
 
-@pytest.mark.skipif(torch.cuda.device_count() < 2,
-                    reason="Need at least 2 GPUs to run the test.")
+@multi_gpu_test(num_gpus=2)
 @pytest.mark.parametrize("pp_size", [2])
 @pytest.mark.parametrize(
     "test_target", [send_recv_test_worker, send_recv_tensor_dict_test_worker])
@@ -254,8 +253,7 @@ def test_multi_process_pipeline_parallel(
     multi_process_parallel(monkeypatch, 1, pp_size, test_target)
 
 
-@pytest.mark.skipif(torch.cuda.device_count() < 4,
-                    reason="Need at least 4 GPUs to run the test.")
+@multi_gpu_test(num_gpus=4)
 @pytest.mark.parametrize("tp_size", [2])
 @pytest.mark.parametrize("pp_size", [2])
 @pytest.mark.parametrize("test_target", [
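Across the test files in this commit the change is the same: ad-hoc @pytest.mark.skipif guards on torch.cuda.device_count() are replaced by the shared multi_gpu_test helper imported from tests/utils.py. That helper's implementation is not shown in this diff; purely as an illustration of the gating behavior, a minimal stand-in could look like the sketch below (the name multi_gpu_test_sketch and its body are assumptions, not vLLM's actual code).

```python
# Illustrative sketch only: a GPU-count gate comparable to what
# multi_gpu_test(num_gpus=N) provides for the tests above. The real helper
# in vLLM's tests/utils.py may do more (e.g. attach distributed/CI markers).
import pytest
import torch


def multi_gpu_test_sketch(*, num_gpus: int = 2):
    """Return a skipif marker that gates a test on the visible GPU count."""
    return pytest.mark.skipif(
        torch.cuda.device_count() < num_gpus,
        reason=f"Need at least {num_gpus} GPUs to run the test.")


@multi_gpu_test_sketch(num_gpus=2)
def test_two_gpu_smoke():
    # Runs only where at least two CUDA devices are visible.
    assert torch.cuda.device_count() >= 2
```

Centralizing the requirement in one decorator keeps the skip reason consistent and lets CI discover and gate multi-GPU tests uniformly.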

tests/kernels/moe/test_deepep_deepgemm_moe.py

Lines changed: 3 additions & 0 deletions
@@ -23,6 +23,7 @@
 from vllm.utils.deep_gemm import (is_blackwell_deep_gemm_e8m0_used,
                                   is_deep_gemm_supported)
 
+from ...utils import multi_gpu_test
 from .parallel_utils import ProcessGroupInfo, parallel_launch
 from .utils import make_test_weights
 
@@ -370,6 +371,7 @@ def _test_deepep_deepgemm_moe(
 @pytest.mark.parametrize("num_experts", NUM_EXPERTS)
 @pytest.mark.parametrize("topk", TOPKS)
 @pytest.mark.parametrize("world_dp_size", [(2, 1)])
+@multi_gpu_test(num_gpus=2)
 @requires_deep_ep
 @requires_deep_gemm
 @pytest.mark.skipif(is_blackwell_deep_gemm_e8m0_used(),
@@ -427,6 +429,7 @@ def test_ht_deepep_deepgemm_moe(mnk: tuple[int, int, int], num_experts: int,
 @pytest.mark.parametrize("use_fp8_dispatch", USE_FP8_DISPATCH)
 @pytest.mark.parametrize("block_size", [[128, 128]])
 @pytest.mark.parametrize("world_dp_size", [(2, 1)])
+@multi_gpu_test(num_gpus=2)
 @requires_deep_ep
 @requires_deep_gemm
 @pytest.mark.skipif(is_blackwell_deep_gemm_e8m0_used(),

tests/kernels/moe/test_deepep_moe.py

Lines changed: 3 additions & 0 deletions
@@ -24,6 +24,7 @@
 from vllm.platforms import current_platform
 from vllm.utils import has_deep_ep
 
+from ...utils import multi_gpu_test
 from .parallel_utils import ProcessGroupInfo, parallel_launch
 
 if has_deep_ep():
@@ -411,6 +412,7 @@ def _deep_ep_moe(
 @pytest.mark.parametrize("topk", [6])
 @pytest.mark.parametrize("world_dp_size", [(2, 1)])
 @pytest.mark.parametrize("per_act_token_quant", [False, True])
+@multi_gpu_test(num_gpus=2)
 @requires_deep_ep
 def test_deep_ep_moe(
     dtype: torch.dtype,
@@ -459,6 +461,7 @@ def test_deep_ep_moe(
 @pytest.mark.parametrize("topk", [6])
 @pytest.mark.parametrize("world_dp_size", [(2, 1)])
 @pytest.mark.parametrize("use_fp8_dispatch", USE_FP8_DISPATCH)
+@multi_gpu_test(num_gpus=2)
 @requires_deep_ep
 def test_low_latency_deep_ep_moe(dtype: torch.dtype, mnk: tuple[int, int, int],
                                  num_experts: int, topk: int,

tests/kernels/moe/test_modular_kernel_combinations.py

Lines changed: 2 additions & 0 deletions
@@ -16,6 +16,7 @@
 from vllm.utils import has_deep_ep, has_deep_gemm, has_pplx
 from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe
 
+from ...utils import multi_gpu_test
 from .modular_kernel_tools.common import (Config, RankTensors, WeightTensors,
                                           reference_moe_impl,
                                           run_modular_kernel)
@@ -162,6 +163,7 @@ def is_nyi_config(config: Config) -> bool:
     product(MK_MULTI_GPU_PREPARE_FINALIZE_TYPES, MK_FUSED_EXPERT_TYPES))
 @pytest.mark.parametrize("fused_moe_chunk_size", FUSED_MOE_CHUNK_SIZEs)
 @pytest.mark.parametrize("world_size", [2])
+@multi_gpu_test(num_gpus=2)
 @meets_multi_gpu_requirements
 def test_modular_kernel_combinations_multigpu(
         k: int, n: int, e: int, dtype: torch.dtype,

tests/kernels/moe/test_pplx_cutlass_moe.py

Lines changed: 2 additions & 0 deletions
@@ -17,6 +17,7 @@
 from vllm.platforms import current_platform
 from vllm.utils import cdiv
 
+from ...utils import multi_gpu_test
 from .parallel_utils import ProcessGroupInfo, parallel_launch
 
 try:
@@ -247,6 +248,7 @@ def _pplx_moe(
 @pytest.mark.parametrize("per_out_ch", [True, False])
 @pytest.mark.parametrize("world_dp_size", [[2, 1]])  #, [4, 2]])
 @pytest.mark.parametrize("use_internode", [False])
+@multi_gpu_test(num_gpus=2)
 @pytest.mark.skipif(
     (lambda x: x is None or not ops.cutlass_group_gemm_supported(x.to_int()))(
         current_platform.get_device_capability()),

tests/kernels/moe/test_pplx_moe.py

Lines changed: 5 additions & 0 deletions
@@ -37,6 +37,7 @@
 from vllm.platforms import current_platform
 from vllm.utils import round_up
 
+from ...utils import multi_gpu_test
 from .parallel_utils import ProcessGroupInfo, parallel_launch
 
 requires_pplx = pytest.mark.skipif(
@@ -452,6 +453,7 @@ def _pplx_prepare_finalize(
 @pytest.mark.parametrize("use_internode", [False])
 @pytest.mark.optional
 @requires_pplx
+@multi_gpu_test(num_gpus=2)
 def test_pplx_prepare_finalize_slow(
     mnk: tuple[int, int, int],
     e: int,
@@ -740,6 +742,7 @@ def _pplx_moe(
 @pytest.mark.parametrize("use_internode", [False])
 @pytest.mark.optional
 @requires_pplx
+@multi_gpu_test(num_gpus=2)
 def test_pplx_moe_slow(
     mnk: tuple[int, int, int],
     e: int,
@@ -880,6 +883,7 @@ def format_result(msg, ex=None):
 @pytest.mark.parametrize("world_dp_size", [[2, 1]])
 @pytest.mark.parametrize("use_internode", [False])
 @requires_pplx
+@multi_gpu_test(num_gpus=2)
 def test_pplx_prepare_finalize(
     world_dp_size: tuple[int, int],
     use_internode: bool,
@@ -893,6 +897,7 @@ def test_pplx_prepare_finalize(
 @pytest.mark.parametrize("world_dp_size", [[2, 1]])
 @pytest.mark.parametrize("use_internode", [False])
 @requires_pplx
+@multi_gpu_test(num_gpus=2)
 def test_pplx_moe(
     world_dp_size: tuple[int, int],
     use_internode: bool,
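The DeepEP and pplx kernel tests above are gated at two GPUs because each case spawns one worker process per rank through the tests' parallel_launch utility, whose code is not part of this diff. For orientation only, a generic, self-contained version of that per-rank spawn pattern might look like the sketch below, assuming torch.multiprocessing and at least one visible GPU per rank; it is not vLLM's parallel_utils implementation.

```python
# Generic sketch of the per-rank spawn pattern used by multi-GPU kernel tests.
# Names are illustrative; this is not vLLM's parallel_utils.parallel_launch.
import torch
import torch.multiprocessing as mp


def _worker(rank: int, world_size: int) -> None:
    # Each rank binds to its own GPU; the kernel under test would run here.
    torch.cuda.set_device(rank)
    print(f"rank {rank}/{world_size} on {torch.cuda.get_device_name(rank)}")


def launch(world_size: int = 2) -> None:
    # mp.spawn starts `world_size` processes and passes the rank as arg 0.
    mp.spawn(_worker, args=(world_size,), nprocs=world_size, join=True)


if __name__ == "__main__":
    launch(2)
```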

tests/utils.py

Lines changed: 6 additions & 3 deletions
@@ -696,9 +696,12 @@ def multi_process_parallel(
     os.environ["RAY_RUNTIME_ENV_IGNORE_GITIGNORE"] = "1"
     ray.init(
         runtime_env={
-            "working_dir": VLLM_PATH,
-            "excludes":
-            ["build", ".git", "cmake-build-*", "shellcheck", "dist"]
+            "working_dir":
+            VLLM_PATH,
+            "excludes": [
+                "build", ".git", "cmake-build-*", "shellcheck", "dist",
+                "ep_kernels_workspace"
+            ]
         })
 
     distributed_init_port = get_open_port()
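The tests/utils.py hunk adds ep_kernels_workspace to the excludes list, so the build workspace created by tools/ep_kernels/ is not packaged and uploaded when Ray ships the repository as the workers' working_dir. A minimal standalone sketch of that Ray runtime_env pattern, with an illustrative path in place of VLLM_PATH:

```python
# Minimal sketch of Ray's runtime_env packaging with excludes, mirroring the
# tests/utils.py change above. The working_dir path here is illustrative.
import ray

ray.init(
    runtime_env={
        # Upload this directory to every worker node...
        "working_dir": "/path/to/vllm",
        # ...but skip build artifacts and the EP kernels workspace, which can
        # be large and are not needed by the test workers.
        "excludes": [
            "build", ".git", "cmake-build-*", "shellcheck", "dist",
            "ep_kernels_workspace"
        ],
    })
```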

tools/ep_kernels/install_python_libraries.sh

Lines changed: 13 additions & 2 deletions
@@ -77,6 +77,7 @@ clone_repo() {
     local repo_url=$1
     local dir_name=$2
     local key_file=$3
+    local commit_hash=$4
 
     if [ -d "$dir_name" ]; then
         # Check if directory has uncommitted changes (dirty)
@@ -87,17 +88,27 @@ clone_repo() {
             echo "$dir_name directory exists but clone appears incomplete, cleaning up and re-cloning"
             rm -rf "$dir_name"
             git clone "$repo_url"
+            if [ -n "$commit_hash" ]; then
+                cd "$dir_name"
+                git checkout "$commit_hash"
+                cd ..
+            fi
         else
             echo "$dir_name directory exists and appears complete; manually update if needed"
         fi
     else
         git clone "$repo_url"
+        if [ -n "$commit_hash" ]; then
+            cd "$dir_name"
+            git checkout "$commit_hash"
+            cd ..
+        fi
     fi
 }
 
 # build and install pplx, require pytorch installed
 pushd $WORKSPACE
-clone_repo "https://github.com/ppl-ai/pplx-kernels" "pplx-kernels" "setup.py"
+clone_repo "https://github.com/ppl-ai/pplx-kernels" "pplx-kernels" "setup.py" "c336faf"
 cd pplx-kernels
 # see https://github.com/pypa/pip/issues/9955#issuecomment-838065925
 # PIP_NO_BUILD_ISOLATION=0 disables build isolation
@@ -106,7 +117,7 @@ popd
 
 # build and install deepep, require pytorch installed
 pushd $WORKSPACE
-clone_repo "https://github.com/deepseek-ai/DeepEP" "DeepEP" "setup.py"
+clone_repo "https://github.com/deepseek-ai/DeepEP" "DeepEP" "setup.py" "e3908bf"
 cd DeepEP
 export NVSHMEM_DIR=$WORKSPACE/nvshmem_install
 PIP_NO_BUILD_ISOLATION=0 pip install -vvv -e .
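clone_repo now takes an optional fourth argument and checks it out after a fresh clone, pinning pplx-kernels to c336faf and DeepEP to e3908bf. As a purely illustrative aid (not part of this PR), a small check like the following could confirm that an existing workspace is on the pinned revisions:

```python
# Hypothetical helper (not part of this PR): verify that the clones created by
# tools/ep_kernels/install_python_libraries.sh sit on the pinned commits.
import subprocess
from pathlib import Path

# Pins taken from the diff above; the workspace path matches whatever the
# install script used (ep_kernels_workspace is assumed here).
PINS = {"pplx-kernels": "c336faf", "DeepEP": "e3908bf"}


def check_pins(workspace: str) -> None:
    for repo, expected in PINS.items():
        repo_dir = Path(workspace) / repo
        head = subprocess.check_output(
            ["git", "rev-parse", "--short", "HEAD"],
            cwd=repo_dir, text=True).strip()
        # Tolerate short-hash length differences between git versions.
        ok = head.startswith(expected) or expected.startswith(head)
        print(f"{repo}: HEAD={head} expected={expected} -> "
              f"{'OK' if ok else 'MISMATCH'}")


if __name__ == "__main__":
    check_pins("ep_kernels_workspace")
```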
