Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions tests/fault_tolerance/etcd_ha/test_sglang.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ def is_ready(self, response) -> bool:
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_ha_failover_sglang_aggregated(request, predownload_models):
"""
Test ETCD High Availability with leader failover using SGLang.
Expand Down Expand Up @@ -209,6 +210,7 @@ def test_etcd_ha_failover_sglang_aggregated(request, predownload_models):
@pytest.mark.gpu_2
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_ha_failover_sglang_disaggregated(
request, predownload_models, set_ucx_tls_no_mm
):
Expand Down Expand Up @@ -277,6 +279,7 @@ def test_etcd_ha_failover_sglang_disaggregated(
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_non_ha_shutdown_sglang_aggregated(request, predownload_models):
"""
Test that frontend and worker shut down when single ETCD node is terminated using SGLang.
Expand Down Expand Up @@ -333,6 +336,7 @@ def test_etcd_non_ha_shutdown_sglang_aggregated(request, predownload_models):
@pytest.mark.gpu_2
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_non_ha_shutdown_sglang_disaggregated(
request, predownload_models, set_ucx_tls_no_mm
):
Expand Down
4 changes: 4 additions & 0 deletions tests/fault_tolerance/etcd_ha/test_trtllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ def is_ready(self, response) -> bool:
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_ha_failover_trtllm_aggregated(request, predownload_models):
"""
Test ETCD High Availability with leader failover for TRT-LLM in aggregated mode.
Expand Down Expand Up @@ -195,6 +196,7 @@ def test_etcd_ha_failover_trtllm_aggregated(request, predownload_models):
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_ha_failover_trtllm_disaggregated(
request, predownload_models, set_ucx_tls_no_mm
):
Expand Down Expand Up @@ -262,6 +264,7 @@ def test_etcd_ha_failover_trtllm_disaggregated(
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_non_ha_shutdown_trtllm_aggregated(request, predownload_models):
"""
Test that frontend and worker shut down when single ETCD node is terminated for TRT-LLM in aggregated mode.
Expand Down Expand Up @@ -321,6 +324,7 @@ def test_etcd_non_ha_shutdown_trtllm_aggregated(request, predownload_models):
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_non_ha_shutdown_trtllm_disaggregated(
request, predownload_models, set_ucx_tls_no_mm
):
Expand Down
4 changes: 4 additions & 0 deletions tests/fault_tolerance/etcd_ha/test_vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ def is_ready(self, response) -> bool:
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_ha_failover_vllm_aggregated(request, predownload_models):
"""
Test ETCD High Availability with leader failover.
Expand Down Expand Up @@ -175,6 +176,7 @@ def test_etcd_ha_failover_vllm_aggregated(request, predownload_models):
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_ha_failover_vllm_disaggregated(
request, predownload_models, set_ucx_tls_no_mm
):
Expand Down Expand Up @@ -239,6 +241,7 @@ def test_etcd_ha_failover_vllm_disaggregated(
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models):
"""
Test that frontend and worker shut down when single ETCD node is terminated.
Expand Down Expand Up @@ -293,6 +296,7 @@ def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models):
@pytest.mark.gpu_1
@pytest.mark.e2e
@pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME)
@pytest.mark.skip(reason="Broken, temporarily disabled")
def test_etcd_non_ha_shutdown_vllm_disaggregated(
request, predownload_models, set_ucx_tls_no_mm
):
Expand Down
Loading