diff --git a/tests/fault_tolerance/etcd_ha/test_sglang.py b/tests/fault_tolerance/etcd_ha/test_sglang.py index 894ede72bb..8783a0fb6d 100644 --- a/tests/fault_tolerance/etcd_ha/test_sglang.py +++ b/tests/fault_tolerance/etcd_ha/test_sglang.py @@ -149,6 +149,7 @@ def is_ready(self, response) -> bool: @pytest.mark.gpu_1 @pytest.mark.e2e @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) +@pytest.mark.skip(reason="Broken, temporarily disabled") def test_etcd_ha_failover_sglang_aggregated(request, predownload_models): """ Test ETCD High Availability with leader failover using SGLang. @@ -209,6 +210,7 @@ def test_etcd_ha_failover_sglang_aggregated(request, predownload_models): @pytest.mark.gpu_2 @pytest.mark.e2e @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) +@pytest.mark.skip(reason="Broken, temporarily disabled") def test_etcd_ha_failover_sglang_disaggregated( request, predownload_models, set_ucx_tls_no_mm ): @@ -277,6 +279,7 @@ def test_etcd_ha_failover_sglang_disaggregated( @pytest.mark.gpu_1 @pytest.mark.e2e @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) +@pytest.mark.skip(reason="Broken, temporarily disabled") def test_etcd_non_ha_shutdown_sglang_aggregated(request, predownload_models): """ Test that frontend and worker shut down when single ETCD node is terminated using SGLang. @@ -333,6 +336,7 @@ def test_etcd_non_ha_shutdown_sglang_aggregated(request, predownload_models): @pytest.mark.gpu_2 @pytest.mark.e2e @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) +@pytest.mark.skip(reason="Broken, temporarily disabled") def test_etcd_non_ha_shutdown_sglang_disaggregated( request, predownload_models, set_ucx_tls_no_mm ): diff --git a/tests/fault_tolerance/etcd_ha/test_trtllm.py b/tests/fault_tolerance/etcd_ha/test_trtllm.py index c08ee405aa..67e839aeb5 100644 --- a/tests/fault_tolerance/etcd_ha/test_trtllm.py +++ b/tests/fault_tolerance/etcd_ha/test_trtllm.py @@ -135,6 +135,7 @@ def is_ready(self, response) -> bool: @pytest.mark.gpu_1 @pytest.mark.e2e @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) +@pytest.mark.skip(reason="Broken, temporarily disabled") def test_etcd_ha_failover_trtllm_aggregated(request, predownload_models): """ Test ETCD High Availability with leader failover for TRT-LLM in aggregated mode. @@ -195,6 +196,7 @@ def test_etcd_ha_failover_trtllm_aggregated(request, predownload_models): @pytest.mark.gpu_1 @pytest.mark.e2e @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) +@pytest.mark.skip(reason="Broken, temporarily disabled") def test_etcd_ha_failover_trtllm_disaggregated( request, predownload_models, set_ucx_tls_no_mm ): @@ -262,6 +264,7 @@ def test_etcd_ha_failover_trtllm_disaggregated( @pytest.mark.gpu_1 @pytest.mark.e2e @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) +@pytest.mark.skip(reason="Broken, temporarily disabled") def test_etcd_non_ha_shutdown_trtllm_aggregated(request, predownload_models): """ Test that frontend and worker shut down when single ETCD node is terminated for TRT-LLM in aggregated mode. @@ -321,6 +324,7 @@ def test_etcd_non_ha_shutdown_trtllm_aggregated(request, predownload_models): @pytest.mark.gpu_1 @pytest.mark.e2e @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) +@pytest.mark.skip(reason="Broken, temporarily disabled") def test_etcd_non_ha_shutdown_trtllm_disaggregated( request, predownload_models, set_ucx_tls_no_mm ): diff --git a/tests/fault_tolerance/etcd_ha/test_vllm.py b/tests/fault_tolerance/etcd_ha/test_vllm.py index 0e1e950a63..2e58fdfe4b 100644 --- a/tests/fault_tolerance/etcd_ha/test_vllm.py +++ b/tests/fault_tolerance/etcd_ha/test_vllm.py @@ -117,6 +117,7 @@ def is_ready(self, response) -> bool: @pytest.mark.gpu_1 @pytest.mark.e2e @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) +@pytest.mark.skip(reason="Broken, temporarily disabled") def test_etcd_ha_failover_vllm_aggregated(request, predownload_models): """ Test ETCD High Availability with leader failover. @@ -175,6 +176,7 @@ def test_etcd_ha_failover_vllm_aggregated(request, predownload_models): @pytest.mark.gpu_1 @pytest.mark.e2e @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) +@pytest.mark.skip(reason="Broken, temporarily disabled") def test_etcd_ha_failover_vllm_disaggregated( request, predownload_models, set_ucx_tls_no_mm ): @@ -239,6 +241,7 @@ def test_etcd_ha_failover_vllm_disaggregated( @pytest.mark.gpu_1 @pytest.mark.e2e @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) +@pytest.mark.skip(reason="Broken, temporarily disabled") def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models): """ Test that frontend and worker shut down when single ETCD node is terminated. @@ -293,6 +296,7 @@ def test_etcd_non_ha_shutdown_vllm_aggregated(request, predownload_models): @pytest.mark.gpu_1 @pytest.mark.e2e @pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME) +@pytest.mark.skip(reason="Broken, temporarily disabled") def test_etcd_non_ha_shutdown_vllm_disaggregated( request, predownload_models, set_ucx_tls_no_mm ):