feat: exclude k8 FT tests that need custom build from default run (#4017)

tzulingk · web-flow · commit 20d1eb2ee9f2 · 2025-10-31T17:15:10.000Z
Signed-off-by: tzulingk@nvidia.com <tzulingk@nvidia.com>
diff --git a/pyproject.toml b/pyproject.toml
@@ -198,7 +198,8 @@ markers = [
     "slow: marks tests as known to be slow",
     "h100: marks tests to run on H100",
     "kvbm: marks tests for KV behavior and model determinism",
-    "model: model id used by a test or parameter"
+    "model: model id used by a test or parameter",
+    "custom_build: marks tests that require custom builds or special setup (e.g., MoE models)"
 ]
 
 # Linting/formatting
diff --git a/tests/fault_tolerance/deploy/README.md b/tests/fault_tolerance/deploy/README.md
@@ -141,12 +141,18 @@ The combined results of these two phases demonstrate both the system's ability t
 
 #### Example Scenario Execution:
 
-Run all deployments and failure scenarios
+Run standard deployments and failure scenarios (excludes custom builds by default):
 
 ```bash
 pytest tests/fault_tolerance/deploy/test_deployment.py -s -v --namespace ${NAMESPACE}
 ```
 
+To run all scenarios, including those that require custom builds (e.g., MoE models):
+
+```bash
+pytest tests/fault_tolerance/deploy/test_deployment.py -s -v --namespace ${NAMESPACE} --include-custom-build
+```
+
 ### Test Results Directory
 
 For each test scenario a directory of log files is created and post-processed to summarize the test. The directory structure differs based on which client type is used.
@@ -490,10 +496,54 @@ Then run the development container mounting the workspace and your kube config.
 
 ### Run the tests
 
+#### Default: Run Standard Tests Only
+
+By default, tests requiring custom builds (e.g., MoE models) are **automatically excluded**:
+
 ```bash
-pytest tests/fault_tolerance/deploy/test_deployment.py -s -v --namespace ${NAMESPACE} --image ${IMAGE}
+# Standard tests only
+pytest tests/fault_tolerance/deploy/test_deployment.py -s -v \
+  --namespace ${NAMESPACE} \
+  --image ${IMAGE}
 ```
 
+#### Include Custom Build Tests
+
+To run ALL tests including those requiring custom builds (e.g., MoE models):
+
+```bash
+pytest tests/fault_tolerance/deploy/test_deployment.py -s -v \
+  --namespace ${NAMESPACE} \
+  --image ${IMAGE} \
+  --include-custom-build
+```
+
+#### Run Only Custom Build Tests
+
+To run ONLY tests that require custom builds:
+
+```bash
+pytest tests/fault_tolerance/deploy/test_deployment.py -s -v \
+  --namespace ${NAMESPACE} \
+  --image ${IMAGE} \
+  -m "custom_build"
+```
+
+#### List Available Tests
+
+```bash
+# See which tests will run by default (excludes custom_build)
+pytest tests/fault_tolerance/deploy/test_deployment.py --collect-only -q
+
+# See which tests are excluded by default (those marked custom_build)
+pytest tests/fault_tolerance/deploy/test_deployment.py --collect-only -m "custom_build" -q
+```
+
+> **Note:** Tests requiring custom builds are marked with `@pytest.mark.custom_build` and include:
+> - MoE (Mixture-of-Experts) models like DeepSeek-V2-Lite
+> - Tests requiring special Docker image configurations
+> - Any scenario with `requires_custom_build=True` in scenarios.py
+
 
 ### Note on Running with Additional Credentials
 
diff --git a/tests/fault_tolerance/deploy/conftest.py b/tests/fault_tolerance/deploy/conftest.py
@@ -15,6 +15,8 @@
 
 import pytest
 
+from tests.fault_tolerance.deploy.scenarios import scenarios
+
 
 def pytest_addoption(parser):
     parser.addoption("--image", type=str, default=None)
@@ -26,6 +28,71 @@ def pytest_addoption(parser):
         choices=["aiperf", "legacy"],
         help="Client type for load generation: 'aiperf' (default) or 'legacy'",
     )
+    parser.addoption(
+        "--include-custom-build",
+        action="store_true",
+        default=False,
+        help="Include tests that require custom builds (e.g., MoE models). "
+        "By default, these tests are excluded.",
+    )
+
+
+def pytest_generate_tests(metafunc):
+    """Dynamically parametrize tests and apply markers based on scenario properties.
+
+    This hook applies markers to individual test instances based on their scenario:
+    - @pytest.mark.custom_build: For MoE models and other tests requiring custom builds
+    """
+    if "scenario" in metafunc.fixturenames:
+        scenario_names = list(scenarios.keys())
+        argvalues = []
+        ids = []
+
+        for scenario_name in scenario_names:
+            scenario_obj = scenarios[scenario_name]
+            marks = []
+
+            if getattr(scenario_obj, "requires_custom_build", False):
+                marks.append(pytest.mark.custom_build)
+
+            # Always use pytest.param for type consistency (even with empty marks)
+            argvalues.append(pytest.param(scenario_name, marks=marks))
+            ids.append(scenario_name)
+
+        metafunc.parametrize("scenario_name", argvalues, ids=ids)
+
+
+def pytest_collection_modifyitems(config, items):
+    """Automatically deselect custom_build tests unless --include-custom-build is specified.
+
+    This allows users to run tests without any special flags and automatically excludes
+    tests that require custom builds. To include them, use --include-custom-build.
+
+    Note: If user explicitly uses -m marker filtering, we respect that and don't
+    auto-deselect, allowing them to run custom_build tests with -m "custom_build".
+    """
+    # If --include-custom-build flag is set, include all tests
+    if config.getoption("--include-custom-build"):
+        return
+
+    # If user explicitly used -m marker filtering, let pytest handle it
+    # Don't auto-deselect in this case
+    if config.option.markexpr:
+        return
+
+    # Default case: auto-deselect custom_build tests
+    deselected = []
+    selected = []
+
+    for item in items:
+        if "custom_build" in item.keywords:
+            deselected.append(item)
+        else:
+            selected.append(item)
+
+    if deselected:
+        config.hook.pytest_deselected(items=deselected)
+        items[:] = selected
 
 
 @pytest.fixture
diff --git a/tests/fault_tolerance/deploy/scenarios.py b/tests/fault_tolerance/deploy/scenarios.py
@@ -184,6 +184,9 @@ class Scenario:
     failures: list[Failure]
     model: Optional[str] = None
     backend: str = "vllm"  # Backend type for tracking
+    # When set to True, the test will be automatically marked with @pytest.mark.custom_build
+    # and excluded from default test runs unless --include-custom-build flag is used
+    requires_custom_build: bool = False  # Flag for tests needing custom builds/setup
 
 
 # Helper functions to create deployment specs
@@ -572,6 +575,7 @@ def create_legacy_load(
             failures=failure,
             model=scenario_model,
             backend=backend,
+            requires_custom_build=is_moe,  # MoE models require custom builds
         )
 
 
diff --git a/tests/fault_tolerance/deploy/test_deployment.py b/tests/fault_tolerance/deploy/test_deployment.py
@@ -22,13 +22,13 @@
 from tests.utils.managed_deployment import ManagedDeployment
 
 
-@pytest.fixture(params=scenarios.keys())
-def scenario(request, client_type):
+@pytest.fixture
+def scenario(scenario_name, client_type):
     """Get scenario and optionally override client type from command line.
 
     If --client-type is specified, it overrides the scenario's default client type.
     """
-    scenario_obj = scenarios[request.param]
+    scenario_obj = scenarios[scenario_name]
 
     # Override client type if specified on command line
     if client_type is not None:

Original file line number	Diff line number	Diff line change
`@@ -198,7 +198,8 @@ markers = [`
`198`	`198`	`"slow: marks tests as known to be slow",`
`199`	`199`	`"h100: marks tests to run on H100",`
`200`	`200`	`"kvbm: marks tests for KV behavior and model determinism",`
`201`		`- "model: model id used by a test or parameter"`
	`201`	`+ "model: model id used by a test or parameter",`
	`202`	`+ "custom_build: marks tests that require custom builds or special setup (e.g., MoE models)"`
`202`	`203`	`]`
`203`	`204`
`204`	`205`	`# Linting/formatting`