|
31 | 31 | pytest.mark.gpu_1, |
32 | 32 | pytest.mark.e2e, |
33 | 33 | pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME), |
34 | | - pytest.mark.post_merge, # post_merge to pinpoint failure commit |
35 | 34 | pytest.mark.parametrize( |
36 | 35 | "migration_limit", [3, 0], ids=["migration_enabled", "migration_disabled"] |
37 | 36 | ), |
@@ -188,8 +187,8 @@ def is_ready(self, response) -> bool: |
188 | 187 | return False |
189 | 188 |
|
190 | 189 |
|
191 | | -@pytest.mark.timeout(290) # 3x average |
192 | | -@pytest.mark.post_merge |
| 190 | +@pytest.mark.timeout(290) |
| 191 | +@pytest.mark.post_merge # 3x average |
193 | 192 | def test_request_migration_trtllm_aggregated( |
194 | 193 | request, |
195 | 194 | runtime_services_dynamic_ports, |
@@ -240,6 +239,7 @@ def test_request_migration_trtllm_aggregated( |
240 | 239 |
|
241 | 240 | @pytest.mark.xfail(strict=False, reason="Prefill migration not yet supported") |
242 | 241 | @pytest.mark.timeout(350) # 3x average |
| 242 | +@pytest.mark.nightly |
243 | 243 | def test_request_migration_trtllm_prefill( |
244 | 244 | request, |
245 | 245 | runtime_services_dynamic_ports, |
@@ -308,6 +308,7 @@ def test_request_migration_trtllm_prefill( |
308 | 308 |
|
309 | 309 | @pytest.mark.skip(reason="Decode worker can get stuck downloading kv cache") |
310 | 310 | @pytest.mark.timeout(350) # 3x average |
| 311 | +@pytest.mark.nightly |
311 | 312 | def test_request_migration_trtllm_kv_transfer( |
312 | 313 | request, |
313 | 314 | runtime_services_dynamic_ports, |
@@ -375,6 +376,7 @@ def test_request_migration_trtllm_kv_transfer( |
375 | 376 |
|
376 | 377 |
|
377 | 378 | @pytest.mark.timeout(350) # 3x average |
| 379 | +@pytest.mark.post_merge |
378 | 380 | def test_request_migration_trtllm_decode( |
379 | 381 | request, |
380 | 382 | runtime_services_dynamic_ports, |
|
0 commit comments