Skip to content

Commit f874fe9

Browse files
fix: cp move some ft to nightly (#7279)
Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com>
1 parent 9bdda19 commit f874fe9

File tree

6 files changed

+21 -9 lines changed

tests/fault_tolerance/cancellation/test_sglang.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@
3333
pytest.mark.sglang,
3434
pytest.mark.e2e,
3535
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
36-
pytest.mark.post_merge, # post_merge to pinpoint failure commit
36+
pytest.mark.nightly,
3737
pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True),
3838
]
3939

tests/fault_tolerance/cancellation/test_trtllm.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
pytest.mark.gpu_1,
3737
pytest.mark.e2e,
3838
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
39-
pytest.mark.post_merge, # post_merge to pinpoint failure commit
39+
pytest.mark.nightly,
4040
pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True),
4141
pytest.mark.xfail(reason="Cancellation is temporarily disabled", strict=True),
4242
]

tests/fault_tolerance/cancellation/test_vllm.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,8 +34,6 @@
3434
pytest.mark.vllm,
3535
pytest.mark.e2e,
3636
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
37-
pytest.mark.post_merge, # post_merge to pinpoint failure commit
38-
pytest.mark.gpu_1,
3937
pytest.mark.parametrize("request_plane", ["nats", "tcp"], indirect=True),
4038
]
4139

@@ -203,6 +201,8 @@ def is_ready(self, response) -> bool:
203201

204202

205203
@pytest.mark.timeout(110) # 3x average
204+
@pytest.mark.post_merge
205+
@pytest.mark.gpu_1
206206
def test_request_cancellation_vllm_aggregated(
207207
request, runtime_services_dynamic_ports, predownload_models
208208
):
@@ -284,6 +284,8 @@ def test_request_cancellation_vllm_aggregated(
284284

285285

286286
@pytest.mark.timeout(150) # 3x average
287+
@pytest.mark.nightly
288+
@pytest.mark.gpu_2
287289
def test_request_cancellation_vllm_decode_cancel(
288290
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
289291
):
@@ -365,6 +367,8 @@ def test_request_cancellation_vllm_decode_cancel(
365367

366368

367369
@pytest.mark.timeout(150) # 3x average
370+
@pytest.mark.nightly
371+
@pytest.mark.gpu_2
368372
def test_request_cancellation_vllm_prefill_cancel(
369373
request, runtime_services_dynamic_ports, set_ucx_tls_no_mm, predownload_models
370374
):

tests/fault_tolerance/migration/test_sglang.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,6 @@
3131
pytest.mark.gpu_1,
3232
pytest.mark.e2e,
3333
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
34-
pytest.mark.post_merge, # post_merge to pinpoint failure commit
3534
pytest.mark.parametrize(
3635
"migration_limit", [3, 0], ids=["migration_enabled", "migration_disabled"]
3736
),
@@ -211,6 +210,7 @@ def is_ready(self, response) -> bool:
211210

212211

213212
@pytest.mark.timeout(230) # 3x average
213+
@pytest.mark.post_merge
214214
def test_request_migration_sglang_aggregated(
215215
request,
216216
runtime_services_dynamic_ports,
@@ -262,6 +262,7 @@ def test_request_migration_sglang_aggregated(
262262
@pytest.mark.skip(reason="Cannot reliably migrate at Prefill that finish < 1 ms")
263263
@pytest.mark.xfail(strict=False, reason="Prefill migration not yet supported")
264264
@pytest.mark.timeout(230) # 3x average
265+
@pytest.mark.nightly
265266
def test_request_migration_sglang_prefill(
266267
request,
267268
runtime_services_dynamic_ports,
@@ -330,6 +331,7 @@ def test_request_migration_sglang_prefill(
330331

331332
@pytest.mark.skip(reason="KV cache transfer may fail")
332333
@pytest.mark.timeout(230) # 3x average
334+
@pytest.mark.nightly
333335
def test_request_migration_sglang_kv_transfer(
334336
request,
335337
runtime_services_dynamic_ports,
@@ -397,6 +399,7 @@ def test_request_migration_sglang_kv_transfer(
397399

398400

399401
@pytest.mark.timeout(230) # 3x average
402+
@pytest.mark.nightly
400403
def test_request_migration_sglang_decode(
401404
request,
402405
runtime_services_dynamic_ports,

tests/fault_tolerance/migration/test_trtllm.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,6 @@
3131
pytest.mark.gpu_1,
3232
pytest.mark.e2e,
3333
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
34-
pytest.mark.post_merge, # post_merge to pinpoint failure commit
3534
pytest.mark.parametrize(
3635
"migration_limit", [3, 0], ids=["migration_enabled", "migration_disabled"]
3736
),
@@ -188,8 +187,8 @@ def is_ready(self, response) -> bool:
188187
return False
189188

190189

191-
@pytest.mark.timeout(290) # 3x average
192-
@pytest.mark.post_merge
190+
@pytest.mark.timeout(290)
191+
@pytest.mark.post_merge # 3x average
193192
def test_request_migration_trtllm_aggregated(
194193
request,
195194
runtime_services_dynamic_ports,
@@ -240,6 +239,7 @@ def test_request_migration_trtllm_aggregated(
240239

241240
@pytest.mark.xfail(strict=False, reason="Prefill migration not yet supported")
242241
@pytest.mark.timeout(350) # 3x average
242+
@pytest.mark.nightly
243243
def test_request_migration_trtllm_prefill(
244244
request,
245245
runtime_services_dynamic_ports,
@@ -308,6 +308,7 @@ def test_request_migration_trtllm_prefill(
308308

309309
@pytest.mark.skip(reason="Decode worker can get stuck downloading kv cache")
310310
@pytest.mark.timeout(350) # 3x average
311+
@pytest.mark.nightly
311312
def test_request_migration_trtllm_kv_transfer(
312313
request,
313314
runtime_services_dynamic_ports,
@@ -375,6 +376,7 @@ def test_request_migration_trtllm_kv_transfer(
375376

376377

377378
@pytest.mark.timeout(350) # 3x average
379+
@pytest.mark.post_merge
378380
def test_request_migration_trtllm_decode(
379381
request,
380382
runtime_services_dynamic_ports,

tests/fault_tolerance/migration/test_vllm.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,6 @@
3232
pytest.mark.gpu_1,
3333
pytest.mark.e2e,
3434
pytest.mark.model(FAULT_TOLERANCE_MODEL_NAME),
35-
pytest.mark.post_merge, # post_merge to pinpoint failure commit
3635
pytest.mark.parametrize(
3736
"migration_limit", [3, 0], ids=["migration_enabled", "migration_disabled"]
3837
),
@@ -208,6 +207,7 @@ def is_ready(self, response) -> bool:
208207

209208

210209
@pytest.mark.timeout(290) # 3x average
210+
@pytest.mark.post_merge
211211
def test_request_migration_vllm_aggregated(
212212
request,
213213
runtime_services_dynamic_ports,
@@ -258,6 +258,7 @@ def test_request_migration_vllm_aggregated(
258258

259259
@pytest.mark.xfail(strict=False, reason="Prefill migration not yet supported")
260260
@pytest.mark.timeout(350) # 3x average
261+
@pytest.mark.nightly
261262
def test_request_migration_vllm_prefill(
262263
request,
263264
runtime_services_dynamic_ports,
@@ -335,6 +336,7 @@ def test_request_migration_vllm_prefill(
335336
),
336337
)
337338
@pytest.mark.timeout(350) # 3x average
339+
@pytest.mark.nightly
338340
def test_request_migration_vllm_kv_transfer(
339341
request,
340342
runtime_services_dynamic_ports,
@@ -412,6 +414,7 @@ def test_request_migration_vllm_kv_transfer(
412414
),
413415
)
414416
@pytest.mark.timeout(350) # 3x average
417+
@pytest.mark.nightly
415418
def test_request_migration_vllm_decode(
416419
request,
417420
runtime_services_dynamic_ports,

0 commit comments

Comments
 (0)