Skip to content

Commit 62050b2

Browse files
authored
[None][infra] separate AutoDeploy tests into own stages (#10634)
Signed-off-by: Lucas Liebenwein <11156568+lucaslie@users.noreply.github.com>
1 parent f7de285 commit 62050b2

File tree

9 files changed

+106
-30
lines changed

9 files changed

+106
-30
lines changed

jenkins/L0_Test.groovy

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2153,8 +2153,12 @@ def getMakoArgsFromStageName(stageName, parseSysinfo=false) {
21532153
// If stageName contains "-FMHA-", add "backend=fmha" to makoArgs
21542154
// At this point, only tests with backend=fmha or unspecified backend will be run
21552155
makoArgs += ["backend=fmha"]
2156+
} else if (stageName.contains("-AutoDeploy-")) {
2157+
// If stageName contains "-AutoDeploy-", add "backend=autodeploy" to makoArgs
2158+
// At this point, only tests with backend=autodeploy or unspecified backend will be run
2159+
makoArgs += ["backend=autodeploy"]
21562160
} else {
2157-
// If stageName does not contain "-PyTorch-", "-TensorRT-", "-CPP-", "-Triton-", or "-FMHA-", do not add any backend
2161+
// If stageName does not contain "-PyTorch-", "-TensorRT-", "-CPP-", "-Triton-", "-FMHA-", or "-AutoDeploy-", do not add any backend
21582162
// At this point, all tests will be run
21592163
// For cases where backend is not specified in makoArgs, we will match all types of backends and tests without specified backend
21602164
}
@@ -3155,6 +3159,7 @@ def launchTestJobs(pipeline, testFilter)
31553159
"A30-Triton-1": ["a30", "l0_a30", 1, 1],
31563160
"A30-PyTorch-1": ["a30", "l0_a30", 1, 2],
31573161
"A30-PyTorch-2": ["a30", "l0_a30", 2, 2],
3162+
"A30-AutoDeploy-1": ["a30", "l0_a30", 1, 1],
31583163
"A30-CPP-1": ["a30", "l0_a30", 1, 3],
31593164
"A30-CPP-2": ["a30", "l0_a30", 2, 3],
31603165
"A30-CPP-3": ["a30", "l0_a30", 3, 3],
@@ -3166,11 +3171,13 @@ def launchTestJobs(pipeline, testFilter)
31663171
"H100_PCIe-PyTorch-3": ["h100-cr", "l0_h100", 3, 4],
31673172
"H100_PCIe-PyTorch-4": ["h100-cr", "l0_h100", 4, 4],
31683173
"H100_PCIe-PyTorch-Ray-1": ["h100-cr", "l0_h100", 1, 1],
3174+
"H100_PCIe-AutoDeploy-1": ["h100-cr", "l0_h100", 1, 1],
31693175
"H100_PCIe-CPP-1": ["h100-cr", "l0_h100", 1, 1],
31703176
"H100_PCIe-TensorRT-1": ["h100-cr", "l0_h100", 1, 1],
31713177
"B200_PCIe-PyTorch-1": ["b100-ts2", "l0_b200", 1, 3],
31723178
"B200_PCIe-PyTorch-2": ["b100-ts2", "l0_b200", 2, 3],
31733179
"B200_PCIe-PyTorch-3": ["b100-ts2", "l0_b200", 3, 3],
3180+
"B200_PCIe-AutoDeploy-1": ["b100-ts2", "l0_b200", 1, 1],
31743181
"RTX5090-PyTorch-1": ["rtx-5090", "l0_gb202", 1, 1],
31753182
"RTX5080-TensorRT-1": ["rtx-5080", "l0_gb203", 1, 2],
31763183
"RTX5080-TensorRT-2": ["rtx-5080", "l0_gb203", 2, 2],
@@ -3262,8 +3269,10 @@ def launchTestJobs(pipeline, testFilter)
32623269
"DGX_H100-4_GPUs-PyTorch-GptOss-1": ["dgx-h100-x4-oci", "l0_dgx_h100", 1, 1, 4],
32633270
"DGX_H100-4_GPUs-PyTorch-Others-1": ["dgx-h100-x4-oci", "l0_dgx_h100", 1, 1, 4],
32643271
"DGX_H100-4_GPUs-PyTorch-Ray-1": ["dgx-h100-x4-oci", "l0_dgx_h100", 1, 1, 4],
3272+
"DGX_H100-4_GPUs-AutoDeploy-1": ["dgx-h100-x4-oci", "l0_dgx_h100", 1, 1, 4],
32653273
"DGX_B200-4_GPUs-PyTorch-1": ["b200-x4-lbd", "l0_dgx_b200", 1, 1, 4, 1, true],
32663274
"DGX_B200-4_GPUs-PyTorch-Ray-1": ["b200-x4-lbd", "l0_dgx_b200", 1, 1, 4, 1, true],
3275+
"DGX_B200-4_GPUs-AutoDeploy-1": ["b200-x4-lbd", "l0_dgx_b200", 1, 1, 4, 1, true],
32673276
"DGX_B200-8_GPUs-PyTorch-1": ["b200-x8-lbd", "l0_dgx_b200", 1, 1, 8, 1, true],
32683277
"DGX_B200-4_GPUs-PyTorch-Post-Merge-1": ["b200-x4-lbd", "l0_dgx_b200", 1, 2, 4, 1, true],
32693278
"DGX_B200-4_GPUs-PyTorch-Post-Merge-2": ["b200-x4-lbd", "l0_dgx_b200", 2, 2, 4, 1, true],

tests/integration/test_lists/test-db/l0_a30.yml

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ l0_a30:
2020
- unittest/_torch/modeling -k "modeling_qwen_moe"
2121
- unittest/_torch/modeling -k "modeling_out_of_tree"
2222
- unittest/_torch/modeling -k "modeling_starcoder2"
23-
- unittest/_torch/auto_deploy/unit/singlegpu
2423
- unittest/_torch/sampler/test_beam_search.py
2524
- unittest/_torch/sampler/test_return_logits.py
2625
- test_e2e.py::test_openai_completions_with_logit_bias[torch_sampler]
@@ -244,3 +243,19 @@ l0_a30:
244243
- triton_server/test_triton_llm.py::test_tiny_llama_ifb_token_counts[tensorrtllm-both-False-1---False-True-False-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--guaranteed_no_evict---1-1-1-False-tensorrt_llm_bls]
245244
- triton_server/test_triton_llm.py::test_tiny_llama_ifb_token_counts[tensorrtllm-both-False-1---False-True-False-0-128-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--guaranteed_no_evict---1-1-1-False-ensemble]
246245
- triton_server/test_triton_llm.py::test_tiny_llama_ifb_token_counts[tensorrtllm-both-False-1---False-True-False-0-128-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap--guaranteed_no_evict---1-1-1-False-tensorrt_llm_bls]
246+
# ------------- AutoDeploy Backend Stages ---------------
247+
- condition:
248+
ranges:
249+
system_gpu_count:
250+
gte: 1
251+
lte: 1
252+
wildcards:
253+
gpu:
254+
- '*a30*'
255+
linux_distribution_name: ubuntu*
256+
terms:
257+
stage: pre_merge
258+
backend: autodeploy
259+
tests:
260+
# TODO (lucaslie): consider more fine-grained split
261+
- unittest/_torch/auto_deploy/unit/singlegpu

tests/integration/test_lists/test-db/l0_b200.yml

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,6 @@ l0_b200:
9494
- unittest/_torch/modules/test_fused_moe.py::test_fused_moe_w4a8_nvfp4_fp8[enable_configurable_moe-CUTLASS]
9595
- unittest/_torch/modules/test_fused_moe.py::test_fused_moe_mxfp4_mxfp8[enable_configurable_moe-True-8-256-CUTLASS]
9696
- unittest/_torch/modules/test_fused_moe.py::test_fused_moe_fp8_blockwise_deepgemm[enable_configurable_moe-dtype1-72-256-2560-DefaultMoeRoutingMethod]
97-
# ------------- AutoDeploy tests ---------------
98-
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[False-1]
99-
- accuracy/test_llm_api_autodeploy.py::TestNemotronMOE::test_fp8
100-
- unittest/_torch/auto_deploy/unit/singlegpu
10197
- condition:
10298
ranges:
10399
system_gpu_count:
@@ -169,3 +165,20 @@ l0_b200:
169165
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_fp8_block_scales[enable_configurable_moe-mtp=disable-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=True]
170166
- accuracy/test_llm_api_pytorch.py::TestSeedOss_36B::test_auto_dtype
171167
- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B_Instruct_RocketKV::test_auto_dtype
168+
# ------------- AutoDeploy Backend Stages ---------------
169+
- condition:
170+
ranges:
171+
system_gpu_count:
172+
gte: 1
173+
lte: 1
174+
wildcards:
175+
gpu:
176+
- '*b100*'
177+
linux_distribution_name: ubuntu*
178+
terms:
179+
stage: pre_merge
180+
backend: autodeploy
181+
tests:
182+
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[False-1]
183+
- accuracy/test_llm_api_autodeploy.py::TestNemotronMOE::test_fp8
184+
- unittest/_torch/auto_deploy/unit/singlegpu

tests/integration/test_lists/test-db/l0_dgx_b200.yml

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,6 @@ l0_dgx_b200:
3232
- disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_nixl[DeepSeek-V3-Lite-fp8]
3333
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_adp_lmtp_tp4]
3434
- accuracy/test_llm_api_pytorch.py::TestMiniMaxM2::test_4gpus[attention_dp=False-cuda_graph=True-overlap_scheduler=True-tp_size=4-ep_size=4] TIMEOUT (60)
35-
36-
# ------------- AutoDeploy tests ---------------
37-
- accuracy/test_llm_api_autodeploy.py::TestNemotronMOE::test_bf16
3835
- condition:
3936
ranges:
4037
system_gpu_count:
@@ -203,3 +200,22 @@ l0_dgx_b200:
203200
- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[pp4-fp8kv=True-attn_backend=TRTLLM-torch_compile=False]
204201
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTEDSL-mtp_nextn=2-ep4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
205202
- accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp4_tp2pp2[torch_compile=False]
203+
# ------------- AutoDeploy Backend Stages ---------------
204+
- condition:
205+
ranges:
206+
system_gpu_count:
207+
gte: 4
208+
lte: 4
209+
wildcards:
210+
gpu:
211+
- '*b200*'
212+
linux_distribution_name: ubuntu*
213+
cpu: x86_64
214+
terms:
215+
stage: pre_merge
216+
backend: autodeploy
217+
orchestrator: mpi
218+
tests:
219+
- unittest/_torch/auto_deploy/unit/multigpu
220+
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[False-4]
221+
- accuracy/test_llm_api_autodeploy.py::TestNemotronMOE::test_bf16

tests/integration/test_lists/test-db/l0_dgx_b300.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,6 @@ l0_dgx_b300:
6767
- accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_nixl_backend
6868
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp4] TIMEOUT (180)
6969
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_pp4_mtp] TIMEOUT (180)
70-
# ------------- AutoDeploy tests ---------------
7170
- condition:
7271
ranges:
7372
system_gpu_count:

tests/integration/test_lists/test-db/l0_dgx_h100.yml

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ l0_dgx_h100:
1818
- unittest/llmapi/test_llm_multi_gpu_pytorch.py -m "gpu2"
1919
- unittest/llmapi/test_additional_model_outputs.py -m "gpu2"
2020
- unittest/_torch/multi_gpu -m "not post_merge" TIMEOUT (90)
21-
- unittest/_torch/auto_deploy/unit/multigpu
2221
- unittest/_torch/modeling/test_modeling_pixtral.py::test_tensor_parallelism
2322
# ------------- Disaggregated serving tests ---------------
2423
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_eagle3[eagle3_one_model=False-overlap_scheduler=False]
@@ -44,8 +43,6 @@ l0_dgx_h100:
4443
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_auto_dtype[True-True-True]
4544
- unittest/llmapi/apps/test_disagg_serving_perf_metrics.py
4645
- disaggregated/test_disaggregated.py::test_disaggregated_cancel_large_context_requests[DeepSeek-V3-Lite-bf16]
47-
# ------------- AutoDeploy tests ---------------
48-
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[False-2]
4946
# llmapi
5047
- unittest/llmapi/test_mpi_session.py::test_llmapi_launch_multiple_tasks
5148
# ------------- Skip softmax attention tests ---------------
@@ -130,8 +127,6 @@ l0_dgx_h100:
130127
- disaggregated/test_auto_scaling.py::test_worker_restart[http-load_balancing]
131128
- disaggregated/test_auto_scaling.py::test_minimal_instances[http-round_robin]
132129
- disaggregated/test_auto_scaling.py::test_disagg_server_restart[http-round_robin]
133-
# ------------- AutoDeploy tests ---------------
134-
- accuracy/test_llm_api_autodeploy.py::TestNemotronMOE::test_bf16
135130
- condition:
136131
ranges:
137132
system_gpu_count:
@@ -308,3 +303,22 @@ l0_dgx_h100:
308303
- unittest/llmapi/test_async_llm.py -m "gpu4"
309304
- ray_orchestrator/RL/test_rl_perf_reproduce.py::test_rl_perf_reproduce[tp2_2instances]
310305
- ray_orchestrator/RL/test_rl_perf_reproduce.py::test_rl_perf_reproduce[tp1_4instances]
306+
# ------------- AutoDeploy Backend Stages ---------------
307+
- condition:
308+
ranges:
309+
system_gpu_count:
310+
gte: 4
311+
lte: 4
312+
wildcards:
313+
gpu:
314+
- '*h100*'
315+
linux_distribution_name: ubuntu*
316+
terms:
317+
stage: pre_merge
318+
backend: autodeploy
319+
auto_trigger: others
320+
orchestrator: mpi
321+
tests:
322+
- unittest/_torch/auto_deploy/unit/multigpu
323+
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[False-4]
324+
- accuracy/test_llm_api_autodeploy.py::TestNemotronMOE::test_bf16

tests/integration/test_lists/test-db/l0_dgx_h200.yml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -131,10 +131,6 @@ l0_dgx_h200:
131131
- disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf]
132132
- disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-3.1-8b-instruct-hf-fp8]
133133
- disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[DeepSeek-V3-Lite-fp8]
134-
# ------------- AutoDeploy tests ---------------
135-
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[False-4]
136-
- accuracy/test_llm_api_autodeploy.py::TestNemotronMOE::test_bf16
137-
- accuracy/test_llm_api_autodeploy.py::TestNemotronSuperV3::test_bf16
138134
- condition:
139135
ranges:
140136
system_gpu_count:

tests/integration/test_lists/test-db/l0_h100.yml

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -111,16 +111,6 @@ l0_h100:
111111
- test_e2e.py::test_openai_chat_harmony
112112
- test_e2e.py::test_openai_responses
113113
- test_e2e.py::test_trtllm_benchmark_serving[llama-3.1-model/Meta-Llama-3.1-8B]
114-
# ------------- AutoDeploy tests ---------------
115-
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[False-1]
116-
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[True-1]
117-
- accuracy/test_llm_api_autodeploy.py::TestNemotronH::test_auto_dtype[False]
118-
- accuracy/test_llm_api_autodeploy.py::TestNemotronH::test_auto_dtype[True]
119-
- accuracy/test_llm_api_autodeploy.py::TestNemotronMOE::test_fp8
120-
- accuracy/test_llm_api_autodeploy.py::TestNemotronMOE::test_bf16
121-
- examples/test_ad_speculative_decoding.py::test_autodeploy_spec_dec_output[draft_target]
122-
- examples/test_ad_speculative_decoding.py::test_autodeploy_spec_dec_output[eagle3]
123-
- examples/test_ad_speculative_decoding.py::test_autodeploy_eagle3_acceptance_rate
124114
- condition:
125115
ranges:
126116
system_gpu_count:
@@ -422,3 +412,28 @@ l0_h100:
422412
backend: fmha
423413
tests:
424414
- test_fmha.py::test_fmha TIMEOUT (90)
415+
# ------------- AutoDeploy Backend Stages ---------------
416+
- condition:
417+
ranges:
418+
system_gpu_count:
419+
gte: 1
420+
lte: 1
421+
wildcards:
422+
gpu:
423+
- '*h100*'
424+
linux_distribution_name: ubuntu*
425+
terms:
426+
stage: pre_merge
427+
backend: autodeploy
428+
orchestrator: mpi
429+
tests:
430+
- unittest/_torch/auto_deploy/unit/singlegpu
431+
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[False-1]
432+
- accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[True-1]
433+
- accuracy/test_llm_api_autodeploy.py::TestNemotronH::test_auto_dtype[False]
434+
- accuracy/test_llm_api_autodeploy.py::TestNemotronH::test_auto_dtype[True]
435+
- accuracy/test_llm_api_autodeploy.py::TestNemotronMOE::test_fp8
436+
- accuracy/test_llm_api_autodeploy.py::TestNemotronMOE::test_bf16
437+
- examples/test_ad_speculative_decoding.py::test_autodeploy_spec_dec_output[draft_target]
438+
- examples/test_ad_speculative_decoding.py::test_autodeploy_spec_dec_output[eagle3]
439+
- examples/test_ad_speculative_decoding.py::test_autodeploy_eagle3_acceptance_rate

tests/integration/test_lists/waives.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,6 @@ full:H100_PCIe/unittest/llmapi/test_llm_pytorch.py::test_llama_7b_multi_lora_evi
230230
unittest/_torch/speculative/test_draft_len_schedule.py::test_correctness_across_batch_sizes[model_drafter-schedule1] SKIP (https://nvbugs/5680911)
231231
accuracy/test_llm_api_pytorch.py::TestSeedOss_36B::test_auto_dtype SKIP (https://nvbugs/5612438)
232232
accuracy/test_llm_api_autodeploy.py::TestNemotronH::test_auto_dtype[True] SKIP (https://nvbugs/5688721)
233-
accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[False-2] SKIP (https://nvbugs/5769712)
234233
accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[False-4] SKIP (https://nvbugs/5769712)
235234
test_e2e.py::test_openai_completions_example[trt] SKIP (https://nvbugs/5701450)
236235
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=False-attn_backend=TRTLLM-torch_compile=False] SKIP (https://nvbugs/5701457)

0 commit comments

Comments
 (0)