Commit 129290d

Add dsr1 and gpt-oss test cases
Signed-off-by: Chenfei Zhang <[email protected]>
1 parent b25c91e

File tree: 6 files changed (+82, -254 lines)

jenkins/L0_Test.groovy

Lines changed: 0 additions & 1 deletion
```diff
@@ -2731,7 +2731,6 @@ def launchTestJobs(pipeline, testFilter)
         // "GB300-PyTorch-1": ["gb300-single", "l0_gb300", 1, 1],
         "GB200-4_GPUs-PyTorch-1": ["gb200-trtllm", "l0_gb200_multi_gpus", 1, 1, 4],
         "GB200-4_GPUs-PyTorch-Post-Merge-1": ["gb200-trtllm", "l0_gb200_multi_gpus", 1, 1, 4],
-        "GB200-4_GPUs-PyTorch-Perf-Sanity-Post-Merge-1": ["gb200-trtllm", "perf_sanity_l0_gb200_multi_gpus", 1, 1, 4],
         // "GB300-4_GPUs-PyTorch-Post-Merge-1": ["gb300-trtllm", "l0_gb300_multi_gpus", 1, 1, 4],
     ]
     fullSet += SBSASlurmTestConfigs.keySet()
```
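Each value in this map is a positional parameter list for one CI stage. A minimal sketch of how an entry could be unpacked (the field meanings and names below are inferred from the surrounding entries, not documented in this diff):

```python
from dataclasses import dataclass

@dataclass
class StageConfig:
    # Hypothetical decoder; positional meanings inferred from entries above.
    platform: str       # e.g. "gb200-trtllm"
    test_list: str      # test-db list name, e.g. "l0_gb200_multi_gpus"
    split_id: int       # which shard of the test list this stage runs
    split_count: int    # total number of shards
    gpu_count: int = 1  # GPUs per stage; the 4-GPU jobs carry an explicit 4

entry = ["gb200-trtllm", "l0_gb200_multi_gpus", 1, 1, 4]
print(StageConfig(*entry))
```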

tests/integration/defs/perf/test_perf.py

Lines changed: 3 additions & 1 deletion
```diff
@@ -686,8 +686,10 @@ def generate_extra_llm_api_config(self) -> str:
         if self.max_draft_len > 0:
             config_lines.append(f"  max_draft_len: {self.max_draft_len}")
         if self.speculative_model_dir:
+            spec_model_dir = os.path.join(llm_models_root(),
+                                          self.speculative_model_dir)
             config_lines.append(
-                f"  speculative_model_dir: {self.speculative_model_dir}")
+                f"  speculative_model_dir: {spec_model_dir}")
 
         return "\n".join(config_lines)
```
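The practical effect: a relative speculative_model_dir from the server YAML (such as the gpt_oss/gpt-oss-120b-Eagle3 entry added below) is now resolved under the shared model root before being written into the extra LLM API config. A minimal sketch of that behavior, with llm_models_root() stubbed out (the environment variable and fallback path are assumptions, not necessarily the repo helper's actual lookup):

```python
import os

def llm_models_root() -> str:
    # Stand-in for the helper used in test_perf.py; the real lookup
    # may consult a different variable or default path.
    return os.environ.get("LLM_MODELS_ROOT", "/scratch/llm-models")

# Relative directories from the YAML are anchored at the model root.
spec_model_dir = os.path.join(llm_models_root(), "gpt_oss/gpt-oss-120b-Eagle3")
print(f"speculative_model_dir: {spec_model_dir}")
# -> speculative_model_dir: /scratch/llm-models/gpt_oss/gpt-oss-120b-Eagle3
```

One property worth noting: os.path.join discards earlier components when a later one is absolute, so a config that already carries an absolute path would pass through unchanged.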

tests/integration/test_lists/test-db/perf_sanity_l0_dgx_b200.yml

Lines changed: 12 additions & 12 deletions
```diff
@@ -3,8 +3,8 @@ perf_sanity_l0_dgx_b200:
 - condition:
     ranges:
       system_gpu_count:
-        gte: 4
-        lte: 4
+        gte: 8
+        lte: 8
     wildcards:
       gpu:
       - '*b200*'
@@ -15,13 +15,13 @@ perf_sanity_l0_dgx_b200:
       backend: pytorch
       orchestrator: mpi
   tests:
-  - perf/test_perf.py::test_perf[perf_sanity_upload-l0_dgx_b200-r1_fp4_dep4,r1_fp4_tep4,r1_fp4_v2_dep4_mtp1,r1_fp4_v2_tep4_mtp3,gpt_oss_dep4,gpt_oss_tep4]
+  - perf/test_perf.py::test_perf[perf_sanity_upload-l0_dgx_b200-r1_fp8_dep4_mtp1,r1_fp8_tep4_mtp3,gpt_oss_fp4_eagle3_tp8]
 
 - condition:
     ranges:
       system_gpu_count:
-        gte: 4
-        lte: 4
+        gte: 8
+        lte: 8
     wildcards:
       gpu:
       - '*b200*'
@@ -32,13 +32,13 @@ perf_sanity_l0_dgx_b200:
       backend: pytorch
       orchestrator: mpi
   tests:
-  - perf/test_perf.py::test_perf[perf_sanity_upload-l0_dgx_b200-r1_fp4_dep4,r1_fp4_tep4,r1_fp4_v2_dep4_mtp1,r1_fp4_v2_tep4_mtp3,gpt_oss_dep4,gpt_oss_tep4]
+  - perf/test_perf.py::test_perf[perf_sanity_upload-l0_dgx_b200-r1_fp8_dep4_mtp1,r1_fp8_tep4_mtp3,gpt_oss_fp4_eagle3_tp8]
 
 - condition:
     ranges:
       system_gpu_count:
-        gte: 8
-        lte: 8
+        gte: 4
+        lte: 4
     wildcards:
       gpu:
       - '*b200*'
@@ -49,13 +49,13 @@ perf_sanity_l0_dgx_b200:
       backend: pytorch
      orchestrator: mpi
   tests:
-  - perf/test_perf.py::test_perf[perf_sanity_upload-l0_dgx_b200-r1_fp8_dep4_mtp1,r1_fp8_tep4_mtp3]
+  - perf/test_perf.py::test_perf[perf_sanity_upload-l0_dgx_b200-r1_fp4_v2_dep4_mtp1,r1_fp4_v2_tep4_mtp3,gpt_oss_fp4_dep2,gpt_oss_fp4_dep4]
 
 - condition:
     ranges:
       system_gpu_count:
-        gte: 8
-        lte: 8
+        gte: 4
+        lte: 4
     wildcards:
       gpu:
       - '*b200*'
@@ -66,4 +66,4 @@ perf_sanity_l0_dgx_b200:
       backend: pytorch
       orchestrator: mpi
   tests:
-  - perf/test_perf.py::test_perf[perf_sanity_upload-l0_dgx_b200-r1_fp8_dep4_mtp1,r1_fp8_tep4_mtp3]
+  - perf/test_perf.py::test_perf[perf_sanity_upload-l0_dgx_b200-r1_fp4_v2_dep4_mtp1,r1_fp4_v2_tep4_mtp3,gpt_oss_fp4_dep2,gpt_oss_fp4_dep4]
```
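Each condition block gates its tests list on machine properties; the edits above retarget which GPU-count bucket runs which perf-sanity combo. A minimal sketch of how such a block could be evaluated (illustrative only, not the test-db's actual matcher):

```python
from fnmatch import fnmatch

def condition_matches(condition: dict, system: dict) -> bool:
    """Illustrative matcher for one test-db condition block."""
    # Numeric ranges, e.g. system_gpu_count: {gte: 8, lte: 8}.
    for key, bounds in condition.get("ranges", {}).items():
        value = system[key]
        if "gte" in bounds and value < bounds["gte"]:
            return False
        if "lte" in bounds and value > bounds["lte"]:
            return False
    # Wildcard patterns, e.g. gpu: ['*b200*'].
    for key, patterns in condition.get("wildcards", {}).items():
        if not any(fnmatch(str(system[key]), pat) for pat in patterns):
            return False
    return True

condition = {
    "ranges": {"system_gpu_count": {"gte": 8, "lte": 8}},
    "wildcards": {"gpu": ["*b200*"]},
}
print(condition_matches(condition, {"system_gpu_count": 8, "gpu": "b200"}))  # True
print(condition_matches(condition, {"system_gpu_count": 4, "gpu": "b200"}))  # False
```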

tests/integration/test_lists/test-db/perf_sanity_l0_gb200_multi_gpus.yml

Lines changed: 0 additions & 35 deletions
This file was deleted.

tests/scripts/perf-sanity/l0_dgx_b200.yaml

Lines changed: 67 additions & 93 deletions
```diff
@@ -1,61 +1,60 @@
 server_configs:
-  - name: "r1_fp4_dep4"
-    model_name: "deepseek_r1_0528_fp4"
-    gpus: 4
-    tp: 4
-    ep: 4
+  - name: "r1_fp8_dep4_mtp1"
+    model_name: "deepseek_r1_0528_fp8"
+    gpus: 8
+    tp: 8
+    ep: 8
     pp: 1
     attention_backend: "TRTLLM"
-    moe_backend: "CUTLASS"
+    moe_backend: "DEEPGEMM"
     enable_attention_dp: true
-    enable_chunked_prefill: false
-    max_num_tokens: 2176
+    batching_wait_iter: 0
+    enable_balance: true
+    timeout_iters: 60
+    max_batch_size: 512
+    max_num_tokens: 2112
     kv_cache_dtype: "fp8"
     free_gpu_memory_fraction: 0.8
-    max_batch_size: 256
+    cuda_graph_max_batch_size: 512
     enable_padding: true
+    spec_decoding_type: "MTP"
+    num_nextn_predict_layers: 1
     client_configs:
-      - name: "con1_iter1_1024_1024"
-        concurrency: 1
-        iterations: 1
-        isl: 1024
-        osl: 1024
-        random_range_ratio: 0.0
-      - name: "con8_iter1_1024_1024"
-        concurrency: 8
-        iterations: 1
+      - name: "con1024_iter10_1k1k"
+        concurrency: 1024
+        iterations: 10
         isl: 1024
         osl: 1024
-        random_range_ratio: 0.0
+        random_range_ratio: 1.0
+        backend: "openai"
+        use_chat_template: true
 
-  - name: "r1_fp4_tep4"
-    model_name: "deepseek_r1_0528_fp4"
-    gpus: 4
-    tp: 4
-    ep: 4
+  - name: "r1_fp8_tep4_mtp3"
+    model_name: "deepseek_r1_0528_fp8"
+    gpus: 8
+    tp: 8
+    ep: 1
     pp: 1
     attention_backend: "TRTLLM"
-    moe_backend: "CUTLASS"
+    moe_backend: "TRTLLM"
     enable_attention_dp: false
-    enable_chunked_prefill: false
-    max_num_tokens: 2176
+    max_batch_size: 32
+    max_num_tokens: 3136
     kv_cache_dtype: "fp8"
     free_gpu_memory_fraction: 0.8
-    max_batch_size: 256
+    cuda_graph_max_batch_size: 32
     enable_padding: true
+    spec_decoding_type: "MTP"
+    num_nextn_predict_layers: 3
     client_configs:
-      - name: "con1_iter1_1024_1024"
-        concurrency: 1
-        iterations: 1
-        isl: 1024
-        osl: 1024
-        random_range_ratio: 0.0
-      - name: "con8_iter1_1024_1024"
-        concurrency: 8
-        iterations: 1
+      - name: "con32_iter10_1k1k"
+        concurrency: 32
+        iterations: 10
         isl: 1024
         osl: 1024
-        random_range_ratio: 0.0
+        random_range_ratio: 1.0
+        backend: "openai"
+        use_chat_template: true
 
   - name: "r1_fp4_v2_dep4_mtp1"
     model_name: "deepseek_r1_0528_fp4_v2"
@@ -114,78 +113,52 @@ server_configs:
         backend: "openai"
         use_chat_template: true
 
-  - name: "r1_fp8_dep4_mtp1"
-    model_name: "deepseek_r1_0528_fp8"
-    gpus: 8
-    tp: 8
-    ep: 8
-    pp: 1
-    attention_backend: "TRTLLM"
-    moe_backend: "DEEPGEMM"
-    enable_attention_dp: true
-    batching_wait_iter: 0
-    enable_balance: true
-    timeout_iters: 60
-    max_batch_size: 512
-    max_num_tokens: 2112
-    kv_cache_dtype: "fp8"
-    free_gpu_memory_fraction: 0.8
-    cuda_graph_max_batch_size: 512
-    enable_padding: true
-    spec_decoding_type: "MTP"
-    num_nextn_predict_layers: 1
-    client_configs:
-      - name: "con1024_iter10_1k1k"
-        concurrency: 1024
-        iterations: 10
-        isl: 1024
-        osl: 1024
-        random_range_ratio: 1.0
-        backend: "openai"
-        use_chat_template: true
-
-  - name: "r1_fp8_tep4_mtp3"
-    model_name: "deepseek_r1_0528_fp8"
+  - name: "gpt_oss_fp4_eagle3_tp8"
+    model_name: "gpt_oss_120b_fp4"
     gpus: 8
     tp: 8
     ep: 1
     pp: 1
     attention_backend: "TRTLLM"
     moe_backend: "TRTLLM"
     enable_attention_dp: false
-    max_batch_size: 32
-    max_num_tokens: 3136
+    max_batch_size: 1
+    max_num_tokens: 20000
     kv_cache_dtype: "fp8"
     free_gpu_memory_fraction: 0.8
-    cuda_graph_max_batch_size: 32
+    cuda_graph_max_batch_size: 1
     enable_padding: true
-    spec_decoding_type: "MTP"
-    num_nextn_predict_layers: 3
+    num_postprocess_workers: 4
+    stream_interval: 20
+    spec_decoding_type: "Eagle"
+    eagle3_layers_to_capture: 1
+    max_draft_len: 3
+    speculative_model_dir: "gpt_oss/gpt-oss-120b-Eagle3"
     client_configs:
-      - name: "con32_iter10_1k1k"
-        concurrency: 32
-        iterations: 10
+      - name: "con1_iter32_1k1k"
+        concurrency: 1
+        iterations: 32
         isl: 1024
         osl: 1024
-        random_range_ratio: 1.0
+        random_range_ratio: 0.8
         backend: "openai"
         use_chat_template: true
 
-  - name: "gpt_oss_dep4"
+  - name: "gpt_oss_fp4_dep2"
     model_name: "gpt_oss_120b_fp4"
-    gpus: 4
-    tp: 4
-    ep: 4
+    gpus: 2
+    tp: 2
+    ep: 2
     pp: 1
     attention_backend: "TRTLLM"
     moe_backend: "TRTLLM"
     enable_attention_dp: true
     enable_balance: true
-    max_batch_size: 512
+    max_batch_size: 1024
     max_num_tokens: 20000
     kv_cache_dtype: "fp8"
     free_gpu_memory_fraction: 0.8
-    cuda_graph_max_batch_size: 512
+    cuda_graph_max_batch_size: 1024
     enable_padding: true
     num_postprocess_workers: 4
     stream_interval: 20
@@ -199,27 +172,28 @@ server_configs:
         backend: "openai"
         use_chat_template: true
 
-  - name: "gpt_oss_tep4"
+  - name: "gpt_oss_fp4_dep4"
     model_name: "gpt_oss_120b_fp4"
     gpus: 4
     tp: 4
-    ep: 1
+    ep: 4
     pp: 1
     attention_backend: "TRTLLM"
     moe_backend: "TRTLLM"
-    enable_attention_dp: false
-    max_batch_size: 64
+    enable_attention_dp: true
+    enable_balance: true
+    max_batch_size: 512
     max_num_tokens: 20000
     kv_cache_dtype: "fp8"
     free_gpu_memory_fraction: 0.8
-    cuda_graph_max_batch_size: 64
+    cuda_graph_max_batch_size: 512
     enable_padding: true
     num_postprocess_workers: 4
     stream_interval: 20
     client_configs:
-      - name: "con64_iter3_1k1k"
-        concurrency: 64
-        iterations: 3
+      - name: "con2048_iter5_1k1k"
+        concurrency: 2048
+        iterations: 5
         isl: 1024
         osl: 1024
         random_range_ratio: 1.0
```
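This file pairs each server config with the client sweeps run against it. A minimal sketch of iterating that benchmark matrix (the field names mirror the YAML above, but the driver shown here is an assumption, not the perf-sanity harness itself):

```python
# Assumes PyYAML is available and the file path below matches the repo layout.
import yaml

with open("tests/scripts/perf-sanity/l0_dgx_b200.yaml") as f:
    server_configs = yaml.safe_load(f)["server_configs"]

for server in server_configs:
    # One serving instance per entry; tp/ep/pp describe its parallel mapping.
    print(f"{server['name']}: {server['gpus']} GPUs "
          f"(tp={server['tp']}, ep={server['ep']}, pp={server['pp']})")
    for client in server["client_configs"]:
        # Each client config is one load sweep against that server.
        print(f"  {client['name']}: concurrency={client['concurrency']}, "
              f"isl={client['isl']}, osl={client['osl']}, "
              f"iterations={client['iterations']}")
```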
