Skip to content

Commit e03e328

Browse files
committed
restrict max_num_tokens in disagg mtp config
Signed-off-by: Ruodi Lu <ruodil@users.noreply.github.com>
1 parent 9985277 commit e03e328

12 files changed: +24 -12 lines changed

tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_128k8k_ctx1_pp4_gen6_tep8_bs1_eplb0_mtp3-Default.yaml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,8 @@ worker_config:
49 | 49 |   enable_attention_dp: false
50 | 50 |   pipeline_parallel_size: 4
51 | 51 |   max_batch_size: 1
52 |    | - max_num_tokens: 128
   | 52 | + # mtp_size=3 ⇒ max_num_tokens = 1 * (3 + 1) = 4
   | 53 | + max_num_tokens: 4
53 | 54 |   max_seq_len: 139296
54 | 55 |   cuda_graph_config:
55 | 56 |   enable_padding: true

tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_128k8k_ctx1_pp8_gen1_dep16_bs1_eplb0_mtp3-Default.yaml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,8 @@ worker_config:
49 | 49 |   enable_attention_dp: true
50 | 50 |   pipeline_parallel_size: 1
51 | 51 |   max_batch_size: 1
52 |    | - max_num_tokens: 128
   | 52 | + # mtp_size=3 ⇒ max_num_tokens = 1 * (3 + 1) = 4
   | 53 | + max_num_tokens: 4
53 | 54 |   max_seq_len: 139296
54 | 55 |   cuda_graph_config:
55 | 56 |   enable_padding: true

tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_128k8k_ctx1_pp8_gen1_dep8_bs4_eplb0_mtp2-Default.yaml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,8 @@ worker_config:
49 | 49 |   enable_attention_dp: true
50 | 50 |   pipeline_parallel_size: 1
51 | 51 |   max_batch_size: 4
52 |    | - max_num_tokens: 128
   | 52 | + # mtp_size=2 ⇒ max_num_tokens = 4 * (2 + 1) = 12
   | 53 | + max_num_tokens: 12
53 | 54 |   max_seq_len: 139296
54 | 55 |   cuda_graph_config:
55 | 56 |   enable_padding: true

tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_128k8k_ctx1_pp8_gen1_tep8_bs2_eplb0_mtp3-Default.yaml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,8 @@ worker_config:
49 | 49 |   enable_attention_dp: false
50 | 50 |   pipeline_parallel_size: 1
51 | 51 |   max_batch_size: 2
52 |    | - max_num_tokens: 128
   | 52 | + # mtp_size=3 ⇒ max_num_tokens = 2 * (3 + 1) = 8
   | 53 | + max_num_tokens: 8
53 | 54 |   max_seq_len: 139296
54 | 55 |   cuda_graph_config:
55 | 56 |   enable_padding: true

tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_128k8k_ctx3_pp8_gen1_dep16_bs8_eplb0_mtp2-Default.yaml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,8 @@ worker_config:
49 | 49 |   enable_attention_dp: true
50 | 50 |   pipeline_parallel_size: 1
51 | 51 |   max_batch_size: 8
52 |    | - max_num_tokens: 128
   | 52 | + # mtp_size=2 ⇒ max_num_tokens = 8 * (2 + 1) = 24
   | 53 | + max_num_tokens: 24
53 | 54 |   max_seq_len: 139296
54 | 55 |   cuda_graph_config:
55 | 56 |   enable_padding: true

tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_128k8k_ctx3_pp8_gen1_dep32_bs2_eplb0_mtp3-Default.yaml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,8 @@ worker_config:
49 | 49 |   enable_attention_dp: true
50 | 50 |   pipeline_parallel_size: 1
51 | 51 |   max_batch_size: 2
52 |    | - max_num_tokens: 128
   | 52 | + # mtp_size=3 ⇒ max_num_tokens = 2 * (3 + 1) = 8
   | 53 | + max_num_tokens: 8
53 | 54 |   max_seq_len: 139296
54 | 55 |   cuda_graph_config:
55 | 56 |   enable_padding: true

tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_128k8k_ctx5_pp4_gen1_dep16_bs8_eplb0_mtp3-Default.yaml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,8 @@ worker_config:
49 | 49 |   enable_attention_dp: true
50 | 50 |   pipeline_parallel_size: 1
51 | 51 |   max_batch_size: 8
52 |    | - max_num_tokens: 128
   | 52 | + # mtp_size=3 ⇒ max_num_tokens = 8 * (3 + 1) = 32
   | 53 | + max_num_tokens: 32
53 | 54 |   max_seq_len: 139296
54 | 55 |   cuda_graph_config:
55 | 56 |   enable_padding: true

tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_128k8k_ctx5_pp4_gen1_dep32_bs2_eplb0_mtp3-Default.yaml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,8 @@ worker_config:
49 | 49 |   enable_attention_dp: true
50 | 50 |   pipeline_parallel_size: 1
51 | 51 |   max_batch_size: 2
52 |    | - max_num_tokens: 128
   | 52 | + # mtp_size=3 ⇒ max_num_tokens = 2 * (3 + 1) = 8
   | 53 | + max_num_tokens: 8
53 | 54 |   max_seq_len: 139296
54 | 55 |   cuda_graph_config:
55 | 56 |   enable_padding: true

tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_128k8k_ctx7_pp4_gen1_dep16_bs16_eplb0_mtp1-Default.yaml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,8 @@ worker_config:
49 | 49 |   enable_attention_dp: true
50 | 50 |   pipeline_parallel_size: 1
51 | 51 |   max_batch_size: 16
52 |    | - max_num_tokens: 128
   | 52 | + # mtp_size=1 ⇒ max_num_tokens = 16 * (1 + 1) = 32
   | 53 | + max_num_tokens: 32
53 | 54 |   max_seq_len: 139296
54 | 55 |   cuda_graph_config:
55 | 56 |   enable_padding: true

tests/integration/defs/perf/disagg/test_configs/disagg/perf/deepseek-r1-fp4_128k8k_ctx8_pp4_gen1_dep16_bs32_eplb0_mtp1-Default.yaml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,8 @@ worker_config:
49 | 49 |   enable_attention_dp: true
50 | 50 |   pipeline_parallel_size: 1
51 | 51 |   max_batch_size: 32
52 |    | - max_num_tokens: 128
   | 52 | + # mtp_size=1 ⇒ max_num_tokens = 32 * (1 + 1) = 64
   | 53 | + max_num_tokens: 64
53 | 54 |   max_seq_len: 139296
54 | 55 |   cuda_graph_config:
55 | 56 |   enable_padding: true

0 commit comments

Comments
 (0)