Super tiny: enable draft-weights-cpu-backup to avoid the MTP accept-length issue (#971)

fzyzcjy · web-flow · commit 2cb386ee771f · 2025-11-30T21:34:20.000+08:00
diff --git a/docs/en/advanced/speculative-decoding.md b/docs/en/advanced/speculative-decoding.md
@@ -11,6 +11,7 @@ For models with MTP layers (e.g., GLM-4.6, DeepSeek-V3/R1), simply add:
 --sglang-speculative-num-steps 3
 --sglang-speculative-eagle-topk 1
 --sglang-speculative-num-draft-tokens 4
+--sglang-enable-draft-weights-cpu-backup
 ```
 
 If you want to use a separately trained draft model (e.g., one trained with [SpecForge](https://docs.sglang.ai/SpecForge/)), also set:
diff --git a/scripts/run-glm4.5-355B-A32B.sh b/scripts/run-glm4.5-355B-A32B.sh
@@ -122,6 +122,7 @@ SGLANG_ARGS=(
    --sglang-speculative-num-steps 1
    --sglang-speculative-eagle-topk 1
    --sglang-speculative-num-draft-tokens 2
+   --sglang-enable-draft-weights-cpu-backup
 
 )
 
diff --git a/scripts/run-mimo-7B-rl-eagle.sh b/scripts/run-mimo-7B-rl-eagle.sh
@@ -113,6 +113,7 @@ SGLANG_ARGS=(
    --sglang-speculative-num-steps 3
    --sglang-speculative-eagle-topk 1
    --sglang-speculative-num-draft-tokens 4
+   --sglang-enable-draft-weights-cpu-backup
 )
 
 MISC_ARGS=(
diff --git a/scripts/run-qwen3-next-80B-A3B.sh b/scripts/run-qwen3-next-80B-A3B.sh
@@ -129,6 +129,7 @@ SGLANG_ARGS=(
    --sglang-speculative-num-steps 2
    --sglang-speculative-eagle-topk 1
    --sglang-speculative-num-draft-tokens 3
+   --sglang-enable-draft-weights-cpu-backup
 
    --sglang-max-running-requests 512
 )

Original file line number	Diff line number	Diff line change
`@@ -122,6 +122,7 @@ SGLANG_ARGS=(`
`122`	`122`	`--sglang-speculative-num-steps 1`
`123`	`123`	`--sglang-speculative-eagle-topk 1`
`124`	`124`	`--sglang-speculative-num-draft-tokens 2`
	`125`	`+ --sglang-enable-draft-weights-cpu-backup`
`125`	`126`
`126`	`127`	`)`
`127`	`128`
Original file line number	Diff line number	Diff line change
`@@ -113,6 +113,7 @@ SGLANG_ARGS=(`
`113`	`113`	`--sglang-speculative-num-steps 3`
`114`	`114`	`--sglang-speculative-eagle-topk 1`
`115`	`115`	`--sglang-speculative-num-draft-tokens 4`
	`116`	`+ --sglang-enable-draft-weights-cpu-backup`
`116`	`117`	`)`
`117`	`118`
`118`	`119`	`MISC_ARGS=(`
Original file line number	Diff line number	Diff line change
`@@ -129,6 +129,7 @@ SGLANG_ARGS=(`
`129`	`129`	`--sglang-speculative-num-steps 2`
`130`	`130`	`--sglang-speculative-eagle-topk 1`
`131`	`131`	`--sglang-speculative-num-draft-tokens 3`
	`132`	`+ --sglang-enable-draft-weights-cpu-backup`
`132`	`133`
`133`	`134`	`--sglang-max-running-requests 512`
`134`	`135`	`)`