fix: use H200 default chunked-prefill-size of 8192

PierreLeGuen · claude · PierreLeGuen · commit bc09379848ad · 2026-03-06T14:28:15.000-08:00
The previous value of 4096 was below SGLang's auto-detected default of 8192
for H200 GPUs (<160GB of memory), which unnecessarily limited prefill throughput.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/Qwen3.5-122B.yaml b/Qwen3.5-122B.yaml
@@ -54,7 +54,7 @@ x-sglang-qwen35-122b-common: &sglang-qwen35-122b-common
     --mem-fraction-static 0.88
     --context-length 262144
     --kv-cache-dtype fp8_e4m3
-    --chunked-prefill-size 4096
+    --chunked-prefill-size 8192
     --attention-backend flashinfer
     --schedule-conservativeness 0.5
     --reasoning-parser qwen3