Commit c4ef389

Add comment explaining max_batch_size assumption in bagel.yaml

Stage-0 max_batch_size=2 assumes single-prompt inference (1 user + 1 CFG companion). For multi-prompt batches it should scale accordingly.

Co-authored-by: Cursor <cursoragent@cursor.com>

1 parent 5ad0b4e commit c4ef389

1 file changed: +2 −0 lines

vllm_omni/model_executor/stage_configs/bagel.yaml

Lines changed: 2 additions & 0 deletions

```diff
@@ -6,6 +6,8 @@ stage_args:
   prompt_expand_func: vllm_omni.model_executor.stage_input_processors.bagel.expand_cfg_prompts
 runtime:
   devices: "0"
+  # 2 = 1 user prompt + 1 CFG companion (text-unconditional).
+  # For multi-prompt batches this should scale as batch_size × 2.
   max_batch_size: 2
 engine_args:
   model_stage: thinker
```
