Commit 8e586d1

Fix: Ensure stage_id is correctly passed to OmniEngineArgs
Signed-off-by: Bradley <bradley.b.pitt@gmail.com>
1 parent: bb011f2

File tree: 2 files changed (+4, −3 lines)


vllm_omni/model_executor/models/hunyuan_image3_0/hunyuan_image3_0.py

Lines changed: 1 addition & 0 deletions

@@ -1056,6 +1056,7 @@ def forward_block(
     def compute_logits(
         self,
         hidden_states: torch.Tensor,
+        sampling_metadata: SamplingMetadata | None = None,
     ) -> torch.Tensor | None:
         logits = self.logits_processor(self.lm_head, hidden_states)
         return logits
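The added `sampling_metadata` parameter is optional and unused in the body, which is a common compatibility pattern: older call sites that pass only `hidden_states` keep working, while newer runners that forward a `SamplingMetadata` object no longer raise `TypeError`. A minimal, self-contained sketch of the idea (the `TinyModel` class and stub `SamplingMetadata` here are illustrative stand-ins, not code from this repository):

```python
from typing import Optional


class SamplingMetadata:
    """Stand-in for vLLM's SamplingMetadata; real fields omitted."""


class TinyModel:
    def compute_logits(
        self,
        hidden_states: list[float],
        sampling_metadata: Optional[SamplingMetadata] = None,  # accepted but unused
    ) -> list[float]:
        # A real model would run its logits processor here; we pass through.
        return hidden_states


model = TinyModel()
# Old call sites that pass only hidden_states still work...
assert model.compute_logits([0.1, 0.2]) == [0.1, 0.2]
# ...and callers that forward sampling metadata no longer raise TypeError.
assert model.compute_logits([0.3], SamplingMetadata()) == [0.3]
```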

vllm_omni/model_executor/stage_configs/hunyuan_image_3_moe.yaml

Lines changed: 3 additions & 3 deletions

@@ -5,8 +5,8 @@ stage_args:
   - stage_id: 0
     stage_type: llm # Use llm stage type to launch OmniLLM
     runtime:
-      process: true # Run this stage in a separate process
-      devices: "0" # Visible devices for this stage (CUDA_VISIBLE_DEVICES/torch.cuda.set_device)
+      process: true # Run this stage in a separate process
+      devices: "0,1,2,3,4,5,6,7" # Visible devices for this stage (CUDA_VISIBLE_DEVICES/torch.cuda.set_device)
     max_batch_size: 1
     engine_args:
       model_stage: AR
@@ -19,7 +19,7 @@ stage_args:
       engine_output_type: latent
       enable_prefix_caching: false
       max_num_batched_tokens: 32768
-      tensor_parallel_size: 5
+      tensor_parallel_size: 8
       pipeline_parallel_size: 1
       is_comprehension: true
       final_output: true
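The yaml change brings the stage's visible devices and its parallelism degree into agreement: eight devices for `tensor_parallel_size: 8`, rather than one device for a tensor-parallel size of 5. A hedged sanity-check sketch of that invariant (the `check_stage` helper is hypothetical, not part of vllm_omni; it assumes the common rule that each tensor-parallel × pipeline-parallel rank needs its own visible device):

```python
def check_stage(devices: str, tensor_parallel_size: int,
                pipeline_parallel_size: int) -> bool:
    """Return True if the comma-separated devices string provides exactly
    one visible device per parallel rank (a typical, assumed requirement)."""
    n_devices = len(devices.split(","))
    return n_devices == tensor_parallel_size * pipeline_parallel_size


# Old config: one visible device but tensor_parallel_size 5 -> inconsistent.
assert not check_stage("0", 5, 1)
# New config: eight visible devices and tensor_parallel_size 8 -> consistent.
assert check_stage("0,1,2,3,4,5,6,7", 8, 1)
```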

0 commit comments