We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 1e9b419 commit dff8618 — Copy full SHA for dff8618
lightllm/common/basemodel/cuda_graph.py
@@ -258,13 +258,15 @@ def warmup_overlap(self, model):
258
)
259
b_seq_len = torch.empty(batch_size, dtype=torch.int32, device="cuda")
260
b_seq_len.fill_(seq_len)
261
+ b_mtp_index = torch.zeros(batch_size, dtype=torch.int32, device="cuda")
262
263
micro_batch = ModelInput(
264
is_prefill=False,
265
batch_size=batch_size,
266
total_token_num=total_token_num,
267
max_len_in_batch=max_len_in_batch,
268
input_ids=input_ids,
269
+ b_mtp_index=b_mtp_index,
270
mem_indexes=mem_indexes,
271
b_req_idx=b_req_idx,
272
b_seq_len=b_seq_len,
0 commit comments