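fix: unpad prefill logits according to return_all_prompt_logics; add message to scatter_token batch-size assert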

wangzaijun · wangzaijun · commit 4b7c4a153812 · 2025-12-17T05:37:40.000Z
diff --git a/lightllm/common/basemodel/basemodel.py b/lightllm/common/basemodel/basemodel.py
@@ -410,8 +410,13 @@ def _create_unpad_prefill_model_output(self, model_output: ModelOutput, origin_h
         if handle_token_num == origin_handle_token_num:
             return model_output
 
-        new_model_output = copy.copy(model_output)
-        new_model_output.logits = new_model_output.logits[0:origin_handle_token_num]
+        if self.return_all_prompt_logics:
+            new_model_output = copy.copy(model_output)
+            new_model_output.logits = new_model_output.logits[0:origin_handle_token_num]
+        else:
+            new_model_output = copy.copy(model_output)
+            # Remove the logits that belong to the extra padded req
+            new_model_output.logits = new_model_output.logits[0:-1]
 
         # 特殊模型，特殊模式的特殊变量的特殊 unpad
         if new_model_output.deepseekv3_mtp_main_output_hiddens is not None:
diff --git a/lightllm/common/basemodel/triton_kernel/gather_token_id.py b/lightllm/common/basemodel/triton_kernel/gather_token_id.py
@@ -61,7 +61,9 @@ def scatter_token(
         b_req_idx: (batch_size,)
         b_mtp_index: (batch_size,)
     """
-    assert next_token_ids.shape[0] == b_req_idx.shape[0]
+    assert (
+        next_token_ids.shape[0] == b_req_idx.shape[0]
+    ), f"batch size not match, {next_token_ids.shape[0]} != {b_req_idx.shape[0]}"
     batch_size = b_req_idx.shape[0]
     BLOCK = 256