Skip to content

Commit eef9d35

Browse files
committed
improve postprocess
1 parent bba8286 commit eef9d35

File tree

4 files changed

+13
-3
lines changed

4 files changed

+13
-3
lines changed

lightllm/server/router/model_infer/infer_batch.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,15 @@ def init_all(self):
271271
else:
272272
self.out_token_id_count = collections.defaultdict(int)
273273

274+
shm_param = self.sampling_param.shm_param
275+
# Mark ahead of time whether this request needs output-token count statistics, since this bookkeeping can degrade post-processing efficiency in certain scenarios,
276+
# so cases that do not require post-processing statistics are flagged in advance.
277+
self.need_out_token_id_statistics = not (
278+
shm_param.presence_penalty == 0.0
279+
and shm_param.frequency_penalty == 0.0
280+
and shm_param.repetition_penalty == 1.0
281+
)
282+
274283
self.stop_sequences = self.sampling_param.shm_param.stop_sequences.to_list()
275284
# token healing mode 才被使用的管理对象
276285
if self.shm_req.prefix_token_ids.size != 0:

lightllm/server/router/model_infer/mode_backend/base_backend.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,8 +282,9 @@ def _post_handle(
282282
req_obj.set_next_gen_token_id(next_token_id, next_token_logprob)
283283
req_obj.cur_output_len += 1
284284

285-
if req_obj.out_token_id_count is not None:
285+
if req_obj.need_out_token_id_statistics:
286286
req_obj.out_token_id_count[next_token_id] += 1
287+
287288
req_obj.update_finish_status(self.eos_id)
288289

289290
if extra_post_req_handle_func is not None:

lightllm/server/router/model_infer/mode_backend/continues_batch/impl_for_return_all_prompt_logprobs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def prefill(self, run_reqs: List[Tuple]):
5555
for i in range(req_obj.shm_req.input_len - 1):
5656
req_obj.shm_req.shm_logprobs.arr[i + 1] = cur_logprobs[i]
5757

58-
if req_obj.out_token_id_count is not None:
58+
if req_obj.need_out_token_id_statistics:
5959
req_obj.out_token_id_count[next_token_id] += 1
6060
req_obj.update_finish_status(self.eos_id)
6161

lightllm/server/router/model_infer/mode_backend/continues_batch/impl_for_reward_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def prefill(self, reqs: List[Tuple]):
3232
req_obj.set_next_gen_token_id(next_token_id, next_token_logprob)
3333
req_obj.cur_output_len += 1
3434

35-
if req_obj.out_token_id_count is not None:
35+
if req_obj.need_out_token_id_statistics:
3636
req_obj.out_token_id_count[next_token_id] += 1
3737
req_obj.update_finish_status(self.eos_id)
3838

0 commit comments

Comments
 (0)