
Commit 315a495

fix
1 parent b632123 commit 315a495

5 files changed: +2 -86 lines changed

lightllm/server/router/model_infer/infer_batch.py

Lines changed: 1 addition & 1 deletion
@@ -338,8 +338,8 @@ def update_finish_status(self, eos_ids):
             self.finish_status.set_status(FinishStatus.FINISHED_STOP)
         elif (
             self.cur_output_len > 0
-            and self.sampling_param.shm_param.ignore_eos is False
             and self.get_last_gen_token() in eos_ids
+            and self.sampling_param.shm_param.ignore_eos is False
         ):
             self.finish_status.set_status(FinishStatus.FINISHED_STOP)
         elif self.cur_output_len >= self.sampling_param.shm_param.max_new_tokens:
lightllm/server/router/model_infer/mode_backend/base_backend.py

Lines changed: 0 additions & 76 deletions
@@ -4,7 +4,6 @@
 import rpyc
 import torch
 import socket
-import time
 from datetime import timedelta
 from typing import Dict, List, Tuple, Callable, Optional
 from transformers.configuration_utils import PretrainedConfig
@@ -251,81 +250,6 @@ def _post_handle(
         is_chuncked_mode: bool,
         do_filter_finished_reqs: bool,
         extra_post_req_handle_func: Optional[Callable[[InferReq, int, float], None]] = None,
-    ) -> List[int]:
-        """
-        extra_post_req_handle_func 用于提供在一个请求确定输出的时候,给出额外的后处理操作,主要是用于
-        约束输出等模式,设置自己请求内部的状态机的状态,并添加额外的停止判定条件等。
-        """
-        if not hasattr(self, "_post_handle_impl"):
-            try:
-                finished_req_ids = self._fast_post_handle(
-                    run_reqs,
-                    next_token_ids,
-                    next_token_logprobs,
-                    is_chuncked_mode,
-                    do_filter_finished_reqs,
-                    extra_post_req_handle_func,
-                )
-                self._post_handle_impl = self._fast_post_handle
-                self.logger.info("use _fast_post_handle")
-                return finished_req_ids
-            except:
-                finished_req_ids = self._python_post_handle(
-                    run_reqs,
-                    next_token_ids,
-                    next_token_logprobs,
-                    is_chuncked_mode,
-                    do_filter_finished_reqs,
-                    extra_post_req_handle_func,
-                )
-                self.logger.info("use _python_post_handle")
-                self._post_handle_impl = self._python_post_handle
-                return finished_req_ids
-        else:
-            return self._post_handle_impl(
-                run_reqs,
-                next_token_ids,
-                next_token_logprobs,
-                is_chuncked_mode,
-                do_filter_finished_reqs,
-                extra_post_req_handle_func,
-            )
-
-    def _fast_post_handle(
-        self,
-        run_reqs: List[InferReq],
-        next_token_ids,
-        next_token_logprobs,
-        is_chuncked_mode: bool,
-        do_filter_finished_reqs: bool,
-        extra_post_req_handle_func: Optional[Callable[[InferReq, int, float], None]] = None,
-    ):
-        from . import cython_fast_impl
-
-        start = time.time()
-        finished_req_ids = cython_fast_impl.fast_post_handle(
-            self,
-            run_reqs,
-            next_token_ids,
-            next_token_logprobs,
-            is_chuncked_mode,
-            do_filter_finished_reqs,
-            extra_post_req_handle_func,
-        )
-        cost_time = time.time() - start
-        if self.is_master_in_dp and cost_time > 0.001:
-            self.logger.info(f"post handle cost time {cost_time} s, batch_size: {len(run_reqs)}")
-        return finished_req_ids
-
-    # 一些可以复用的通用功能函数
-    def _python_post_handle(
-        self,
-        run_reqs: List[InferReq],
-        next_token_ids,
-        next_token_logprobs,
-        is_chuncked_mode: bool,
-        do_filter_finished_reqs: bool,
-        extra_post_req_handle_func: Optional[Callable[[InferReq, int, float], None]] = None,
     ) -> List[int]:
         """
         extra_post_req_handle_func 用于提供在一个请求确定输出的时候,给出额外的后处理操作,主要是用于
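For context, the deleted block implemented a probe-and-cache dispatch: the first call to _post_handle tried the Cython fast path and, if the import or call failed, permanently fell back to the pure-Python _python_post_handle. A stripped-down sketch of that general pattern, using generic names rather than lightllm's actual classes:

class Dispatcher:
    # Generic probe-and-cache dispatch, mirroring the pattern removed above.
    def handle(self, *args):
        if not hasattr(self, "_impl"):
            try:
                result = self._fast(*args)      # may raise if the extension is missing
                self._impl = self._fast
                return result
            except Exception:
                self._impl = self._python       # cache the fallback for later calls
                return self._python(*args)
        return self._impl(*args)

    def _fast(self, *args):
        raise ImportError("compiled extension unavailable")

    def _python(self, *args):
        return sum(args)

print(Dispatcher().handle(1, 2, 3))  # -> 6, served by the Python fallback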

lightllm/server/router/model_infer/mode_backend/generic_post_process.py

Lines changed: 1 addition & 1 deletion
@@ -65,7 +65,7 @@ def sample(logits, reqs, eos_id: List[int] = [2]):
         int64_batch_next_token_ids = torch.empty_like(batch_next_token_ids, dtype=torch.int64)
         int64_batch_next_token_ids[:] = batch_next_token_ids
         batch_next_token_probs = torch.gather(probs, dim=1, index=int64_batch_next_token_ids.view(-1, 1))
-        return int64_batch_next_token_ids.view(-1), batch_next_token_probs.view(-1)
+        return batch_next_token_ids.view(-1), batch_next_token_probs.view(-1)
     else:
         assert False, "dead path"
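The surrounding context shows why the int64 copy exists at all: torch.gather requires int64 indices, so a temporary copy is made just for the probability lookup, while the token ids returned to the caller now keep the sampler's original dtype. A small self-contained sketch of that pattern; the shapes and the int32 sampler dtype are assumptions for illustration, not taken from lightllm:

import torch

probs = torch.rand(4, 8)                                    # assumed (batch, vocab) shape
batch_next_token_ids = torch.argmax(probs, dim=1).to(torch.int32)

# torch.gather needs int64 indices, hence the temporary copy.
int64_ids = torch.empty_like(batch_next_token_ids, dtype=torch.int64)
int64_ids[:] = batch_next_token_ids
batch_next_token_probs = torch.gather(probs, dim=1, index=int64_ids.view(-1, 1))

# The caller gets the ids back in their original dtype, as in the fixed return statement.
print(batch_next_token_ids.view(-1).dtype, batch_next_token_probs.view(-1).shape)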

requirements.txt

Lines changed: 0 additions & 1 deletion
@@ -88,4 +88,3 @@ flashinfer-python==0.2.4
 sgl-kernel
 httpx==0.28.1
 librosa==0.11.0
-Cython

setup.py

Lines changed: 0 additions & 7 deletions
@@ -1,5 +1,4 @@
 from setuptools import setup, find_packages
-from Cython.Build import cythonize
 
 package_data = {"lightllm": ["common/all_kernel_configs/*/*.json"]}
 setup(
@@ -29,10 +28,4 @@
         "triton",
     ],
     package_data=package_data,
-    ext_modules=cythonize(
-        [
-            "lightllm/server/router/model_infer/mode_backend/cython_fast_impl.pyx",
-        ]
-    ),
-    zip_safe=False,
 )
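For reference, the removed ext_modules entry is what compiled the .pyx module into a C extension at install time. A minimal sketch of that kind of setup.py stanza, assuming Cython is available at build time; the package and file names are placeholders, not this repository's configuration:

from setuptools import setup
from Cython.Build import cythonize   # requires the Cython package at build time

setup(
    name="example_pkg",                                  # placeholder package name
    ext_modules=cythonize(["example_fast_impl.pyx"]),    # compiles .pyx -> C extension
    zip_safe=False,                                      # extensions cannot run from a zip archive
)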
