
Commit a197c27
fix
1 parent 35572d5

2 files changed (+38, -26 lines)


lightllm/server/core/objs/out_token_circlequeue.py
Lines changed: 1 addition & 1 deletion

@@ -3,7 +3,7 @@
 from typing import Tuple
 
 LIGHTLLM_TOKEN_MAX_BYTES = int(os.getenv("LIGHTLLM_TOKEN_MAX_BYTES", 128))
-LIGHTLLM_OUT_TOKEN_QUEUE_SIZE = int(os.getenv("LIGHTLLM_OUT_TOKEN_QUEUE_SIZE", 6))
+LIGHTLLM_OUT_TOKEN_QUEUE_SIZE = int(os.getenv("LIGHTLLM_OUT_TOKEN_QUEUE_SIZE", 8))
 
 
 class QueueItem(ctypes.Structure):
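The only change here raises the default capacity of the per-request out-token circular queue from 6 to 8. The value is read once at import time, so an operator can still override it by exporting LIGHTLLM_OUT_TOKEN_QUEUE_SIZE before the server starts. For orientation, here is a minimal pure-Python stand-in for the queue interface the manager.py changes below depend on (is_empty, is_full, peek, pop_no_ret). The real queue is a ctypes structure over shared memory; this sketch only mimics its observable behavior, and the push method and exact item layout are assumptions inferred from how peek is unpacked below.

import os
from collections import deque
from typing import Tuple

# Capacity is read once at import time; export the variable before starting
# the server to override the default (this commit raises it from 6 to 8).
LIGHTLLM_OUT_TOKEN_QUEUE_SIZE = int(os.getenv("LIGHTLLM_OUT_TOKEN_QUEUE_SIZE", 8))


class OutTokenQueueSketch:
    """Illustrative stand-in, not the shm-backed ctypes implementation."""

    def __init__(self, capacity: int = LIGHTLLM_OUT_TOKEN_QUEUE_SIZE):
        self._items: deque = deque()
        self._capacity = capacity

    def is_empty(self) -> bool:
        return len(self._items) == 0

    def is_full(self) -> bool:
        return len(self._items) >= self._capacity

    def push(self, text: str, src_index: int, special: bool, count_output_tokens: int) -> None:
        # Hypothetical producer side: the producer must wait while full.
        assert not self.is_full(), "producer must wait when the queue is full"
        self._items.append((text, src_index, special, count_output_tokens))

    def peek(self) -> Tuple[str, int, bool, int]:
        # Matches the consumer's unpacking below:
        # text, src_index, special, count_output_tokens
        return self._items[0]

    def pop_no_ret(self) -> None:
        # Drop the head without returning it; the caller already peeked.
        self._items.popleft()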

lightllm/server/httpserver/manager.py
Lines changed: 37 additions & 25 deletions
@@ -22,6 +22,7 @@
 from .async_queue import AsyncQueue
 from lightllm.server.core.objs import Req, FinishStatus
 from lightllm.server.core.objs import SamplingParams
+from lightllm.server.core.objs.out_token_circlequeue import LIGHTLLM_OUT_TOKEN_QUEUE_SIZE
 from lightllm.server.core.objs.io_objs import GroupReqObjs
 from lightllm.server.core.objs.shm_req_manager import ShmReqManager
 from lightllm.server.core.objs.atomic_array_lock import AtomicShmArrayLock, AsyncLock, AtomicLockItem
@@ -281,8 +282,12 @@ async def generate(
         alloced_req_indexes = []
         while len(alloced_req_indexes) < sampling_params.n:
             alloc_req_index = await self.shm_req_manager.async_alloc_req_index()
+            sleep_time = 0.1
             while alloc_req_index is None:
-                await asyncio.sleep(0.1)
+                await asyncio.sleep(sleep_time)
+                sleep_time *= 1.1
+                sleep_time = min(1, sleep_time)
+
                 alloc_req_index = await self.shm_req_manager.async_alloc_req_index()
             alloced_req_indexes.append(alloc_req_index)
         req_objs = []
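This hunk replaces the fixed 0.1 s poll for a free request slot with capped geometric backoff: each miss multiplies the sleep by 1.1, clamped to 1 s, so a long-exhausted slot pool costs about 24 wake-ups (roughly ten seconds of accumulated sleep) before settling into once-per-second polling. The same pattern as a standalone helper sketch; alloc_fn is a hypothetical stand-in for self.shm_req_manager.async_alloc_req_index:

import asyncio
from typing import Awaitable, Callable, Optional, TypeVar

T = TypeVar("T")


async def poll_with_backoff(
    alloc_fn: Callable[[], Awaitable[Optional[T]]],
    initial: float = 0.1,
    factor: float = 1.1,
    cap: float = 1.0,
) -> T:
    """Retry alloc_fn until it returns a non-None value, sleeping with
    capped geometric backoff between attempts (0.1 s, 0.11 s, ... up to 1 s)."""
    result = await alloc_fn()
    sleep_time = initial
    while result is None:
        await asyncio.sleep(sleep_time)
        # Grow after sleeping and clamp, matching the committed ordering.
        sleep_time = min(cap, sleep_time * factor)
        result = await alloc_fn()
    return result

The sleep sequence (0.1 s, 0.11 s, 0.121 s, ..., capped at 1 s) is the same one the committed loop produces.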
@@ -648,31 +653,38 @@ async def handle_loop(self):
                 token_list = []
                 for req in req_status.group_req_objs.shm_req_objs:
                     req_id = req.request_id
-                    if not req.out_tokens_queue.is_empty():
-
-                        text, src_index, special, count_output_tokens = req.out_tokens_queue.peek()
-                        req.cumlogprob += float(req.shm_logprobs.arr[src_index])
-                        metadata = {
-                            "id": int(req.shm_prompt_ids.arr[src_index]),
-                            "logprob": float(req.shm_logprobs.arr[src_index]),
-                            "cumlogprob": float(req.cumlogprob) / count_output_tokens,
-                            "special": special,
-                            "count_output_tokens": count_output_tokens,
-                            "prompt_cache_len": req.prompt_cache_len,
-                            "mtp_accepted_token_num": req.mtp_accepted_token_num,
-                        }
-                        if self.args.return_all_prompt_logprobs:
-                            metadata.update(req.get_all_prompt_metadata())
-                        if self.args.use_reward_model:
-                            metadata["score"] = float(req.reward_score)
-
-                        req.out_tokens_queue.pop_no_ret()
-
-                        if req.finish_token_index != src_index:
-                            token_list.append((req_id, text, metadata, FinishStatus()))
+                    read_token_count = 1
+                    if req.out_tokens_queue.is_full():
+                        read_token_count = LIGHTLLM_OUT_TOKEN_QUEUE_SIZE
+
+                    for _ in range(read_token_count):
+                        if not req.out_tokens_queue.is_empty():
+
+                            text, src_index, special, count_output_tokens = req.out_tokens_queue.peek()
+                            req.cumlogprob += float(req.shm_logprobs.arr[src_index])
+                            metadata = {
+                                "id": int(req.shm_prompt_ids.arr[src_index]),
+                                "logprob": float(req.shm_logprobs.arr[src_index]),
+                                "cumlogprob": float(req.cumlogprob) / count_output_tokens,
+                                "special": special,
+                                "count_output_tokens": count_output_tokens,
+                                "prompt_cache_len": req.prompt_cache_len,
+                                "mtp_accepted_token_num": req.mtp_accepted_token_num,
+                            }
+                            if self.args.return_all_prompt_logprobs:
+                                metadata.update(req.get_all_prompt_metadata())
+                            if self.args.use_reward_model:
+                                metadata["score"] = float(req.reward_score)
+
+                            req.out_tokens_queue.pop_no_ret()
+
+                            if req.finish_token_index != src_index:
+                                token_list.append((req_id, text, metadata, FinishStatus()))
+                            else:
+                                finish_status = FinishStatus(req.finish_status.status)
+                                token_list.append((req_id, text, metadata, finish_status))
                         else:
-                            finish_status = FinishStatus(req.finish_status.status)
-                            token_list.append((req_id, text, metadata, finish_status))
+                            break
 
                 async with req_status.lock:
                     req_status.out_token_info_list.extend(token_list)
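Before this hunk, handle_loop popped at most one token per request per pass, so a producer that had filled the queue could only advance one slot per consumer cycle. Now a full queue triggers a batch drain of up to LIGHTLLM_OUT_TOKEN_QUEUE_SIZE entries, breaking early once the queue empties; together with the capacity bump from 6 to 8, this presumably reduces producer stalls under bursty decoding. The drain policy, condensed into a sketch where process_item is a hypothetical placeholder for the metadata and finish-status bookkeeping above:

def drain_one_pass(queue, process_item, queue_capacity):
    # Normally consume one item per pass; if the producer has filled the
    # queue, consume up to a full queue's worth so it can make progress.
    budget = queue_capacity if queue.is_full() else 1
    for _ in range(budget):
        if queue.is_empty():
            break
        item = queue.peek()  # (text, src_index, special, count_output_tokens)
        process_item(item)
        queue.pop_no_ret()   # drop the head; contents were already peeked

This is behaviorally equivalent to the committed "for _ in range(read_token_count): if not empty ... else break" structure, just with the emptiness test inverted.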
