@@ -22,6 +22,7 @@
 from .async_queue import AsyncQueue
 from lightllm.server.core.objs import Req, FinishStatus
 from lightllm.server.core.objs import SamplingParams
+from lightllm.server.core.objs.out_token_circlequeue import LIGHTLLM_OUT_TOKEN_QUEUE_SIZE
 from lightllm.server.core.objs.io_objs import GroupReqObjs
 from lightllm.server.core.objs.shm_req_manager import ShmReqManager
 from lightllm.server.core.objs.atomic_array_lock import AtomicShmArrayLock, AsyncLock, AtomicLockItem
@@ -281,8 +282,12 @@ async def generate( |
             alloced_req_indexes = []
             while len(alloced_req_indexes) < sampling_params.n:
                 alloc_req_index = await self.shm_req_manager.async_alloc_req_index()
+                sleep_time = 0.1
                 while alloc_req_index is None:
-                    await asyncio.sleep(0.1)
+                    await asyncio.sleep(sleep_time)
+                    sleep_time *= 1.1
+                    sleep_time = min(1, sleep_time)
+
                     alloc_req_index = await self.shm_req_manager.async_alloc_req_index()
                 alloced_req_indexes.append(alloc_req_index)
             req_objs = []
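
The hunk above swaps the fixed 0.1 s polling delay for a capped exponential backoff while waiting for a free request index. A minimal standalone sketch of the same pattern, assuming a hypothetical try_alloc() coroutine that returns None while no slot is free (the names are illustrative, not lightllm API):

import asyncio

async def alloc_with_backoff(try_alloc, start=0.1, factor=1.1, cap=1.0):
    # Poll until try_alloc() returns an index, sleeping a little longer
    # after each failed attempt, but never more than `cap` seconds.
    index = await try_alloc()
    sleep_time = start
    while index is None:
        await asyncio.sleep(sleep_time)
        sleep_time = min(cap, sleep_time * factor)
        index = await try_alloc()
    return index
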
@@ -648,31 +653,38 @@ async def handle_loop(self): |
                 token_list = []
                 for req in req_status.group_req_objs.shm_req_objs:
                     req_id = req.request_id
-                    if not req.out_tokens_queue.is_empty():
-
-                        text, src_index, special, count_output_tokens = req.out_tokens_queue.peek()
-                        req.cumlogprob += float(req.shm_logprobs.arr[src_index])
-                        metadata = {
-                            "id": int(req.shm_prompt_ids.arr[src_index]),
-                            "logprob": float(req.shm_logprobs.arr[src_index]),
-                            "cumlogprob": float(req.cumlogprob) / count_output_tokens,
-                            "special": special,
-                            "count_output_tokens": count_output_tokens,
-                            "prompt_cache_len": req.prompt_cache_len,
-                            "mtp_accepted_token_num": req.mtp_accepted_token_num,
-                        }
-                        if self.args.return_all_prompt_logprobs:
-                            metadata.update(req.get_all_prompt_metadata())
-                        if self.args.use_reward_model:
-                            metadata["score"] = float(req.reward_score)
-
-                        req.out_tokens_queue.pop_no_ret()
-
-                        if req.finish_token_index != src_index:
-                            token_list.append((req_id, text, metadata, FinishStatus()))
+                    read_token_count = 1
+                    if req.out_tokens_queue.is_full():
+                        read_token_count = LIGHTLLM_OUT_TOKEN_QUEUE_SIZE
+
+                    for _ in range(read_token_count):
+                        if not req.out_tokens_queue.is_empty():
+
+                            text, src_index, special, count_output_tokens = req.out_tokens_queue.peek()
+                            req.cumlogprob += float(req.shm_logprobs.arr[src_index])
+                            metadata = {
+                                "id": int(req.shm_prompt_ids.arr[src_index]),
+                                "logprob": float(req.shm_logprobs.arr[src_index]),
+                                "cumlogprob": float(req.cumlogprob) / count_output_tokens,
+                                "special": special,
+                                "count_output_tokens": count_output_tokens,
+                                "prompt_cache_len": req.prompt_cache_len,
+                                "mtp_accepted_token_num": req.mtp_accepted_token_num,
+                            }
+                            if self.args.return_all_prompt_logprobs:
+                                metadata.update(req.get_all_prompt_metadata())
+                            if self.args.use_reward_model:
+                                metadata["score"] = float(req.reward_score)
+
+                            req.out_tokens_queue.pop_no_ret()
+
+                            if req.finish_token_index != src_index:
+                                token_list.append((req_id, text, metadata, FinishStatus()))
+                            else:
+                                finish_status = FinishStatus(req.finish_status.status)
+                                token_list.append((req_id, text, metadata, finish_status))
                         else:
-                            finish_status = FinishStatus(req.finish_status.status)
-                            token_list.append((req_id, text, metadata, finish_status))
+                            break

                 async with req_status.lock:
                     req_status.out_token_info_list.extend(token_list)
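
The handle_loop change above keeps the common case of reading one token per request per pass, but when the per-request circular queue reports is_full() it drains up to LIGHTLLM_OUT_TOKEN_QUEUE_SIZE entries in a single pass, so a momentarily slow consumer does not leave the producer stalled on a full queue. A minimal sketch of the same drain policy, assuming a hypothetical bounded queue exposing is_full(), is_empty(), peek() and pop_no_ret(), with QUEUE_SIZE standing in for LIGHTLLM_OUT_TOKEN_QUEUE_SIZE:

QUEUE_SIZE = 8  # stand-in for the real queue capacity

def drain_step(queue, sink):
    # Read a single entry per call unless the queue is full, in which
    # case drain up to its whole capacity before returning.
    read_count = QUEUE_SIZE if queue.is_full() else 1
    for _ in range(read_count):
        if queue.is_empty():
            break
        sink.append(queue.peek())
        queue.pop_no_ret()
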
|