Skip to content

Commit 296a579

Browse files
committed
fix chunked prefill
1 parent b33b3b4 commit 296a579

File tree

4 files changed

+7
-30
lines changed

4 files changed

+7
-30
lines changed

lightllm/server/api_cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def make_argument_parser() -> argparse.ArgumentParser:
102102
help="the port for multinode http manager, default is 20000",
103103
)
104104
parser.add_argument(
105-
"--multinode_router_gloo_port",
105+
"--multinode_router_nccl_port",
106106
type=int,
107107
default=20001,
108108
help="the gloo port for multinode router, default is 20001",

lightllm/server/router/manager.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -77,12 +77,10 @@ def __init__(self, args, router_port, detokenization_port, model_rpc_ports, metr
7777
self.send_to_detokenization.connect(f"{args.zmq_mode}127.0.0.1:{detokenization_port}")
7878
self.model_rpc_ports = model_rpc_ports
7979

80-
self.multinode_req_manager = None
81-
self.multinode_req_queue_lock = asyncio.Lock()
8280
if args.nnodes > 1:
8381
self.mulitnode_group = dist.init_process_group(
84-
backend="gloo",
85-
init_method=f"tcp://{args.nccl_host}:{args.multinode_router_gloo_port}",
82+
backend="nccl",
83+
init_method=f"tcp://{args.nccl_host}:{args.multinode_router_nccl_port}",
8684
world_size=args.nnodes,
8785
rank=args.node_rank,
8886
)
@@ -217,8 +215,7 @@ async def add_req(self, group_req_indexes: GroupReqIndexes):
217215
req_group.append(req)
218216

219217
logger.info(f"router recive req id {req.request_id} cost time {time.time() - req.start_time} s")
220-
async with self.multinode_req_queue_lock:
221-
self.req_queue.extend(req_group)
218+
self.req_queue.extend(req_group)
222219
self.send_to_detokenization.send_pyobj(group_req_indexes, protocol=pickle.HIGHEST_PROTOCOL)
223220
return
224221

lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def post_handel(self, run_reqs: List[InferReq], next_token_ids, next_token_logpr
5454

5555
req_obj.cur_kv_len = len(req_obj.get_chuncked_input_token_ids())
5656
if req_obj.cur_kv_len < req_obj.get_cur_total_len():
57-
if self.tp_rank < self.dp_size:
57+
if self.local_tp_rank < self.dp_size:
5858
req_obj.shm_req.shm_cur_kv_len = req_obj.cur_kv_len
5959
continue
6060

@@ -67,7 +67,7 @@ def post_handel(self, run_reqs: List[InferReq], next_token_ids, next_token_logpr
6767
if req_obj.finish_status.is_finished() or req_obj.shm_req.router_aborted:
6868
finished_req_ids.append(req_obj.shm_req.request_id)
6969

70-
if self.tp_rank < self.dp_size:
70+
if self.local_tp_rank < self.dp_size:
7171
# shm_cur_kv_len shm_cur_output_len 是 router 调度进程需要读的信息
7272
# finish_token_index finish_status candetoken_out_len 是
7373
# detokenization 进程需要的信息,注意这些变量的写入顺序避免异步协同问题。

lightllm/server/router/req_queue/base_queue.py

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -30,27 +30,7 @@ def __init__(self, args, router, dp_index, dp_size) -> None:
3030
@property
3131
def waiting_req_id_list(self):
3232
return [req.request_id for req in self.waiting_req_list]
33-
34-
def all_contain(self, id_list: List[int]):
35-
return all([_id in self.waiting_req_id_list for _id in id_list])
36-
37-
def arrange(self, id_list: List[int]):
38-
id2index_list = {}
39-
for index, req in enumerate(self.waiting_req_list):
40-
id2index_list[req.request_id] = index
41-
return [self.waiting_req_list[id2index_list[id]] for id in id_list]
42-
43-
def pop_list(self, req_list):
44-
id2index_list = {}
45-
for index, req in enumerate(self.waiting_req_list):
46-
id2index_list[req.request_id] = index
47-
remove_target = []
48-
for req in req_list:
49-
remove_target.append(self.waiting_req_list[id2index_list[req.request_id]])
50-
for req in remove_target:
51-
self.waiting_req_list.remove(req)
52-
return
53-
33+
5434
def append(self, req: Req):
5535
req.sample_params.suggested_dp_index = self.dp_index
5636
self.waiting_req_list.append(req)

0 commit comments

Comments
 (0)