Skip to content

Commit 39b90bf

Browse files
committed
fix
1 parent 0bde847 commit 39b90bf

File tree

2 files changed

+5
-4
lines changed

2 files changed

+5
-4
lines changed

lightllm/server/httpserver/manager.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,8 @@ async def generate(
249249

250250
# 将请求转发给其他节点
251251
await self.order_req_manager.add_request(req_status.group_req_objs)
252-
await self.transfer_to_next_module()
252+
async with self.order_req_manager.lock:
253+
await self.transfer_to_next_module()
253254

254255
results_generator = self._wait_to_token_package(
255256
start_time,

lightllm/utils/dist_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# node_world_size 指一个推理节点的使用的卡数,如两机 tp 推理,如果两机器8卡,则 node_world_size 为 8.
1414
# rank_in_node 指在一个node内的rank序号,如两机8卡推理,每机上的rank序号都是0-7
1515

16+
1617
def set_environ(environ_name, value):
1718
os.environ[environ_name] = str(value)
1819

@@ -37,8 +38,7 @@ def _init_distributed_env(kvargs):
3738
set_current_rank_in_node(get_global_rank() % node_world_size)
3839
set_node_world_size(node_world_size)
3940

40-
41-
device_id = kvargs["rank_id"] % size_per_node
41+
device_id = kvargs["rank_id"] % get_node_world_size()
4242
set_current_device_id(device_id)
4343
torch.cuda.set_device(device_id)
4444
if kvargs["world_size"] > 1:
@@ -113,7 +113,7 @@ def get_current_device_id():
113113
return int(get_environ("LIGHTLLM_CURRENT_DEVICE_ID"))
114114

115115

116-
def set_current_rank_in_node(rank:int):
116+
def set_current_rank_in_node(rank: int):
117117
set_environ("LIGHTLLM_CURRENT_RANK_IN_NODE", rank)
118118

119119

0 commit comments

Comments
 (0)