Commit 1eb7c7b

committed
fix
1 parent ffc1ad4 commit 1eb7c7b

2 files changed: +25 -64 lines changed
Lines changed: 12 additions & 18 deletions
@@ -1,22 +1,16 @@
-from .continues_batch.impl import ContinuesBatchBackend
-from .continues_batch.impl_for_return_all_prompt_logprobs import ReturnPromptLogProbBackend
-from .continues_batch.impl_for_reward_model import RewardModelBackend
 from .chunked_prefill.impl import ChunkedPrefillBackend
-from .diverse_backend.impl import DiversehBackend
-from .chunked_prefill.impl_for_token_healing import TokenHealingBackend
-from .chunked_prefill.impl_for_outlines_constraint_mode import OutlinesConstraintBackend
 from .chunked_prefill.impl_for_first_token_constraint_mode import FirstTokenConstraintBackend
-from .dp_backend.impl import DPChunkedPrefillBackend
-from .dp_backend.impl_mtp import DPChunkedPrefillWithMTPBackend
-from .continues_batch.pd_mode.prefill_node_impl.prefill_impl import ChunckedPrefillForPrefillNode
-from .continues_batch.pd_mode.decode_node_impl.decode_impl import ContinuesBatchBackendForDecodeNode
+from .chunked_prefill.impl_for_outlines_constraint_mode import OutlinesConstraintBackend
+from .chunked_prefill.impl_for_return_all_prompt_logprobs import ReturnPromptLogProbBackend
+from .chunked_prefill.impl_for_reward_model import RewardModelBackend
+from .chunked_prefill.impl_for_token_healing import TokenHealingBackend
 from .chunked_prefill.impl_for_xgrammar_mode import XgrammarBackend
-from .continues_batch.pd_mode.prefill_node_impl.prefill_impl_for_dp_chuncked import DPChunkedForPrefillNode
-from .continues_batch.pd_mode.decode_node_impl.decode_impl_for_dp import DPForDecodeNode
-from .continues_batch.impl_mtp import ContinuesBatchWithMTPBackend
 
-# mtp and pd mode backend
-from .continues_batch.pd_mode.decode_node_impl.decode_impl_mtp_for_dp import DPForMtpDecodeNode
-from .continues_batch.pd_mode.decode_node_impl.decode_impl_mtp import ContinuesBatchBackendForMtpDecodeNode
-from .continues_batch.pd_mode.prefill_node_impl.prefill_impl_mtp import ChunckedPrefillForMtpPrefillNode
-from .continues_batch.pd_mode.prefill_node_impl.prefill_impl_mtp_for_dp_chuncked import DPChunkedForMtpPrefillNode
+from .dp_backend.impl import DPChunkedPrefillBackend
+from .diverse_backend.impl import DiversehBackend
+
+# pd mode backend
+from .continues_batch.pd_mode.decode_node_impl.decode_impl import DecodeNode
+from .continues_batch.pd_mode.decode_node_impl.decode_impl_for_dp import DPForDecodeNode
+from .continues_batch.pd_mode.prefill_node_impl.prefill_impl import ChunckedPrefillForPrefillNode
+from .continues_batch.pd_mode.prefill_node_impl.prefill_impl_for_dp import DPChunkedForPrefillNode
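
The extract does not name this first file; the relative imports above, together with the absolute import in model_rpc.py below, suggest it is the mode_backend package __init__.py, though that path is an assumption. For reference, a reconstruction of how the module reads after this commit, assembled only from the context and added lines above:

# Assumed path: lightllm/server/router/model_infer/mode_backend/__init__.py
# Reconstruction after commit 1eb7c7b (not part of the commit page): the
# single-node backends now all live under chunked_prefill, the MTP variants
# are gone, and the PD (prefill/decode node) backends sit behind one comment.
from .chunked_prefill.impl import ChunkedPrefillBackend
from .chunked_prefill.impl_for_first_token_constraint_mode import FirstTokenConstraintBackend
from .chunked_prefill.impl_for_outlines_constraint_mode import OutlinesConstraintBackend
from .chunked_prefill.impl_for_return_all_prompt_logprobs import ReturnPromptLogProbBackend
from .chunked_prefill.impl_for_reward_model import RewardModelBackend
from .chunked_prefill.impl_for_token_healing import TokenHealingBackend
from .chunked_prefill.impl_for_xgrammar_mode import XgrammarBackend

from .dp_backend.impl import DPChunkedPrefillBackend
from .diverse_backend.impl import DiversehBackend

# pd mode backend
from .continues_batch.pd_mode.decode_node_impl.decode_impl import DecodeNode
from .continues_batch.pd_mode.decode_node_impl.decode_impl_for_dp import DPForDecodeNode
from .continues_batch.pd_mode.prefill_node_impl.prefill_impl import ChunckedPrefillForPrefillNode
from .continues_batch.pd_mode.prefill_node_impl.prefill_impl_for_dp import DPChunkedForPrefillNode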

lightllm/server/router/model_infer/model_rpc.py

Lines changed: 13 additions & 46 deletions
@@ -7,26 +7,19 @@
 from datetime import timedelta
 from typing import Dict, List, Tuple
 from lightllm.server.router.model_infer.mode_backend import (
-    ContinuesBatchBackend,
-    ReturnPromptLogProbBackend,
     ChunkedPrefillBackend,
-    DiversehBackend,
+    FirstTokenConstraintBackend,
+    OutlinesConstraintBackend,
+    ReturnPromptLogProbBackend,
     RewardModelBackend,
     TokenHealingBackend,
-    OutlinesConstraintBackend,
     XgrammarBackend,
-    FirstTokenConstraintBackend,
     DPChunkedPrefillBackend,
-    ContinuesBatchBackendForDecodeNode,
+    DiversehBackend,
+    DecodeNode,
     DPForDecodeNode,
     ChunckedPrefillForPrefillNode,
     DPChunkedForPrefillNode,
-    ContinuesBatchWithMTPBackend,
-    DPChunkedPrefillWithMTPBackend,
-    DPForMtpDecodeNode,
-    ContinuesBatchBackendForMtpDecodeNode,
-    ChunckedPrefillForMtpPrefillNode,
-    DPChunkedForMtpPrefillNode,
 )
 from lightllm.server.router.model_infer.mode_backend.redundancy_expert_manager import RedundancyExpertManager
 from lightllm.server.core.objs import RpcShmParams, RpcShmResults, ShmSyncStatusArray
@@ -112,7 +105,6 @@ def init_model(self, kvargs):
         # fill in the real rank_id parameter
         kvargs["rank_id"] = self.rank
         self.world_size = kvargs["world_size"]
-        disable_chunked_prefill = self.args.disable_chunked_prefill
         return_all_prompt_logprobs = self.args.return_all_prompt_logprobs
         use_reward_model = self.args.use_reward_model
         diverse_mode = self.args.diverse_mode
@@ -125,35 +117,18 @@ def init_model(self, kvargs):
         is_prefill_node = self.args.run_mode == "prefill"
         is_decode_node = self.args.run_mode == "decode"
 
-        enable_mtp = self.args.mtp_mode is not None
-
         if is_prefill_node:
-            if enable_mtp:
-                if self.args.dp > 1:
-                    self.backend = DPChunkedForMtpPrefillNode(self.info_queue, self.mem_queue)
-                else:
-                    self.backend = ChunckedPrefillForMtpPrefillNode(self.info_queue, self.mem_queue)
+            if self.args.dp > 1:
+                self.backend = DPChunkedForPrefillNode(self.info_queue, self.mem_queue)
             else:
-                if self.args.dp > 1:
-                    self.backend = DPChunkedForPrefillNode(self.info_queue, self.mem_queue)
-                else:
-                    self.backend = ChunckedPrefillForPrefillNode(self.info_queue, self.mem_queue)
+                self.backend = ChunckedPrefillForPrefillNode(self.info_queue, self.mem_queue)
         elif is_decode_node:
-            if enable_mtp:
-                if self.args.dp > 1:
-                    self.backend = DPForMtpDecodeNode(self.info_queue, self.mem_queue)
-                else:
-                    self.backend = ContinuesBatchBackendForMtpDecodeNode(self.info_queue, self.mem_queue)
+            if self.args.dp > 1:
+                self.backend = DPForDecodeNode(self.info_queue, self.mem_queue)
             else:
-                if self.args.dp > 1:
-                    self.backend = DPForDecodeNode(self.info_queue, self.mem_queue)
-                else:
-                    self.backend = ContinuesBatchBackendForDecodeNode(self.info_queue, self.mem_queue)
+                self.backend = DecodeNode(self.info_queue, self.mem_queue)
         elif self.args.dp > 1:
-            if enable_mtp:
-                self.backend = DPChunkedPrefillWithMTPBackend()
-            else:
-                self.backend = DPChunkedPrefillBackend()
+            self.backend = DPChunkedPrefillBackend()
         elif use_reward_model:
             self.backend = RewardModelBackend()
         elif return_all_prompt_logprobs:
@@ -168,16 +143,8 @@ def init_model(self, kvargs):
             self.backend = XgrammarBackend()
         elif is_first_token_constraint_mode:
             self.backend = FirstTokenConstraintBackend()
-        elif disable_chunked_prefill:
-            if enable_mtp:
-                self.backend = ContinuesBatchWithMTPBackend()
-            else:
-                self.backend = ContinuesBatchBackend()
         else:
-            if enable_mtp:
-                self.backend = ContinuesBatchWithMTPBackend()
-            else:
-                self.backend = ChunkedPrefillBackend()
+            self.backend = ChunkedPrefillBackend()
 
         logger.info(f"use {self.backend.__class__.__name__}")
         self.backend.init_model(kvargs)
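
Read across all four hunks, the change flattens the dispatch in init_model. A sketch of the resulting selection chain, reconstructed from the context and added lines above (names are as bound earlier in the method; branches that fall between hunks are elided, not invented):

# Backend dispatch inside init_model after commit 1eb7c7b -- a reconstruction,
# not code shown on the commit page. With the enable_mtp and
# disable_chunked_prefill branches removed, each run mode maps to exactly one
# backend, and dp > 1 simply selects the data-parallel variant.
if is_prefill_node:
    if self.args.dp > 1:
        self.backend = DPChunkedForPrefillNode(self.info_queue, self.mem_queue)
    else:
        self.backend = ChunckedPrefillForPrefillNode(self.info_queue, self.mem_queue)
elif is_decode_node:
    if self.args.dp > 1:
        self.backend = DPForDecodeNode(self.info_queue, self.mem_queue)
    else:
        self.backend = DecodeNode(self.info_queue, self.mem_queue)
elif self.args.dp > 1:
    self.backend = DPChunkedPrefillBackend()
elif use_reward_model:
    self.backend = RewardModelBackend()
elif return_all_prompt_logprobs:
    ...  # this body, and the branches up to xgrammar, fall between diff hunks
elif is_first_token_constraint_mode:
    self.backend = FirstTokenConstraintBackend()
else:
    self.backend = ChunkedPrefillBackend()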
