@@ -7,26 +7,19 @@
 from datetime import timedelta
 from typing import Dict, List, Tuple
 from lightllm.server.router.model_infer.mode_backend import (
-    ContinuesBatchBackend,
-    ReturnPromptLogProbBackend,
     ChunkedPrefillBackend,
-    DiversehBackend,
+    FirstTokenConstraintBackend,
+    OutlinesConstraintBackend,
+    ReturnPromptLogProbBackend,
     RewardModelBackend,
     TokenHealingBackend,
-    OutlinesConstraintBackend,
     XgrammarBackend,
-    FirstTokenConstraintBackend,
     DPChunkedPrefillBackend,
-    ContinuesBatchBackendForDecodeNode,
+    DiversehBackend,
+    DecodeNode,
     DPForDecodeNode,
     ChunckedPrefillForPrefillNode,
     DPChunkedForPrefillNode,
-    ContinuesBatchWithMTPBackend,
-    DPChunkedPrefillWithMTPBackend,
-    DPForMtpDecodeNode,
-    ContinuesBatchBackendForMtpDecodeNode,
-    ChunckedPrefillForMtpPrefillNode,
-    DPChunkedForMtpPrefillNode,
 )
 from lightllm.server.router.model_infer.mode_backend.redundancy_expert_manager import RedundancyExpertManager
 from lightllm.server.core.objs import RpcShmParams, RpcShmResults, ShmSyncStatusArray
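# For reference, a sketch of the net result: the import block as it reads after
# this patch, reconstructed from the context and "+" lines of the hunk above.
# The MTP-specific backends and the ContinuesBatch* variants are no longer
# imported, and DecodeNode takes the place of ContinuesBatchBackendForDecodeNode.
from lightllm.server.router.model_infer.mode_backend import (
    ChunkedPrefillBackend,
    FirstTokenConstraintBackend,
    OutlinesConstraintBackend,
    ReturnPromptLogProbBackend,
    RewardModelBackend,
    TokenHealingBackend,
    XgrammarBackend,
    DPChunkedPrefillBackend,
    DiversehBackend,
    DecodeNode,
    DPForDecodeNode,
    ChunckedPrefillForPrefillNode,
    DPChunkedForPrefillNode,
)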
@@ -112,7 +105,6 @@ def init_model(self, kvargs):
         # fill in the real rank_id parameter
         kvargs["rank_id"] = self.rank
         self.world_size = kvargs["world_size"]
-        disable_chunked_prefill = self.args.disable_chunked_prefill
         return_all_prompt_logprobs = self.args.return_all_prompt_logprobs
         use_reward_model = self.args.use_reward_model
         diverse_mode = self.args.diverse_mode
@@ -125,35 +117,18 @@ def init_model(self, kvargs):
         is_prefill_node = self.args.run_mode == "prefill"
         is_decode_node = self.args.run_mode == "decode"
 
-        enable_mtp = self.args.mtp_mode is not None
-
         if is_prefill_node:
-            if enable_mtp:
-                if self.args.dp > 1:
-                    self.backend = DPChunkedForMtpPrefillNode(self.info_queue, self.mem_queue)
-                else:
-                    self.backend = ChunckedPrefillForMtpPrefillNode(self.info_queue, self.mem_queue)
+            if self.args.dp > 1:
+                self.backend = DPChunkedForPrefillNode(self.info_queue, self.mem_queue)
             else:
-                if self.args.dp > 1:
-                    self.backend = DPChunkedForPrefillNode(self.info_queue, self.mem_queue)
-                else:
-                    self.backend = ChunckedPrefillForPrefillNode(self.info_queue, self.mem_queue)
+                self.backend = ChunckedPrefillForPrefillNode(self.info_queue, self.mem_queue)
         elif is_decode_node:
-            if enable_mtp:
-                if self.args.dp > 1:
-                    self.backend = DPForMtpDecodeNode(self.info_queue, self.mem_queue)
-                else:
-                    self.backend = ContinuesBatchBackendForMtpDecodeNode(self.info_queue, self.mem_queue)
+            if self.args.dp > 1:
+                self.backend = DPForDecodeNode(self.info_queue, self.mem_queue)
             else:
-                if self.args.dp > 1:
-                    self.backend = DPForDecodeNode(self.info_queue, self.mem_queue)
-                else:
-                    self.backend = ContinuesBatchBackendForDecodeNode(self.info_queue, self.mem_queue)
+                self.backend = DecodeNode(self.info_queue, self.mem_queue)
         elif self.args.dp > 1:
-            if enable_mtp:
-                self.backend = DPChunkedPrefillWithMTPBackend()
-            else:
-                self.backend = DPChunkedPrefillBackend()
+            self.backend = DPChunkedPrefillBackend()
         elif use_reward_model:
             self.backend = RewardModelBackend()
         elif return_all_prompt_logprobs:
@@ -168,16 +143,8 @@ def init_model(self, kvargs):
             self.backend = XgrammarBackend()
         elif is_first_token_constraint_mode:
             self.backend = FirstTokenConstraintBackend()
-        elif disable_chunked_prefill:
-            if enable_mtp:
-                self.backend = ContinuesBatchWithMTPBackend()
-            else:
-                self.backend = ContinuesBatchBackend()
         else:
-            if enable_mtp:
-                self.backend = ContinuesBatchWithMTPBackend()
-            else:
-                self.backend = ChunkedPrefillBackend()
+            self.backend = ChunkedPrefillBackend()
 
         logger.info(f"use {self.backend.__class__.__name__}")
         self.backend.init_model(kvargs)
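# A sketch of the backend-selection chain in init_model() as it reads after
# this patch, reconstructed from the context and "+" lines of the hunks above;
# per the hunk headers, the patch collapses the 35-line dispatch into 18 lines.
# Wrapping it in select_backend() is hypothetical (the source keeps this logic
# inline in init_model()), and branches the diff does not show are left as a gap.
def select_backend(self):
    # mode flags are read from self.args earlier in init_model(), e.g.:
    use_reward_model = self.args.use_reward_model
    return_all_prompt_logprobs = self.args.return_all_prompt_logprobs

    if self.args.run_mode == "prefill":
        if self.args.dp > 1:
            self.backend = DPChunkedForPrefillNode(self.info_queue, self.mem_queue)
        else:
            self.backend = ChunckedPrefillForPrefillNode(self.info_queue, self.mem_queue)
    elif self.args.run_mode == "decode":
        if self.args.dp > 1:
            self.backend = DPForDecodeNode(self.info_queue, self.mem_queue)
        else:
            self.backend = DecodeNode(self.info_queue, self.mem_queue)
    elif self.args.dp > 1:
        self.backend = DPChunkedPrefillBackend()
    elif use_reward_model:
        self.backend = RewardModelBackend()
    elif return_all_prompt_logprobs:
        self.backend = ReturnPromptLogProbBackend()
    # ... branches elided by the diff (diverse mode, token healing,
    # outlines / xgrammar constraint modes) remain unchanged ...
    elif is_first_token_constraint_mode:  # read from self.args like the flags above
        self.backend = FirstTokenConstraintBackend()
    else:
        # with disable_chunked_prefill removed, chunked prefill is the
        # unconditional default for the single-node, non-MTP path
        self.backend = ChunkedPrefillBackend()
    logger.info(f"use {self.backend.__class__.__name__}")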