@@ -7,26 +7,19 @@
 from datetime import timedelta
 from typing import Dict, List, Tuple
 from lightllm.server.router.model_infer.mode_backend import (
-    ContinuesBatchBackend,
-    ReturnPromptLogProbBackend,
     ChunkedPrefillBackend,
-    DiversehBackend,
+    FirstTokenConstraintBackend,
+    OutlinesConstraintBackend,
+    ReturnPromptLogProbBackend,
     RewardModelBackend,
     TokenHealingBackend,
-    OutlinesConstraintBackend,
     XgrammarBackend,
-    FirstTokenConstraintBackend,
     DPChunkedPrefillBackend,
-    ContinuesBatchBackendForDecodeNode,
+    DiversehBackend,
+    DecodeNode,
     DPForDecodeNode,
     ChunckedPrefillForPrefillNode,
     DPChunkedForPrefillNode,
-    ContinuesBatchWithMTPBackend,
-    DPChunkedPrefillWithMTPBackend,
-    DPForMtpDecodeNode,
-    ContinuesBatchBackendForMtpDecodeNode,
-    ChunckedPrefillForMtpPrefillNode,
-    DPChunkedForMtpPrefillNode,
 )
 from lightllm.server.router.model_infer.mode_backend.redundancy_expert_manager import RedundancyExpertManager
 from lightllm.server.core.objs import RpcShmParams, RpcShmResults, ShmSyncStatusArray
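# For reference, a sketch of the net result: the import block as it reads after
# this patch, reconstructed from the context and "+" lines of the hunk above.
# The MTP-specific backends and the ContinuesBatch* variants are no longer
# imported, and DecodeNode takes the place of ContinuesBatchBackendForDecodeNode.
from lightllm.server.router.model_infer.mode_backend import (
    ChunkedPrefillBackend,
    FirstTokenConstraintBackend,
    OutlinesConstraintBackend,
    ReturnPromptLogProbBackend,
    RewardModelBackend,
    TokenHealingBackend,
    XgrammarBackend,
    DPChunkedPrefillBackend,
    DiversehBackend,
    DecodeNode,
    DPForDecodeNode,
    ChunckedPrefillForPrefillNode,
    DPChunkedForPrefillNode,
)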
@@ -112,7 +105,6 @@ def init_model(self, kvargs):
         # fill in the real rank_id parameter
         kvargs["rank_id"] = self.rank
         self.world_size = kvargs["world_size"]
-        disable_chunked_prefill = self.args.disable_chunked_prefill
         return_all_prompt_logprobs = self.args.return_all_prompt_logprobs
         use_reward_model = self.args.use_reward_model
         diverse_mode = self.args.diverse_mode
@@ -125,35 +117,18 @@ def init_model(self, kvargs):
         is_prefill_node = self.args.run_mode == "prefill"
         is_decode_node = self.args.run_mode == "decode"
 
-        enable_mtp = self.args.mtp_mode is not None
-
         if is_prefill_node:
-            if enable_mtp:
-                if self.args.dp > 1:
-                    self.backend = DPChunkedForMtpPrefillNode(self.info_queue, self.mem_queue)
-                else:
-                    self.backend = ChunckedPrefillForMtpPrefillNode(self.info_queue, self.mem_queue)
+            if self.args.dp > 1:
+                self.backend = DPChunkedForPrefillNode(self.info_queue, self.mem_queue)
             else:
-                if self.args.dp > 1:
-                    self.backend = DPChunkedForPrefillNode(self.info_queue, self.mem_queue)
-                else:
-                    self.backend = ChunckedPrefillForPrefillNode(self.info_queue, self.mem_queue)
+                self.backend = ChunckedPrefillForPrefillNode(self.info_queue, self.mem_queue)
         elif is_decode_node:
-            if enable_mtp:
-                if self.args.dp > 1:
-                    self.backend = DPForMtpDecodeNode(self.info_queue, self.mem_queue)
-                else:
-                    self.backend = ContinuesBatchBackendForMtpDecodeNode(self.info_queue, self.mem_queue)
+            if self.args.dp > 1:
+                self.backend = DPForDecodeNode(self.info_queue, self.mem_queue)
             else:
-                if self.args.dp > 1:
-                    self.backend = DPForDecodeNode(self.info_queue, self.mem_queue)
-                else:
-                    self.backend = ContinuesBatchBackendForDecodeNode(self.info_queue, self.mem_queue)
+                self.backend = DecodeNode(self.info_queue, self.mem_queue)
         elif self.args.dp > 1:
-            if enable_mtp:
-                self.backend = DPChunkedPrefillWithMTPBackend()
-            else:
-                self.backend = DPChunkedPrefillBackend()
+            self.backend = DPChunkedPrefillBackend()
         elif use_reward_model:
             self.backend = RewardModelBackend()
         elif return_all_prompt_logprobs:
@@ -168,16 +143,8 @@ def init_model(self, kvargs):
             self.backend = XgrammarBackend()
         elif is_first_token_constraint_mode:
             self.backend = FirstTokenConstraintBackend()
-        elif disable_chunked_prefill:
-            if enable_mtp:
-                self.backend = ContinuesBatchWithMTPBackend()
-            else:
-                self.backend = ContinuesBatchBackend()
         else:
-            if enable_mtp:
-                self.backend = ContinuesBatchWithMTPBackend()
-            else:
-                self.backend = ChunkedPrefillBackend()
+            self.backend = ChunkedPrefillBackend()
 
         logger.info(f"use {self.backend.__class__.__name__}")
         self.backend.init_model(kvargs)
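# A sketch of the backend-selection chain in init_model() as it reads after
# this patch, reconstructed from the context and "+" lines of the hunks above;
# per the hunk headers, the patch collapses the 35-line dispatch into 18 lines.
# Wrapping it in select_backend() is hypothetical (the source keeps this logic
# inline in init_model()), and branches the diff does not show are left as a gap.
def select_backend(self):
    # mode flags are read from self.args earlier in init_model(), e.g.:
    use_reward_model = self.args.use_reward_model
    return_all_prompt_logprobs = self.args.return_all_prompt_logprobs

    if self.args.run_mode == "prefill":
        if self.args.dp > 1:
            self.backend = DPChunkedForPrefillNode(self.info_queue, self.mem_queue)
        else:
            self.backend = ChunckedPrefillForPrefillNode(self.info_queue, self.mem_queue)
    elif self.args.run_mode == "decode":
        if self.args.dp > 1:
            self.backend = DPForDecodeNode(self.info_queue, self.mem_queue)
        else:
            self.backend = DecodeNode(self.info_queue, self.mem_queue)
    elif self.args.dp > 1:
        self.backend = DPChunkedPrefillBackend()
    elif use_reward_model:
        self.backend = RewardModelBackend()
    elif return_all_prompt_logprobs:
        self.backend = ReturnPromptLogProbBackend()
    # ... branches elided by the diff (diverse mode, token healing,
    # outlines / xgrammar constraint modes) remain unchanged ...
    elif is_first_token_constraint_mode:  # read from self.args like the flags above
        self.backend = FirstTokenConstraintBackend()
    else:
        # with disable_chunked_prefill removed, chunked prefill is the
        # unconditional default for the single-node, non-MTP path
        self.backend = ChunkedPrefillBackend()
    logger.info(f"use {self.backend.__class__.__name__}")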