Skip to content

Commit c56c998

Browse files
Revert "[BugFix] num_seqs (#3291)" (#3316)
This reverts commit e0aeac5.
1 parent 9571c45 commit c56c998

File tree

3 files changed

+3
-6
lines changed

3 files changed

+3
-6
lines changed

fastdeploy/model_executor/models/ernie4_5_moe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -450,7 +450,7 @@ def empty_input_forward(self):
 450 450   self.fd_config.model_config.moe_layer_start_index,
 451 451   self.fd_config.model_config.num_hidden_layers,
 452 452   ):
 453     - self.ernie.layers[i].mlp.experts(fake_hidden_states, self.ernie.layers[i].mlp.gate)
     453 + self.ernie.layers[i].mlp.expert(fake_hidden_states)
 454 454
 455 455   def forward(
 456 456   self,

fastdeploy/worker/gpu_model_runner.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -799,7 +799,7 @@ def _prepare_inputs(self) -> None:
 799 799   output_padding_offset,
 800 800   ) = pre_process(
 801 801   self.share_inputs["input_ids"],
 802     - getattr(self.share_inputs, "seq_lens_this_time", self.seq_lens_this_time_buffer),
     802 + self.share_inputs["seq_lens_this_time"],
 803 803   self.speculative_decoding,
 804 804   (self.share_inputs["draft_tokens"] if self.speculative_decoding else None),
 805 805   self.share_inputs["seq_lens_encoder"],
@@ -884,7 +884,7 @@ def initialize_forward_meta(self):
 884 884   max_len_tensor_cpu=self.share_inputs["max_len_tensor_cpu"],
 885 885   seq_lens_encoder=self.share_inputs["seq_lens_encoder"],
 886 886   seq_lens_decoder=self.share_inputs["seq_lens_decoder"],
 887     - seq_lens_this_time=getattr(self.share_inputs, "seq_lens_this_time", self.seq_lens_this_time_buffer),
     887 + seq_lens_this_time=self.share_inputs["seq_lens_this_time"],
 888 888   batch_id_per_token=self.share_inputs["batch_id_per_token"],
 889 889   cu_seqlens_q=self.share_inputs["cu_seqlens_q"],
 890 890   cu_seqlens_k=self.share_inputs["cu_seqlens_k"],

fastdeploy/worker/worker_process.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,6 @@ def event_loop_ep(self) -> None:
 244 244   """
 245 245   while True:
 246 246   self.worker_healthy_live_signal.value[self.local_rank % self.max_chips_per_node] = int(time.time())
 247     - num_running_requests = 0
 248 247
 249 248   if self.fd_config.parallel_config.tensor_parallel_rank == 0 and self.task_queue.num_tasks() > 0:
 250 249   tasks, read_finish = self.task_queue.get_tasks()
@@ -272,8 +271,6 @@ def event_loop_normal(self) -> None:
 272 271   self.nnode = int((self.parallel_config.tensor_parallel_size + 7) // 8)
 273 272   mp_num_per_node = self.parallel_config.tensor_parallel_size // self.nnode
 274 273   req_ids = []
 275     - num_running_requests = 0
 276     -
 277 274   while True:
 278 275   if self.local_rank == 0:
 279 276   if self.model_weights_status.value[0] != 0:

0 commit comments

Comments
 (0)