Skip to content

Commit 86ff68b

Browse files
authored
[BugFix] fix ep (#3290)
* fix ep * fix
1 parent 702c313 commit 86ff68b

File tree

2 files changed

+4
-3
lines changed

2 files changed

+4
-3
lines changed

fastdeploy/worker/gpu_model_runner.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -777,7 +777,7 @@ def _prepare_inputs(self) -> None:
777777
output_padding_offset,
778778
) = pre_process(
779779
self.share_inputs["input_ids"],
780-
self.share_inputs["seq_lens_this_time"],
780+
getattr(self.share_inputs, "seq_lens_this_time", self.seq_lens_this_time_buffer),
781781
self.speculative_decoding,
782782
(self.share_inputs["draft_tokens"] if self.speculative_decoding else None),
783783
self.share_inputs["seq_lens_encoder"],
@@ -861,7 +861,7 @@ def initialize_forward_meta(self):
861861
max_len_tensor_cpu=self.share_inputs["max_len_tensor_cpu"],
862862
seq_lens_encoder=self.share_inputs["seq_lens_encoder"],
863863
seq_lens_decoder=self.share_inputs["seq_lens_decoder"],
864-
seq_lens_this_time=self.share_inputs["seq_lens_this_time"],
864+
seq_lens_this_time=getattr(self.share_inputs, "seq_lens_this_time", self.seq_lens_this_time_buffer),
865865
batch_id_per_token=self.share_inputs["batch_id_per_token"],
866866
cu_seqlens_q=self.share_inputs["cu_seqlens_q"],
867867
cu_seqlens_k=self.share_inputs["cu_seqlens_k"],

fastdeploy/worker/worker_process.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ def event_loop_ep(self) -> None:
244244
"""
245245
while True:
246246
self.worker_healthy_live_signal.value[self.local_rank % self.max_chips_per_node] = int(time.time())
247-
247+
num_running_requests = 0
248248
if self.fd_config.parallel_config.tensor_parallel_rank == 0 and self.task_queue.num_tasks() > 0:
249249
tasks, read_finish = self.task_queue.get_tasks()
250250

@@ -271,6 +271,7 @@ def event_loop_normal(self) -> None:
271271
self.nnode = int((self.parallel_config.tensor_parallel_size + 7) // 8)
272272
mp_num_per_node = self.parallel_config.tensor_parallel_size // self.nnode
273273
req_ids = []
274+
num_running_requests = 0
274275
while True:
275276
if self.local_rank == 0:
276277
if self.model_weights_status.value[0] != 0:

0 commit comments

Comments (0)