Skip to content

Commit a4572a5

Browse files
authored
fix bug for pd step signal (#3230)
1 parent a9d231c commit a4572a5

File tree

2 files changed

+6
-5
lines changed

2 files changed

+6
-5
lines changed

fastdeploy/splitwise/internal_adapter_utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def _get_current_server_info(self):
             "max_batch_size": int(available_batch_size),
             "max_input_token_num": self.cfg.max_num_batched_tokens,
             "unhandled_request_num": self.engine.scheduler.get_unhandled_request_num(),
+            "available_batch": int(self.engine.resource_manager.available_batch()),
         }
         return server_info

fastdeploy/worker/gpu_model_runner.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1268,18 +1268,18 @@ class at the server level, which is too granular for ModelRunner.
         We plan to replace it with 'ModelForwardBatch'.
         intermediate_tensors:
         """
-        # 1. Prepare inputs of model and sampler.
-        skip_idx_list = self._get_skip_idx(model_forward_batch)
-        self._prepare_inputs()
-        self.sampler.pre_process(skip_idx_list)
-
         # NOTE(wufeisheng): If `not_need_stop`` is False, it means the current worker is in an idle state.
         # This logic is not used in TP (Tensor Parallelism) mode. However, in EP (Expert Parallelism) mode,
         # when there is data on other runner, the current runner is required to execute part of the model.
         if not self.not_need_stop():
             self._execute_empty_input()
             return None

+        # 1. Prepare inputs of model and sampler.
+        skip_idx_list = self._get_skip_idx(model_forward_batch)
+        self._prepare_inputs()
+        self.sampler.pre_process(skip_idx_list)
+
         # 2. Padding inputs for cuda graph
         self.padding_cudagraph_inputs()

0 commit comments

Comments
 (0)