diff --git a/fastdeploy/entrypoints/openai/api_server.py b/fastdeploy/entrypoints/openai/api_server.py
index a87f742aed..0a4e5935f8 100644
--- a/fastdeploy/entrypoints/openai/api_server.py
+++ b/fastdeploy/entrypoints/openai/api_server.py
@@ -517,6 +517,7 @@ def control_scheduler(request: ControlSchedulerRequest):
         return JSONResponse(content=content.model_dump(), status_code=500)
 
     if request.reset:
+        llm_engine.engine.clear_data()
         llm_engine.engine.scheduler.reset()
 
     if request.load_shards_num or request.reallocate_shard:
diff --git a/fastdeploy/rl/dynamic_weight_manager.py b/fastdeploy/rl/dynamic_weight_manager.py
index 6d55c3e414..cc714f79dc 100644
--- a/fastdeploy/rl/dynamic_weight_manager.py
+++ b/fastdeploy/rl/dynamic_weight_manager.py
@@ -256,6 +256,7 @@ def check_model_weights_status(model_weights_status, model_runner, pid):
     while model_weights_status.value[0] != 0:
         if model_weights_status.value[0] == 1:
             logger.info("infer engine stopped! start to load new checkpoint...")
+            model_runner.clear_requests()
             model_runner.update_parameters(pid)
         elif model_weights_status.value[0] == -1:
             logger.info("infer engine stopped! start to clear checkpoint...")
diff --git a/fastdeploy/worker/worker_process.py b/fastdeploy/worker/worker_process.py
index 81e66cc076..0405dc1baf 100644
--- a/fastdeploy/worker/worker_process.py
+++ b/fastdeploy/worker/worker_process.py
@@ -332,6 +332,8 @@ def event_loop_normal(self) -> None:
                         self.worker.model_runner,
                         self.parallel_config.engine_worker_queue_port,
                     )
+                    logger.info(f"current task queue data: {self.task_queue.num_tasks()}")
+                    self.task_queue.clear_data()
                     self.model_weights_signal[0] = 0
                     logger.info(f"Rank: {self.local_rank} has updated or cleared parameters.")
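
The api_server change makes the reset path drop buffered engine data before the scheduler itself is reset, so a reset cannot hand the scheduler requests that were accepted under the previous state. A minimal sketch of that ordering, using a hypothetical handler and engine object rather than FastDeploy's real classes:

    # Sketch only: hypothetical objects; only clear_data()/scheduler.reset() mirror the diff.
    def control_scheduler(request, llm_engine):
        if request.reset:
            llm_engine.engine.clear_data()       # drop queued requests/outputs first
            llm_engine.engine.scheduler.reset()  # then reset scheduling state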
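
The worker-side changes follow the same ordering during a dynamic weight update: in-flight requests are cleared before the new parameters are loaded, and the shared task queue is drained afterwards, so nothing scheduled against the old checkpoint is served by the new one. A rough, simplified sketch of that flow (signal values and the clear_parameters helper are assumptions; clear_requests, update_parameters, and clear_data mirror the diff):

    import time

    # Sketch only: not FastDeploy's actual worker loop.
    def wait_for_weight_update(model_weights_status, model_runner, task_queue, pid):
        while model_weights_status.value[0] != 0:
            if model_weights_status.value[0] == 1:      # 1 => load a new checkpoint
                model_runner.clear_requests()           # drop in-flight requests first
                model_runner.update_parameters(pid)     # expected to set the status back to 0
            elif model_weights_status.value[0] == -1:   # -1 => clear the checkpoint
                model_runner.clear_parameters(pid)      # hypothetical counterpart
            time.sleep(0.01)
        # Once the update is done, drain tasks queued against the old weights.
        task_queue.clear_data()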