2929from lightllm .utils .log_utils import init_logger , log_time_ready
3030from lightllm .server .router .token_load import TokenLoad
3131from lightllm .server .metrics .manager import MetricClient
32+ from lightllm .server .router .dynamic_prompt .shared_arr import SharedInt
3233from lightllm .common .basemodel .infer_lock import g_router_lock
3334from lightllm .common .mem_manager import ReadOnlyStaticsMemoryManager
3435from lightllm .utils .graceful_utils import graceful_registry
@@ -71,6 +72,11 @@ def __init__(self, args, router_port, detokenization_port, metric_port):
7172 self .shared_token_load .set_logical_max_load (0.0 , dp_index )
7273 self .shared_token_load .set_dynamic_max_load (0.0 , dp_index )
7374
75+ # The timemark of the latest inference (prefill/decode), used to check the health status of the system.
76+ # If the timemark has not been updated for a preset period of time, a probe request will be sent to the backend.
77+ self .latest_req_inference_timemark = SharedInt (f"{ get_unique_server_name ()} _latest_req_inference_timemark" )
78+ self .latest_req_inference_timemark .set_value (int (time .time ()))
79+
7480 self .pause_strategy = Fcfs ()
7581 self .running_batch : Batch = None
7682 self .eos_id = args .eos_id
@@ -389,6 +395,8 @@ async def _prefill_batch(self, batch: Batch):
389395 self .metric_client .histogram_observe (
390396 "lightllm_batch_inference_duration_bucket" , time .time () - start_time , "prefill"
391397 )
398+ # update inference timemark
399+ self .latest_req_inference_timemark .set_value (int (time .time ()))
392400 return
393401
394402 async def _decode_batch (self , batch : Batch ):
@@ -404,6 +412,8 @@ async def _decode_batch(self, batch: Batch):
404412 self .metric_client .histogram_observe (
405413 "lightllm_batch_inference_duration_bucket" , time .time () - start_time , "decode"
406414 )
415+ # update inference timemark
416+ self .latest_req_inference_timemark .set_value (int (time .time ()))
407417 return
408418
409419 async def _pause_reqs (self , pasue_reqs ):
0 commit comments