11import os
2+ import time
23import asyncio
34import numpy as np
45from dataclasses import dataclass
56from lightllm .server .core .objs import SamplingParams
67from lightllm .server .multimodal_params import MultimodalParams
78from lightllm .server .httpserver .manager import HttpServerManager
9+ from lightllm .server .router .dynamic_prompt .shared_arr import SharedInt
810from fastapi import Request
911from lightllm .server .req_id_generator import ReqIDGenerator
1012from lightllm .utils .log_utils import init_logger
13+ from lightllm .utils .envs_utils import get_unique_server_name
1114
1215logger = init_logger (__name__ )
1316
@@ -24,6 +27,7 @@ class HealthObj:
2427 _failure_threshold : int = int (os .getenv ("HEALTH_FAILURE_THRESHOLD" , 3 ))
2528 timeout : int = int (os .getenv ("HEALTH_TIMEOUT" , 100 ))
2629 dynamic_timeout : int = int (os .getenv ("HEALTH_TIMEOUT" , 100 ))
30+ latest_success_infer_time_mark = SharedInt (f"{ get_unique_server_name ()} _latest_success_infer_time_mark" )
2731
2832 def begin_check (self ):
2933 self ._is_health_checking = True
@@ -48,13 +52,22 @@ def is_health(self):
def is_checking(self):
    """Report whether a health probe is currently in progress.

    Returns:
        The current value of ``self._is_health_checking`` (the flag that
        ``begin_check`` sets to True).
    """
    probe_running = self._is_health_checking
    return probe_running
5054
def has_latest_inference(self):
    """Tell whether the recorded last-success inference mark is recent.

    The value held by ``latest_success_infer_time_mark`` is treated as a
    ``time.time()``-style timestamp and compared against the current time.
    NOTE(review): the writer of this mark is not visible here -- confirm it
    stores epoch seconds.

    Returns:
        bool: True when the elapsed time since the mark is strictly less
        than ``self.timeout``; False otherwise.
    """
    # Read the shared mark first, then sample the clock.
    recorded_mark = self.latest_success_infer_time_mark.get_value()
    elapsed = time.time() - recorded_mark
    if elapsed < self.timeout:
        return True
    return False
59+
5160
# Module-level HealthObj instance; health_check() below queries and updates it.
5261health_obj = HealthObj ()
5362
5463
5564async def health_check (args , httpserver_manager : HttpServerManager , request : Request ):
5665 if health_obj .is_checking ():
5766 return health_obj .is_health ()
67+
68+ if health_obj .is_health () and health_obj .has_latest_inference ():
69+ return health_obj .is_health ()
70+
5871 health_obj .begin_check ()
5972 try :
6073 request_dict = {"inputs" : "你好!" , "parameters" : {"do_sample" : True , "temperature" : 0.8 , "max_new_tokens" : 2 }}
0 commit comments