Commit 1be3398

update health check
1 parent 84d35d3 commit 1be3398

File tree

2 files changed: +20 -1 lines changed

lightllm/server/router/manager.py

Lines changed: 10 additions & 0 deletions
@@ -29,6 +29,7 @@
 from lightllm.utils.log_utils import init_logger, log_time_ready
 from lightllm.server.router.token_load import TokenLoad
 from lightllm.server.metrics.manager import MetricClient
+from lightllm.server.router.dynamic_prompt.shared_arr import SharedInt
 from lightllm.common.basemodel.infer_lock import g_router_lock
 from lightllm.common.mem_manager import ReadOnlyStaticsMemoryManager
 from lightllm.utils.graceful_utils import graceful_registry

@@ -71,6 +72,11 @@ def __init__(self, args, router_port, detokenization_port, metric_port):
             self.shared_token_load.set_logical_max_load(0.0, dp_index)
             self.shared_token_load.set_dynamic_max_load(0.0, dp_index)

+        # Timemark of the latest inference (prefill/decode), used to check the health status of the system.
+        # If the timemark is not updated for a preset time, a probe request will be sent to the backend.
+        self.latest_req_inference_timemark = SharedInt(f"{get_unique_server_name()}_latest_req_inference_timemark")
+        self.latest_req_inference_timemark.set_value(int(time.time()))
+
         self.pause_strategy = Fcfs()
         self.running_batch: Batch = None
         self.eos_id = args.eos_id

@@ -389,6 +395,8 @@ async def _prefill_batch(self, batch: Batch):
         self.metric_client.histogram_observe(
             "lightllm_batch_inference_duration_bucket", time.time() - start_time, "prefill"
         )
+        # update inference timemark
+        self.latest_req_inference_timemark.set_value(int(time.time()))
         return

     async def _decode_batch(self, batch: Batch):

@@ -404,6 +412,8 @@ async def _decode_batch(self, batch: Batch):
         self.metric_client.histogram_observe(
             "lightllm_batch_inference_duration_bucket", time.time() - start_time, "decode"
         )
+        # update inference timemark
+        self.latest_req_inference_timemark.set_value(int(time.time()))
         return

     async def _pause_reqs(self, pasue_reqs):
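
For orientation, the change above boils down to a single integer timestamp kept in named shared memory: the router process overwrites it after every finished prefill/decode batch, and any other process that knows the name (derived from get_unique_server_name()) can attach and read it. The sketch below illustrates that pattern with a hypothetical DemoSharedInt built on Python's multiprocessing.shared_memory; it is a stand-in for lightllm's SharedInt, assuming only the set_value()/get_value() interface visible in the diff, and the segment name and HEALTH_TIMEOUT value are illustrative.

import time
from multiprocessing import shared_memory

import numpy as np


class DemoSharedInt:
    """Hypothetical stand-in for SharedInt: a named int64 in shared memory,
    so separate processes (router vs. health endpoint) can read and write
    the same value just by agreeing on the name."""

    def __init__(self, name: str):
        try:
            self.shm = shared_memory.SharedMemory(name=name, create=True, size=8)
        except FileExistsError:
            # Segment already exists (created by the other process): attach to it.
            self.shm = shared_memory.SharedMemory(name=name)
        self.arr = np.ndarray((1,), dtype=np.int64, buffer=self.shm.buf)

    def set_value(self, value: int):
        self.arr[0] = value

    def get_value(self) -> int:
        return int(self.arr[0])


# Router side: refresh the timemark after every finished prefill/decode batch.
timemark = DemoSharedInt("demo_latest_req_inference_timemark")
timemark.set_value(int(time.time()))

# Health-check side: attach to the same name and see how stale the mark is.
reader = DemoSharedInt("demo_latest_req_inference_timemark")
HEALTH_TIMEOUT = 100  # seconds, mirroring the HEALTH_TIMEOUT default in the diff
is_recently_active = (time.time() - reader.get_value()) < HEALTH_TIMEOUT
print("skip probe request" if is_recently_active else "send probe request")

# Clean up the demo segment (the real SharedInt manages its own lifetime).
reader.shm.close()
timemark.shm.close()
timemark.shm.unlink()

Because the shared state is just one 8-byte slot, the router's hot path only pays for a single integer store per batch.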

lightllm/utils/health_check.py

Lines changed: 10 additions & 1 deletion
@@ -1,13 +1,16 @@
 import os
+import time
 import asyncio
 import numpy as np
 from dataclasses import dataclass
 from lightllm.server.core.objs import SamplingParams
 from lightllm.server.multimodal_params import MultimodalParams
 from lightllm.server.httpserver.manager import HttpServerManager
+from lightllm.server.router.dynamic_prompt.shared_arr import SharedInt
 from fastapi import Request
 from lightllm.server.req_id_generator import ReqIDGenerator
 from lightllm.utils.log_utils import init_logger
+from lightllm.utils.envs_utils import get_unique_server_name

 logger = init_logger(__name__)

@@ -24,6 +27,7 @@ class HealthObj:
     _failure_threshold: int = int(os.getenv("HEALTH_FAILURE_THRESHOLD", 3))
     timeout: int = int(os.getenv("HEALTH_TIMEOUT", 100))
     dynamic_timeout: int = int(os.getenv("HEALTH_TIMEOUT", 100))
+    latest_req_inferece_timemark = SharedInt(f"{get_unique_server_name()}_latest_req_inference_timemark")

     def begin_check(self):
         self._is_health_checking = True

@@ -48,12 +52,17 @@ def is_health(self):
     def is_checking(self):
         return self._is_health_checking

+    def has_latest_inference(self):
+        last_timemark = self.latest_req_inferece_timemark.get_value()
+        time_diff = time.time() - last_timemark
+        return time_diff < self.timeout
+

 health_obj = HealthObj()


 async def health_check(args, httpserver_manager: HttpServerManager, request: Request):
-    if health_obj.is_checking():
+    if health_obj.is_checking() or health_obj.has_latest_inference():
         return health_obj.is_health()
     health_obj.begin_check()
     try:
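
Putting the two halves together, the health endpoint now short-circuits whenever a batch finished recently, and only falls back to an end-to-end probe when the router has been silent for longer than the timeout. Below is a condensed, hypothetical sketch of that control flow: HealthObjSketch and probe_backend() are illustrative stand-ins, not lightllm's real HealthObj or its generation-based probe; only the is_checking()/is_health()/has_latest_inference() shape from the diff is assumed.

import asyncio
import time


class HealthObjSketch:
    """Hypothetical stand-in for HealthObj in lightllm/utils/health_check.py."""

    timeout = 100  # seconds, mirroring the HEALTH_TIMEOUT default

    def __init__(self, latest_inference_ts: int):
        self._is_health_checking = False
        self._is_health = True
        self.latest_inference_ts = latest_inference_ts

    def is_checking(self) -> bool:
        return self._is_health_checking

    def is_health(self) -> bool:
        return self._is_health

    def has_latest_inference(self) -> bool:
        # A recent prefill/decode already proves the backend is alive,
        # so the probe request can be skipped.
        return (time.time() - self.latest_inference_ts) < self.timeout


async def probe_backend() -> None:
    """Stand-in for the small generation request a real check would send."""
    await asyncio.sleep(0)


async def health_check(health_obj: HealthObjSketch) -> bool:
    # Short-circuit introduced by this commit: an in-flight check OR a
    # recent inference returns the cached health status without probing.
    if health_obj.is_checking() or health_obj.has_latest_inference():
        return health_obj.is_health()
    health_obj._is_health_checking = True
    try:
        await probe_backend()
        health_obj._is_health = True
    except Exception:
        health_obj._is_health = False
    finally:
        health_obj._is_health_checking = False
    return health_obj.is_health()


# With a fresh timemark the probe is skipped and the cached status is returned.
print(asyncio.run(health_check(HealthObjSketch(int(time.time())))))  # -> True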
