Skip to content

Commit c29f685

Browse files
shihaobaiXHPlus
authored and committed
update health check (ModelTC#824)
1 parent e11ae42 commit c29f685

File tree

2 files changed

+22
-0
lines changed

2 files changed

+22
-0
lines changed

lightllm/server/httpserver/manager.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,14 @@
2727
from lightllm.server.core.objs.io_objs import GroupReqObjs
2828
from fastapi import Request
2929
from lightllm.server.core.objs.shm_req_manager import ShmReqManager
30+
from lightllm.server.router.dynamic_prompt.shared_arr import SharedInt
3031
from lightllm.utils.log_utils import init_logger
3132
from lightllm.utils.envs_utils import get_env_start_args
3233
from lightllm.server.metrics.manager import MetricClient
3334
from lightllm.utils.statics_utils import MovingAverage
3435
from lightllm.utils.net_utils import get_hostname_ip
3536
from lightllm.utils.config_utils import get_vocab_size
37+
from lightllm.utils.envs_utils import get_unique_server_name
3638

3739
logger = init_logger(__name__)
3840

@@ -107,6 +109,10 @@ def __init__(
107109
# 有的模型的vocab size 读取tokenizer和config.json中不一致
108110
self.vocab_size = max(get_vocab_size(args.model_dir), self.tokenizer.vocab_size)
109111

112+
# The time mark of the latest inference (prefill/decode), used to check the health status of the system.
113+
# If the time mark is not updated for a pre-set time, a probe request will be sent to the backend.
114+
self.latest_success_infer_time_mark = SharedInt(f"{get_unique_server_name()}_latest_success_infer_time_mark")
115+
self.latest_success_infer_time_mark.set_value(int(time.time()))
110116
return
111117

112118
# connect cache server, calculate md5, alloc resource, return uuid
@@ -491,6 +497,9 @@ async def _wait_to_token_package(
491497

492498
out_token_counter += 1
493499

500+
# update inference timemark
501+
self.latest_success_infer_time_mark.set_value(int(time.time()))
502+
494503
yield sub_req_id, out_str, metadata, finish_status
495504
# 如果有子请求完成,就更新计数
496505
if finish_status.is_finished():

lightllm/utils/health_check.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
import os
2+
import time
23
import asyncio
34
import numpy as np
45
from dataclasses import dataclass
56
from lightllm.server.core.objs import SamplingParams
67
from lightllm.server.multimodal_params import MultimodalParams
78
from lightllm.server.httpserver.manager import HttpServerManager
9+
from lightllm.server.router.dynamic_prompt.shared_arr import SharedInt
810
from fastapi import Request
911
from lightllm.server.req_id_generator import ReqIDGenerator
1012
from lightllm.utils.log_utils import init_logger
13+
from lightllm.utils.envs_utils import get_unique_server_name
1114

1215
logger = init_logger(__name__)
1316

@@ -24,6 +27,7 @@ class HealthObj:
2427
_failure_threshold: int = int(os.getenv("HEALTH_FAILURE_THRESHOLD", 3))
2528
timeout: int = int(os.getenv("HEALTH_TIMEOUT", 100))
2629
dynamic_timeout: int = int(os.getenv("HEALTH_TIMEOUT", 100))
30+
latest_success_infer_time_mark = SharedInt(f"{get_unique_server_name()}_latest_success_infer_time_mark")
2731

2832
def begin_check(self):
2933
self._is_health_checking = True
@@ -48,13 +52,22 @@ def is_health(self):
4852
def is_checking(self) -> bool:
    """Return the current value of the ``_is_health_checking`` flag.

    The flag is set to True by ``begin_check``; this accessor only reads it.
    """
    return self._is_health_checking
5054

55+
def has_latest_inference(self) -> bool:
    """Report whether an inference was recorded within the health timeout.

    Reads the shared ``latest_success_infer_time_mark`` value (written
    elsewhere as ``int(time.time())``) and compares its age, in seconds,
    against ``self.timeout``.

    Returns:
        True when fewer than ``self.timeout`` seconds have elapsed since the
        stored time mark, otherwise False.
    """
    last_timemark = self.latest_success_infer_time_mark.get_value()
    # Age of the most recent recorded inference, in wall-clock seconds.
    time_diff = time.time() - last_timemark
    return time_diff < self.timeout
59+
5160

5261
health_obj = HealthObj()
5362

5463

5564
async def health_check(args, httpserver_manager: HttpServerManager, request: Request):
5665
if health_obj.is_checking():
5766
return health_obj.is_health()
67+
68+
if health_obj.is_health() and health_obj.has_latest_inference():
69+
return health_obj.is_health()
70+
5871
health_obj.begin_check()
5972
try:
6073
request_dict = {"inputs": "你好!", "parameters": {"do_sample": True, "temperature": 0.8, "max_new_tokens": 2}}

0 commit comments

Comments
 (0)