Skip to content

Commit ffc9af4

Browse files
fix health timeout (#773)
Co-authored-by: shihaobai <baishihao@sensetime.com>
Co-authored-by: hiworldwzj <30762946+hiworldwzj@users.noreply.github.com>
1 parent c16e7b8 commit ffc9af4

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

lightllm/utils/health_check.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ class HealthObj:
2323
_failure_count: int = 0
2424
_failure_threshold: int = int(os.getenv("HEALTH_FAILURE_THRESHOLD", 3))
2525
timeout: int = int(os.getenv("HEALTH_TIMEOUT", 100))
26+
dynamic_timeout: int = int(os.getenv("HEALTH_TIMEOUT", 100))
2627

2728
def begin_check(self):
2829
self._is_health_checking = True
@@ -32,12 +33,14 @@ def end_check(self):
3233

3334
def set_unhealth(self):
3435
self._failure_count += 1
36+
self.dynamic_timeout += self.timeout
3537
if self._failure_count > self._failure_threshold:
3638
self._is_health = False
3739

3840
def set_health(self):
3941
self._is_health = True
4042
self._failure_count = 0
43+
self.dynamic_timeout = self.timeout
4144

4245
def is_health(self):
4346
return self._is_health
@@ -73,11 +76,11 @@ async def check_timeout(results_generator):
7376
pass
7477

7578
try:
76-
await asyncio.wait_for(check_timeout(results_generator), timeout=health_obj.timeout)
79+
await asyncio.wait_for(check_timeout(results_generator), timeout=health_obj.dynamic_timeout)
7780
health_obj.set_health()
7881
except asyncio.TimeoutError:
7982
health_obj.set_unhealth()
80-
logger.warning("Health check timeout!")
83+
logger.warning(f"Health check timeout! The failure count is: {str(health_obj._failure_count)}")
8184
return health_obj.is_health()
8285
except Exception as e:
8386
logger.exception(str(e))

0 commit comments

Comments
 (0)