Skip to content

Commit 669c6e7

Browse files
committed
Option to let the LLM ready status expire
1 parent 3ea924f commit 669c6e7

File tree

4 files changed

+61
-2
lines changed

4 files changed

+61
-2
lines changed

examples/rcsconfig.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ ols_config:
7272
suppress_auth_checks_warning_in_log: false
7373
default_provider: my_bam
7474
default_model: ibm/granite-13b-chat-v2
75+
expire_llm_is_ready_persistent_state: -1
7576
# query_filters:
7677
# - name: foo_filter
7778
# pattern: '\b(?:foo)\b'

ols/app/endpoints/health.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
"""
77

88
import logging
9+
import time
910
from typing import Any
1011

1112
from fastapi import APIRouter, HTTPException, status
@@ -22,6 +23,7 @@
2223
router = APIRouter(tags=["health"])
2324
logger = logging.getLogger(__name__)
2425
llm_is_ready_persistent_state: bool = False
26+
llm_is_ready_timestamp = 0
2527

2628

2729
def llm_is_ready() -> bool:
@@ -30,9 +32,17 @@ def llm_is_ready() -> bool:
3032
If so, store the success to `llm_is_ready_persistent_state` to cache
3133
the result for future calls.
3234
"""
33-
global llm_is_ready_persistent_state # pylint: disable=global-statement
34-
if llm_is_ready_persistent_state is True:
35+
global llm_is_ready_persistent_state, llm_is_ready_timestamp # pylint: disable=global-statement
36+
last_called, llm_is_ready_timestamp = llm_is_ready_timestamp, int(time.time())
37+
if llm_is_ready_persistent_state is True and (
38+
not config.ols_config.expire_llm_is_ready_persistent_state
39+
or config.ols_config.expire_llm_is_ready_persistent_state < 0
40+
or (llm_is_ready_timestamp - last_called)
41+
< config.ols_config.expire_llm_is_ready_persistent_state
42+
):
3543
return True
44+
# Reset `llm_is_ready_persistent_state`
45+
llm_is_ready_persistent_state = False
3646
try:
3747
bare_llm = load_llm(
3848
config.ols_config.default_provider, config.ols_config.default_model

ols/app/models/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -949,6 +949,7 @@ class OLSConfig(BaseModel):
949949

950950
default_provider: Optional[str] = None
951951
default_model: Optional[str] = None
952+
expire_llm_is_ready_persistent_state: Optional[int] = -1
952953
max_workers: Optional[int] = None
953954
query_filters: Optional[list[QueryFilter]] = None
954955
query_validation_method: Optional[str] = constants.QueryValidationMethod.DISABLED

tests/unit/app/endpoints/test_health.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Unit tests for health endpoints handlers."""
22

3+
import time
34
from unittest.mock import patch
45

56
import pytest
@@ -68,6 +69,52 @@ def test_readiness_probe_llm_check__state_cache(mocked_load_llm):
6869
assert mocked_load_llm.call_count == 1
6970

7071

72+
@patch("ols.app.endpoints.health.llm_is_ready_persistent_state", new=False)
73+
@patch("ols.app.endpoints.health.load_llm")
74+
def test_readiness_probe_llm_check__state_cache_not_expired(mocked_load_llm):
75+
"""Test the scenario with cache not expired - LLM check is done only once."""
76+
try:
77+
# Set cache expiration time to 1 sec.
78+
config.ols_config.expire_llm_is_ready_persistent_state = 1
79+
mocked_load_llm.return_value = MockedLLM(invoke_return="message")
80+
assert llm_is_ready()
81+
assert mocked_load_llm.call_count == 1
82+
83+
response = readiness_probe_get_method()
84+
assert response == ReadinessResponse(ready=True, reason="service is ready")
85+
86+
# try again and check if the llm function was invoked again - it shouldn't
87+
llm_is_ready()
88+
assert mocked_load_llm.call_count == 1
89+
finally:
90+
# Reset the expire_llm_is_ready_persistent_state option.
91+
config.ols_config.expire_llm_is_ready_persistent_state = -1
92+
93+
94+
@patch("ols.app.endpoints.health.llm_is_ready_persistent_state", new=False)
95+
@patch("ols.app.endpoints.health.load_llm")
96+
def test_readiness_probe_llm_check__state_cache_expired(mocked_load_llm):
97+
"""Test the scenario with cache expired - LLM check is done twice."""
98+
try:
99+
# Set cache expiration time to 1 sec.
100+
config.ols_config.expire_llm_is_ready_persistent_state = 1
101+
mocked_load_llm.return_value = MockedLLM(invoke_return="message")
102+
assert llm_is_ready()
103+
assert mocked_load_llm.call_count == 1
104+
105+
response = readiness_probe_get_method()
106+
assert response == ReadinessResponse(ready=True, reason="service is ready")
107+
# Wait for 1.5 secs and let the cache get expired.
108+
time.sleep(1.5)
109+
110+
# try again and check if the llm function was invoked again - it should.
111+
llm_is_ready()
112+
assert mocked_load_llm.call_count == 2
113+
finally:
114+
# Reset the expire_llm_is_ready_persistent_state option.
115+
config.ols_config.expire_llm_is_ready_persistent_state = -1
116+
117+
71118
@patch("ols.app.endpoints.health.llm_is_ready_persistent_state", new=False)
72119
@patch("ols.app.endpoints.health.load_llm")
73120
def test_readiness_probe_llm_check__llm_raise(mocked_load_llm):

0 commit comments

Comments
 (0)