|
1 | 1 | """Unit tests for health endpoints handlers."""
|
2 | 2 |
|
| 3 | +import time |
3 | 4 | from unittest.mock import patch
|
4 | 5 |
|
5 | 6 | import pytest
|
@@ -68,6 +69,52 @@ def test_readiness_probe_llm_check__state_cache(mocked_load_llm):
|
68 | 69 | assert mocked_load_llm.call_count == 1
|
69 | 70 |
|
70 | 71 |
|
@patch("ols.app.endpoints.health.llm_is_ready_persistent_state", new=False)
@patch("ols.app.endpoints.health.load_llm")
def test_readiness_probe_llm_check__state_cache_not_expired(mocked_load_llm):
    """Test the scenario with cache not expired - LLM check is done only once."""
    try:
        mocked_load_llm.return_value = MockedLLM(invoke_return="message")
        # Give the cached readiness state a 1-second TTL for this test.
        config.ols_config.expire_llm_is_ready_persistent_state = 1

        # First probe performs the (mocked) LLM check exactly once.
        assert llm_is_ready()
        assert mocked_load_llm.call_count == 1

        assert readiness_probe_get_method() == ReadinessResponse(
            ready=True, reason="service is ready"
        )

        # A second probe inside the TTL must be answered from the cached
        # state, so the LLM loader is not invoked again.
        llm_is_ready()
        assert mocked_load_llm.call_count == 1
    finally:
        # Restore the default expiration setting for subsequent tests.
        config.ols_config.expire_llm_is_ready_persistent_state = -1
| 93 | + |
@patch("ols.app.endpoints.health.llm_is_ready_persistent_state", new=False)
@patch("ols.app.endpoints.health.load_llm")
def test_readiness_probe_llm_check__state_cache_expired(mocked_load_llm):
    """Test the scenario with cache expired - LLM check is done twice."""
    try:
        mocked_load_llm.return_value = MockedLLM(invoke_return="message")
        # Give the cached readiness state a 1-second TTL for this test.
        config.ols_config.expire_llm_is_ready_persistent_state = 1

        # First probe performs the (mocked) LLM check exactly once.
        assert llm_is_ready()
        assert mocked_load_llm.call_count == 1

        assert readiness_probe_get_method() == ReadinessResponse(
            ready=True, reason="service is ready"
        )

        # Sleep past the 1-second TTL so the cached state expires.
        time.sleep(1.5)

        # The next probe must redo the LLM check, invoking the loader again.
        llm_is_ready()
        assert mocked_load_llm.call_count == 2
    finally:
        # Restore the default expiration setting for subsequent tests.
        config.ols_config.expire_llm_is_ready_persistent_state = -1
| 116 | + |
| 117 | + |
71 | 118 | @patch("ols.app.endpoints.health.llm_is_ready_persistent_state", new=False)
|
72 | 119 | @patch("ols.app.endpoints.health.load_llm")
|
73 | 120 | def test_readiness_probe_llm_check__llm_raise(mocked_load_llm):
|
|
0 commit comments