diff --git a/litellm/caching/redis_cache.py b/litellm/caching/redis_cache.py index af7468ba14c8..351c412e4238 100644 --- a/litellm/caching/redis_cache.py +++ b/litellm/caching/redis_cache.py @@ -822,7 +822,8 @@ async def async_get_cache( print_verbose( f"Got Async Redis Cache: key: {key}, cached_response {cached_response}" ) - response = self._get_cache_logic(cached_response=cached_response) + # Parse cached response off the event loop if it is potentially blocking (large payload etc) + response = await asyncio.to_thread(self._get_cache_logic, cached_response) end_time = time.time() _duration = end_time - start_time diff --git a/litellm/proxy/health_check_utils/shared_health_check_manager.py b/litellm/proxy/health_check_utils/shared_health_check_manager.py index cfd03a4d1789..befe1070b13a 100644 --- a/litellm/proxy/health_check_utils/shared_health_check_manager.py +++ b/litellm/proxy/health_check_utils/shared_health_check_manager.py @@ -128,7 +128,7 @@ async def get_cached_health_check_results(self) -> Optional[Dict[str, Any]]: # Parse the cached data if isinstance(cached_data, str): - cached_results = json.loads(cached_data) + cached_results = await asyncio.to_thread(json.loads, cached_data) else: cached_results = cached_data