@@ -247,6 +247,31 @@ async def run_step(
247247        dn.log_metric("max_tokens", 1)
248248        return None
249249
250+    # Handle caching-related errors by disabling cache and retrying
251+    if "cache_control" in str(chat.error) and args.enable_cache:
252+        logger.warning(f"|- Caching not supported by provider, disabling cache and retrying: {chat.error}")
253+        dn.log_metric("cache_unsupported", 1)
254+        # Create new pipeline without caching
255+        retry_pipeline = (
256+            generator.wrap(backoff_wrapper)
257+            .chat(pipeline.chat.messages)
258+            .cache(False)
259+        )
260+        try:
261+            retry_chat = await retry_pipeline.catch(
262+                litellm.exceptions.InternalServerError,
263+                litellm.exceptions.BadRequestError,
264+                litellm.exceptions.Timeout,
265+                litellm.exceptions.ServiceUnavailableError,
266+                litellm.exceptions.APIConnectionError,
267+                on_failed="include",
268+            ).run()
269+            if not retry_chat.failed:
270+                logger.info("|- Successfully retried without cache")
271+                return retry_pipeline
272+        except Exception as e:
273+            logger.warning(f"|- Retry without cache also failed: {e}")
274+
250275    logger.warning(f"|- Chat failed: {chat.error}")
251276    dn.log_metric("failed_chats", 1)
252277    pipeline.chat.generated = []
0 commit comments