@@ -247,6 +247,31 @@ async def run_step(
247247        dn.log_metric("max_tokens", 1)
248248        return None
249249
250+    # Handle caching-related errors by disabling cache and retrying
251+    if "cache_control" in str(chat.error) and args.enable_cache:
252+        logger.warning(f"|- Caching not supported by provider, disabling cache and retrying: {chat.error}")
253+        dn.log_metric("cache_unsupported", 1)
254+        # Create new pipeline without caching
255+        retry_pipeline = (
256+            generator.wrap(backoff_wrapper)
257+            .chat(pipeline.chat.messages)
258+            .cache(False)
259+        )
260+        try:
261+            retry_chat = await retry_pipeline.catch(
262+                litellm.exceptions.InternalServerError,
263+                litellm.exceptions.BadRequestError,
264+                litellm.exceptions.Timeout,
265+                litellm.exceptions.ServiceUnavailableError,
266+                litellm.exceptions.APIConnectionError,
267+                on_failed="include",
268+            ).run()
269+            if not retry_chat.failed:
270+                logger.info("|- Successfully retried without cache")
271+                return retry_pipeline
272+        except Exception as e:
273+            logger.warning(f"|- Retry without cache also failed: {e}")
274+
250275    logger.warning(f"|- Chat failed: {chat.error}")
251276    dn.log_metric("failed_chats", 1)
252277    pipeline.chat.generated = []
0 commit comments