fix: gracefully handle litellm.RateLimitError by sleeping 3 seconds before retrying

evilsocket · evilsocket · commit 7cb5067afeb0 · 2025-04-03T20:29:43.000+02:00
diff --git a/nerve/generation/litellm.py b/nerve/generation/litellm.py
@@ -1,3 +1,4 @@
+import asyncio
 import json
 import traceback
 import typing as t
@@ -77,6 +78,10 @@ async def _litellm_generate(
                 total_tokens=response.usage.total_tokens,
                 cost=response._hidden_params.get("response_cost", None),
             ), response.choices[0].message
+        except litellm.RateLimitError as e:  # type: ignore
+            logger.warning(f"rate limit exceeded, sleeping for 3 seconds: {e}")
+            await asyncio.sleep(3)
+            return await self._litellm_generate(conversation, tools_schema)
         except litellm.AuthenticationError as e:  # type: ignore
             logger.error(e)
             exit(1)