Skip to content

Commit 7cb5067

Browse files
committed
fix: gracefully handle litellm.RateLimitError with a 3-second sleep
1 parent 75bcf6e commit 7cb5067

File tree

1 file changed

+5
-0
lines changed

1 file changed

+5
-0
lines changed

nerve/generation/litellm.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
+import asyncio
 import json
 import traceback
 import typing as t
@@ -77,6 +78,10 @@ async def _litellm_generate(
                 total_tokens=response.usage.total_tokens,
                 cost=response._hidden_params.get("response_cost", None),
             ), response.choices[0].message
+        except litellm.RateLimitError as e: # type: ignore
+            logger.warning(f"rate limit exceeded, sleeping for 3 seconds: {e}")
+            await asyncio.sleep(3)
+            return await self._litellm_generate(conversation, tools_schema)
         except litellm.AuthenticationError as e: # type: ignore
             logger.error(e)
             exit(1)

0 commit comments

Comments
 (0)