77"""
88
99import asyncio
10- import os
1110from unittest .mock import patch
1211
1312import litellm
@@ -20,7 +19,7 @@ async def demo_rate_limit_handling():
2019 """Demonstrate rate limit handling with exponential backoff."""
2120 print ("🚀 Rate Limit Handling Demo" )
2221 print ("=" * 50 )
23-
22+
2423 # Create a test LLM configuration
2524 config = LLMConfig (
2625 api_key = "demo-key" ,
@@ -29,69 +28,69 @@ async def demo_rate_limit_handling():
        max_tokens=1000,
        temperature=0.1,
    )
-
+
    client = LLMClient(config)
-
+
    print("📝 Test 1: Successful retry after rate limit errors")
    print("-" * 50)
-
+
    # Mock response for successful case
    from unittest.mock import MagicMock
    mock_response = MagicMock()
    mock_response.choices = [MagicMock()]
    mock_response.choices[0].message.content = "Success after retries!"
-
+
    # Simulate rate limit errors followed by success
    with patch.object(client, "_litellm") as mock_litellm:
        mock_litellm.completion.side_effect = [
            litellm.RateLimitError("Rate limit exceeded", "openai", "gpt-4"),
            litellm.RateLimitError("Rate limit exceeded", "openai", "gpt-4"),
            mock_response,  # Success on third attempt
        ]
-
+
        messages = [{"role": "user", "content": "Hello, world!"}]
-
+
        try:
            result = await client.generate_text(messages)
            print(f"✅ Success: {result}")
            print(f"📊 Total attempts: {mock_litellm.completion.call_count}")
        except Exception as e:
            print(f"❌ Failed: {e}")
-
+
    print("\n📝 Test 2: Rate limit exhaustion (all retries fail)")
    print("-" * 50)
-
+
    # Simulate persistent rate limit errors
    with patch.object(client, "_litellm") as mock_litellm:
        mock_litellm.completion.side_effect = litellm.RateLimitError(
            "Rate limit exceeded", "openai", "gpt-4"
        )
-
+
        messages = [{"role": "user", "content": "This will fail"}]
-
+
        try:
            result = await client.generate_text(messages)
            print(f"✅ Unexpected success: {result}")
        except Exception as e:
            print(f"❌ Expected failure after retries: {e}")
            print(f"📊 Total attempts: {mock_litellm.completion.call_count}")
-
+
    print("\n📝 Test 3: Non-rate-limit error (no retries)")
    print("-" * 50)
-
+
    # Simulate a different type of error
    with patch.object(client, "_litellm") as mock_litellm:
        mock_litellm.completion.side_effect = ValueError("Invalid input")
-
+
        messages = [{"role": "user", "content": "This will fail immediately"}]
-
+
        try:
            result = await client.generate_text(messages)
            print(f"✅ Unexpected success: {result}")
        except Exception as e:
            print(f"❌ Expected immediate failure: {e}")
            print(f"📊 Total attempts: {mock_litellm.completion.call_count}")
-
+
    print("\n🎯 Rate Limit Configuration")
    print("-" * 50)
    print("• Max retries: 5 attempts")
@@ -100,7 +99,7 @@ async def demo_rate_limit_handling():
    print("• Exponential base: 2.0")
    print("• Jitter: Enabled (±20%)")
    print("\nDelay pattern: ~2s, ~4s, ~8s, ~16s, ~32s")
-
+
    print("\n✨ Demo completed!")


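# --- Illustrative sketch (not part of the commit) ---------------------------
# A minimal model of the retry behavior the demo exercises, derived from the
# configuration it prints (5 retries, exponential base 2.0, ±20% jitter,
# delays of ~2s, ~4s, ~8s, ~16s, ~32s). The helper names below (backoff_delay,
# call_with_retries) are hypothetical, not the project's actual API; they only
# illustrate the two behaviors tested above: retrying RateLimitError with
# exponential backoff (Tests 1 and 2) and failing fast on any other
# exception (Test 3).
import random


def backoff_delay(attempt: int, base: float = 2.0,
                  exponential_base: float = 2.0, jitter: float = 0.2) -> float:
    """Delay before retry `attempt` (0-indexed): ~2s, ~4s, ~8s, ~16s, ~32s."""
    delay = base * exponential_base ** attempt
    return delay * random.uniform(1.0 - jitter, 1.0 + jitter)  # ±20% jitter


async def call_with_retries(fn, *args, max_retries: int = 5):
    for attempt in range(max_retries + 1):  # one initial call + 5 retries
        try:
            return fn(*args)
        except litellm.RateLimitError:
            if attempt == max_retries:
                raise  # retries exhausted -> caller sees the error (Test 2)
            await asyncio.sleep(backoff_delay(attempt))
        # Any other exception is not caught here and propagates
        # immediately, with no retries (Test 3).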