@@ -154,22 +154,27 @@ The LLM Manager provides provider-agnostic access with automatic fallback:
154154
155155``` python
156156from spoon_ai.llm.manager import LLMManager
157+ from spoon_ai.schema import Message
158+ import asyncio
157159
158- # Initialize with multiple providers
159- # Note: Check each provider's docs for latest model names
160- llm_manager = LLMManager(
161- primary_provider = " openai" ,
162- fallback_providers = [" anthropic" , " gemini" ],
163- model_preferences = {
164- " openai" : " gpt-5.1-chat-latest" ,
165- " anthropic" : " claude-sonnet-4-20250514" ,
166- " gemini" : " gemini-3-pro" ,
167- " deepseek" : " deepseek-reasoner"
168- }
169- )
160+ # Initialize LLM Manager
161+ llm_manager = LLMManager()
162+
163+ # Clear default_provider so fallback_chain takes precedence
164+ llm_manager.default_provider = None
170165
171- # Use with automatic fallback
172- response = await llm_manager.generate(" Explain quantum computing" )
166+ # Set fallback chain (primary provider first, then fallbacks)
167+ llm_manager.set_fallback_chain([" gemini" , " openai" ])
168+
169+ async def main ():
170+ # Create messages
171+ messages = [Message(role = " user" , content = " Explain quantum computing in one sentence" )]
172+ response = await llm_manager.chat(messages)
173+ print (f " Response: { response.content} " )
174+ print (f " Provider used: { response.provider} " )
175+
176+ if __name__ == " __main__" :
177+ asyncio.run(main())
173178```
174179
175180## Configuration
@@ -209,46 +214,80 @@ DEFAULT_TEMPERATURE=0.3
209214### Response Caching
210215
211216``` python
212- from spoon_ai.llm.cache import LLMResponseCache
217+ from spoon_ai.llm.cache import LLMResponseCache, CachedLLMManager
218+ from spoon_ai.llm.manager import LLMManager
219+ from spoon_ai.schema import Message
220+ import asyncio
221+
213222
214223# Enable response caching to avoid redundant API calls
215224cache = LLMResponseCache()
216- llm = ChatBot(
217- model_name = " claude-sonnet-4-20250514" ,
218- llm_provider = " anthropic" ,
219- )
220- # Cache is automatically managed by the framework
221- ```
225+ llm_manager = LLMManager()
226+ cached_manager = CachedLLMManager(llm_manager, cache = cache)
227+
228+ async def main ():
229+ messages = [Message(role = " user" , content = " Explain quantum computing in one sentence" )]
230+ response1 = await cached_manager.chat(messages)
231+ print (response1)
232+
233+ if __name__ == " __main__" :
234+ asyncio.run(main())
235+ ```
222236
223237### Streaming Responses
224237
225238```python
226239# Stream responses for real-time interaction
227- async for chunk in llm.stream(" Write a long story about AI" ):
228- print (chunk, end = " " , flush = True )
240+ import asyncio
241+ from spoon_ai.chat import ChatBot
242+
243+ async def main ():
244+ # Create a ChatBot instance
245+ llm = ChatBot(
246+ model_name = " gpt-5.1-chat-latest" ,
247+ llm_provider = " openai" ,
248+ temperature = 0.7
249+ )
250+
251+ # Prepare messages
252+ messages = [{" role" : " user" , " content" : " Write a long story about AI" }]
253+
254+ # Stream the response chunk by chunk
255+ async for chunk in llm.astream(messages):
256+ # chunk.delta contains the text content of this chunk
257+ print (chunk.delta, end = " " , flush = True )
258+
259+ if __name__ == " __main__" :
260+ asyncio.run(main())
229261```
230262
231263### Function Calling
232264
233265``` python
234266# Define functions for the model to call
235- functions = [
267+ tools = [
236268 {
237- " name" : " get_weather" ,
238- " description" : " Get current weather" ,
239- " parameters" : {
240- " type" : " object" ,
241- " properties" : {
242- " location" : {" type" : " string" }
269+ " type" : " function" ,
270+ " function" : {
271+ " name" : " get_weather" ,
272+ " description" : " Get current weather for a location" ,
273+ " parameters" : {
274+ " type" : " object" ,
275+ " properties" : {
276+ " location" : {
277+ " type" : " string" ,
278+ " description" : " The city and state, e.g. San Francisco, CA"
279+ }
280+ },
281+ " required" : [" location" ]
243282 }
244283 }
245284 }
246285]
247286
248- response = await llm.generate (
249- " What's the weather in New York? " ,
250- functions = functions
251- )
287+ response = await llm.ask_tool (
288+ messages = messages ,
289+ tools = tools
290+ )
252291```
253292
254293## Model Selection Guide
@@ -296,18 +335,28 @@ response = await llm.generate(
296335The framework provides built-in error handling with automatic fallback between providers:
297336
298337``` python
338+ """
339+ LLMManager with fallback chain demo - demonstrates automatic provider fallback.
340+ """
299341from spoon_ai.llm.manager import LLMManager
342+ from spoon_ai.schema import Message
343+ import asyncio
300344
301- # Configure fallback chain - errors are handled automatically
302- llm_manager = LLMManager(
303- primary_provider = " openai" ,
304- fallback_providers = [" anthropic" , " google" ],
305- retry_attempts = 3 ,
306- timeout = 30
307- )
345+ # Initialize LLM Manager
346+ llm_manager = LLMManager()
347+ # Clear default_provider so fallback_chain takes precedence
348+ llm_manager.default_provider = None
349+ # The manager will try providers in order: gemini -> openai -> anthropic
350+ llm_manager.set_fallback_chain([" gemini" , " openai" , " anthropic" ])
308351
309- # Automatic fallback on provider failures
310- response = await llm_manager.generate(" Hello world" )
352+ async def main ():
353+ # Create messages
354+ messages = [Message(role = " user" , content = " Hello world" )]
355+ response = await llm_manager.chat(messages)
356+ print (response.content)
357+
358+ if __name__ == " __main__" :
359+ asyncio.run(main())
311360```
312361
313362### Error Types & Recovery
@@ -318,7 +367,7 @@ The framework uses structured error types for clean error handling:
318367from spoon_ai.llm.errors import RateLimitError, AuthenticationError, ModelNotFoundError
319368
320369# Simple error handling with specific error types
321- response = await llm.generate( " Hello world" )
370+ response = await llm.ask([{" role" : " user" , " content" : " Hello world" }])
322371
323372# Framework handles common errors automatically:
324373# - Rate limits: automatic retry with backoff
@@ -331,15 +380,14 @@ response = await llm.generate("Hello world")
331380
332381``` python
333382# Framework provides graceful degradation patterns
334- llm_manager = LLMManager(
335- primary_provider = " openai" ,
336- fallback_providers = [" deepseek" , " gemini" ], # Cost-effective fallbacks
337- enable_graceful_degradation = True
338- )
383+ llm_manager = LLMManager()
384+ llm_manager.default_provider = " openai"
385+ llm_manager.set_fallback_chain([" openai" , " deepseek" , " gemini" ]) # Cost-effective fallbacks
339386
340387# If primary fails, automatically uses fallback
341388# No manual error handling required
342- response = await llm_manager.generate(" Complex reasoning task" )
389+ messages = [Message(role = " user" , content = " Complex reasoning task: Explain quantum computing and its applications" )]
390+ await llm_manager.chat(messages)
343391```
344392
345393## Monitoring & Metrics
@@ -356,9 +404,14 @@ collector = get_metrics_collector()
356404response = await llm.ask([{" role" : " user" , " content" : " Hello" }])
357405
358406# Get collected stats per provider
359- stats = collector.get_stats(" openai" )
360- print (f " Total requests: { stats.total_requests} " )
361- print (f " Average latency: { stats.average_latency:.2f } s " )
407+ stats = collector.get_provider_stats(" openai" )
408+ print (f " Total requests: { stats.total_requests} " )
409+ print (f " Successful requests: { stats.successful_requests} " )
410+ print (f " Failed requests: { stats.failed_requests} " )
411+ print (f " Success rate: { stats.success_rate:.2f } % " )
412+ print (f " Average duration: { stats.average_duration:.3f } s " )
413+ print (f " Total tokens: { stats.total_tokens} " )
414+ print (f " Total cost: $ { stats.total_cost:.6f } " )
362415```
363416
364417### Performance Monitoring
@@ -372,9 +425,17 @@ print(f"Average latency: {stats.average_latency:.2f}s")
372425
373426# Access provider-specific stats
374427for provider in [" openai" , " anthropic" , " gemini" ]:
375- stats = collector.get_stats(provider)
376- if stats.total_requests > 0 :
377- print (f " { provider} : { stats.total_requests} requests, { stats.error_count} errors " )
428+ stats = collector.get_provider_stats(provider)
429+ if stats and stats.total_requests > 0 :
430+ print (f " { provider} : { stats.total_requests} requests, { stats.failed_requests} errors " )
431+
432+ # Access provider-specific stats
433+ all_stats = collector.get_all_stats()
434+ if all_stats:
435+ print (f " \n 📈 All Providers Summary: " )
436+ for provider_name, provider_stats in all_stats.items():
437+ print (f " { provider_name} : { provider_stats.total_requests} requests, "
438+ f " { provider_stats.success_rate:.1f } % success rate " )
378439```
379440
380441## Best Practices
@@ -408,7 +469,8 @@ The SpoonOS framework follows a "fail-fast, recover-gracefully" approach:
408469
409470``` python
410471# Preferred: Let framework handle errors
411- response = await llm_manager.generate(" Hello world" )
472+ messages = [Message(role = " user" , content = " Hello world" )]
473+ response = await llm_manager.chat(" Hello world" )
412474
413475# Only use explicit error handling for custom business logic
414476if response.provider != " openai" :
0 commit comments