@@ -73,7 +73,7 @@ asyncio.run(main())
7373
7474### OpenAI
7575
76- - ** Models** : gpt -5.1-chat-latest (default) , GPT-4o, GPT-4o-mini, o1-preview, o1-mini
76+ - ** Models** : GPT-5.1, GPT-4o, o1, o3, etc. ( [ See latest models ] ( https://platform.openai.com/docs/models ) )
7777- ** Features** : Function calling, streaming, embeddings, reasoning models
7878- ** Best for** : General-purpose tasks, reasoning, code generation
7979
@@ -82,52 +82,52 @@ from spoon_ai.chat import ChatBot
8282
8383# OpenAI configuration with default model
8484llm = ChatBot(
85- model_name = " gpt-5.1-chat-latest" , # Framework default
85+ model_name = " gpt-5.1-chat-latest" , # Check docs for latest model names
8686 llm_provider = " openai" ,
8787 temperature = 0.7
8888)
8989```
9090
9191### Anthropic (Claude)
9292
93- - ** Models** : Claude-Sonnet-4-20250514 (default) , Claude-3 .5 Sonnet, Claude-3.5 Haiku
93+ - ** Models** : Claude 4.5 Opus, Claude 4.5 Sonnet, etc. ( [ See latest models ] ( https://docs.anthropic.com/en/docs/about-claude/models ) )
9494- ** Features** : Large context windows, prompt caching, safety features
9595- ** Best for** : Long documents, analysis, safety-critical applications
9696
9797``` python
9898# Anthropic configuration with default model
9999llm = ChatBot(
100- model_name = " claude-sonnet-4-20250514" , # Framework default
100+ model_name = " claude-sonnet-4-20250514" , # Check docs for latest model names
101101 llm_provider = " anthropic" ,
102102 temperature = 0.1
103103)
104104```
105105
106106### Google (Gemini)
107107
108- - ** Models** : Gemini-2.5-Pro (default) , Gemini-2.0- Flash, Gemini-1.5-Pro
108+ - ** Models** : Gemini 3 Pro, Gemini 2.5 Flash, etc. ( [ See latest models ] ( https://ai.google.dev/gemini-api/docs/models ) )
109109- ** Features** : Multimodal capabilities, fast inference, large context
110110- ** Best for** : Multimodal tasks, cost-effective solutions, long context
111111
112112``` python
113113# Google configuration with default model
114114llm = ChatBot(
115- model_name = " gemini-2.5 -pro" , # Framework default
115+ model_name = " gemini-3-pro" , # Check docs for latest model names
116116 llm_provider = " gemini" ,
117117 temperature = 0.1
118118)
119119```
120120
121121### DeepSeek
122122
123- - ** Models** : DeepSeek-Reasoner (default) , DeepSeek-V3, DeepSeek-Chat
123+ - ** Models** : DeepSeek-V3, DeepSeek-Reasoner, etc. ( [ See latest models ] ( https://platform.deepseek.com/api-docs/ ) )
124124- ** Features** : Advanced reasoning, code-specialized models, cost-effective
125125- ** Best for** : Complex reasoning, code generation, technical tasks
126126
127127``` python
128128# DeepSeek configuration with default model
129129llm = ChatBot(
130- model_name = " deepseek-reasoner" , # Framework default
130+ model_name = " deepseek-reasoner" , # Check docs for latest model names
131131 llm_provider = " deepseek" ,
132132 temperature = 0.2
133133)
@@ -156,13 +156,14 @@ The LLM Manager provides provider-agnostic access with automatic fallback:
156156from spoon_ai.llm.manager import LLMManager
157157
158158# Initialize with multiple providers
159+ # Note: Check each provider's docs for latest model names
159160llm_manager = LLMManager(
160161 primary_provider = " openai" ,
161162 fallback_providers = [" anthropic" , " gemini" ],
162163 model_preferences = {
163164 " openai" : " gpt-5.1-chat-latest" ,
164165 " anthropic" : " claude-sonnet-4-20250514" ,
165- " gemini" : " gemini-2.5 -pro" ,
166+ " gemini" : " gemini-3-pro" ,
166167 " deepseek" : " deepseek-reasoner"
167168 }
168169)
@@ -205,17 +206,18 @@ DEFAULT_TEMPERATURE=0.3
205206
206207## Advanced Features
207208
208- ### Prompt Caching (Anthropic)
209+ ### Response Caching
209210
210211``` python
211- from spoon_ai.llm.cache import PromptCache
212+ from spoon_ai.llm.cache import LLMResponseCache
212213
213- # Enable prompt caching for repeated system prompts
214+ # Enable response caching to avoid redundant API calls
215+ cache = LLMResponseCache()
214216llm = ChatBot(
215217 model_name = " claude-sonnet-4-20250514" ,
216218 llm_provider = " anthropic" ,
217- enable_caching = True
218219)
220+ # Cache is automatically managed by the framework
219221```
220222
221223### Streaming Responses
@@ -253,35 +255,39 @@ response = await llm.generate(
253255
254256### Task-Based Recommendations
255257
256- #### Code Generation
258+ > Choose the right model for your use case. Check official documentation for the latest model capabilities.
257259
258- - Primary: DeepSeek-Reasoner, gpt-5.1-chat-latest
259- - Alternative: Claude-Sonnet-4
260+ #### Code Generation
261+ - ** Recommended** : DeepSeek (cost-effective), OpenAI GPT models (fast iteration)
262+ - ** Alternative** : Anthropic Claude (strong reasoning)
260263
261264#### Analysis & Reasoning
262-
263- - Primary: DeepSeek-Reasoner, gpt-5.1-chat-latest, Claude-Sonnet-4
264- - Alternative: Gemini-2.5-Pro
265+ - ** Recommended** : OpenAI o-series models, DeepSeek Reasoner, Claude
266+ - ** Alternative** : Gemini Pro
265267
266268#### Cost-Sensitive Tasks
267-
268- - Primary: DeepSeek-Reasoner, Gemini-2.5-Pro
269- - Alternative: gpt-5.1-chat-latest
269+ - ** Recommended** : DeepSeek models, Gemini models
270+ - ** Alternative** : OpenRouter for provider comparison
270271
271272#### Long Context Tasks
272-
273- - Primary: Gemini-2.5-Pro (250K tokens), Claude-Sonnet-4 (200K tokens)
274- - Alternative: DeepSeek-Reasoner (65K tokens)
273+ - ** Recommended** : Gemini (largest context), Claude (large context)
274+ - ** Alternative** : Check each provider's latest context window limits
275275
276276### Performance Comparison
277277
278- | Provider | Speed | Cost | Context | Quality |
279- | ------------------------- | --------- | -------- | ------- | -------------------- |
280- | OpenAI gpt-5.1-chat-latest | Fast | Medium | 128K | Excellent |
281- | Anthropic Claude-Sonnet-4 | Medium | Medium | 200K | Excellent |
282- | Google Gemini-2.5-Pro | Very Fast | Low | 250K | Very Good |
283- | DeepSeek-Reasoner | Fast | Very Low | 65K | Superior (Reasoning) |
284- | OpenAI o1-preview | Slow | High | 128K | Superior (Reasoning) |
278+ > ** Note** : Model capabilities and pricing change frequently. Always check the official documentation for the latest information:
279+ > - [ OpenAI Models] ( https://platform.openai.com/docs/models )
280+ > - [ Anthropic Models] ( https://docs.anthropic.com/en/docs/about-claude/models )
281+ > - [ Google Gemini Models] ( https://ai.google.dev/gemini-api/docs/models )
282+ > - [ DeepSeek Models] ( https://platform.deepseek.com/api-docs/ )
283+
284+ | Provider | Model Example | Context Window | Best For |
285+ | ----------| ---------------| ----------------| ----------|
286+ | ** OpenAI** | gpt-5.1-chat-latest | Check docs | General purpose, tool calling |
287+ | ** Anthropic** | claude-sonnet-4-20250514 | Check docs | Analysis, long documents |
288+ | ** Google** | gemini-2.5-pro | Check docs | Multimodal, cost-effective |
289+ | ** DeepSeek** | deepseek-reasoner | Check docs | Reasoning, code generation |
290+ | ** OpenRouter** | Various | Varies | Access multiple providers |
285291
286292## Error Handling & Fallbacks
287293
@@ -341,29 +347,34 @@ response = await llm_manager.generate("Complex reasoning task")
341347### Usage Tracking
342348
343349``` python
344- from spoon_ai.llm.monitoring import LLMMonitor
350+ from spoon_ai.llm.monitoring import MetricsCollector, get_metrics_collector
345351
346- # Track usage and costs automatically
347- monitor = LLMMonitor()
348- response = await llm.generate(" Hello" , monitor = monitor)
352+ # Get the global metrics collector
353+ collector = get_metrics_collector()
349354
350- # Get metrics
351- metrics = monitor.get_metrics()
352- print (f " Tokens used: { metrics.total_tokens} " )
353- print (f " Cost: $ { metrics.total_cost} " )
355+ # Metrics are automatically tracked during LLM calls
356+ response = await llm.ask([{" role" : " user" , " content" : " Hello" }])
357+
358+ # Get collected stats per provider
359+ stats = collector.get_stats(" openai" )
360+ print (f " Total requests: { stats.total_requests} " )
361+ print (f " Average latency: { stats.average_latency:.2f } s " )
354362```
355363
356364### Performance Monitoring
357365
358366``` python
359- # Monitor response times and success rates
360- monitor.log_request(
361- provider = " openai" ,
362- model = " gpt-4" ,
363- tokens = 150 ,
364- latency = 1.2 ,
365- success = True
366- )
367+ # The MetricsCollector automatically tracks:
368+ # - Request counts and success/failure rates
369+ # - Token usage (input/output)
370+ # - Latency statistics (average, min, max)
371+ # - Error tracking per provider
372+
373+ # Access provider-specific stats
374+ for provider in [" openai" , " anthropic" , " gemini" ]:
375+ stats = collector.get_stats(provider)
376+ if stats.total_requests > 0 :
377+ print (f " { provider} : { stats.total_requests} requests, { stats.error_count} errors " )
367378```
368379
369380## Best Practices
0 commit comments