-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path litellm_config.yaml
More file actions
39 lines (32 loc) · 1.07 KB
/
litellm_config.yaml
File metadata and controls
39 lines (32 loc) · 1.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# Model registry: each entry maps a public alias (model_name) to the
# provider-specific deployment LiteLLM should route it to.
model_list:
  # Tier 1: Frontier API
  - model_name: executive-synthesis
    litellm_params:
      model: gemini/gemini-3.1-pro
      # "os.environ/<VAR>" is LiteLLM's syntax for reading the key from the
      # proxy's environment at startup — the secret never lives in this file.
      api_key: os.environ/GEMINI_API_KEY

  # Tier 2: The 34B Workhorse (Dummy Endpoint for now)
  - model_name: local-heavy
    litellm_params:
      model: ollama/command-r
      # host.docker.internal reaches the host's Ollama daemon from inside
      # the proxy container (Docker Desktop DNS alias).
      api_base: http://host.docker.internal:11434

  # Tier 3: The 8B Swarm (Dummy Endpoint for now)
  - model_name: local-swarm
    litellm_params:
      model: ollama/llama3
      api_base: http://host.docker.internal:11434
litellm_settings:
  # Telemetry: log every API call (and its token cost) locally.
  # NOTE(review): confirm "local_debugging" is a registered callback name in
  # the deployed LiteLLM version — unknown callbacks are typically ignored.
  success_callback: ["local_debugging"]
  failure_callback: ["local_debugging"]
router_settings:
  # Pick randomly among healthy deployments sharing a model_name.
  routing_strategy: simple-shuffle

  # THE KILL SWITCHES & THROTTLES
  # NOTE(review): in LiteLLM, rpm/tpm are usually per-deployment keys inside
  # each model's litellm_params; verify they are honored here at router level.
  # 1. Requests Per Minute (RPM) Limit
  #    Forces the intern to "breathe".
  rpm: 15
  # 2. Tokens Per Minute (TPM) Limit
  #    Prevents massive context-window blowouts.
  tpm: 30000
  # 3. Timeout limit (seconds)
  #    If the local engine hangs for more than 60 seconds, kill the request.
  timeout: 60