Skip to content

Commit 605a345

Browse files
committed
[owl] Update preset model (#899)
Added Claude Opus 4.6 and Claude Sonnet 4.6; added Gemini 3.1 Pro; added Kimi K2.5, GLM 5, MiniMax M2.5, and Qwen 3.5; removed Gemini 2.5 Pro and 2.5 Flash; added the route check for claude-*-4-6 and gemini-3.1-pro.
1 parent 2e070a2 commit 605a345

File tree

2 files changed

+154
-36
lines changed

2 files changed

+154
-36
lines changed

services/api/src/owl/configs/preset_models.json

Lines changed: 150 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,28 @@
109109
}
110110
]
111111
},
112+
{
113+
"meta": {
114+
"icon": "anthropic"
115+
},
116+
"id": "anthropic/claude-opus-4-6",
117+
"name": "Anthropic Claude Opus 4.6",
118+
"type": "llm",
119+
"context_length": 200000,
120+
"max_output_tokens": 64000,
121+
"capabilities": ["chat", "image", "reasoning", "tool"],
122+
"languages": ["en", "mul"],
123+
"llm_input_cost_per_mtoken": 5.0,
124+
"llm_output_cost_per_mtoken": 25.0,
125+
"deployments": [
126+
{
127+
"name": "Anthropic Claude Opus 4.6 Deployment",
128+
"provider": "anthropic",
129+
"routing_id": "anthropic/claude-opus-4-6",
130+
"api_base": ""
131+
}
132+
]
133+
},
112134
{
113135
"meta": {
114136
"icon": "anthropic"
@@ -131,6 +153,28 @@
131153
}
132154
]
133155
},
156+
{
157+
"meta": {
158+
"icon": "anthropic"
159+
},
160+
"id": "anthropic/claude-sonnet-4-6",
161+
"name": "Anthropic Claude Sonnet 4.6",
162+
"type": "llm",
163+
"context_length": 200000,
164+
"max_output_tokens": 64000,
165+
"capabilities": ["chat", "image", "reasoning", "tool"],
166+
"languages": ["en", "mul"],
167+
"llm_input_cost_per_mtoken": 3.0,
168+
"llm_output_cost_per_mtoken": 15.0,
169+
"deployments": [
170+
{
171+
"name": "Anthropic Claude Sonnet 4.6 Deployment",
172+
"provider": "anthropic",
173+
"routing_id": "anthropic/claude-sonnet-4-6",
174+
"api_base": ""
175+
}
176+
]
177+
},
134178
{
135179
"meta": {
136180
"icon": "anthropic"
@@ -179,8 +223,8 @@
179223
"meta": {
180224
"icon": "google"
181225
},
182-
"id": "google/gemini-3-pro-preview",
183-
"name": "Google Gemini 3 Pro Preview",
226+
"id": "google/gemini-3.1-pro-preview",
227+
"name": "Google Gemini 3.1 Pro Preview",
184228
"type": "llm",
185229
"context_length": 1048576,
186230
"max_output_tokens": 65536,
@@ -190,9 +234,9 @@
190234
"llm_output_cost_per_mtoken": 18.0,
191235
"deployments": [
192236
{
193-
"name": "Google Gemini 3 Pro Preview Deployment",
237+
"name": "Google Gemini 3.1 Pro Preview Deployment",
194238
"provider": "gemini",
195-
"routing_id": "gemini/gemini-3-pro-preview",
239+
"routing_id": "gemini/gemini-3.1-pro-preview",
196240
"api_base": ""
197241
}
198242
]
@@ -201,20 +245,20 @@
201245
"meta": {
202246
"icon": "google"
203247
},
204-
"id": "google/gemini-2.5-pro",
205-
"name": "Google Gemini 2.5 Pro",
248+
"id": "google/gemini-3-pro-preview",
249+
"name": "Google Gemini 3 Pro Preview",
206250
"type": "llm",
207251
"context_length": 1048576,
208252
"max_output_tokens": 65536,
209253
"capabilities": ["chat", "image", "reasoning", "tool"],
210254
"languages": ["en", "mul"],
211-
"llm_input_cost_per_mtoken": 2.5,
212-
"llm_output_cost_per_mtoken": 15.0,
255+
"llm_input_cost_per_mtoken": 4.0,
256+
"llm_output_cost_per_mtoken": 18.0,
213257
"deployments": [
214258
{
215-
"name": "Google Gemini 2.5 Pro Deployment",
259+
"name": "Google Gemini 3 Pro Preview Deployment",
216260
"provider": "gemini",
217-
"routing_id": "gemini/gemini-2.5-pro",
261+
"routing_id": "gemini/gemini-3-pro-preview",
218262
"api_base": ""
219263
}
220264
]
@@ -241,28 +285,6 @@
241285
}
242286
]
243287
},
244-
{
245-
"meta": {
246-
"icon": "google"
247-
},
248-
"id": "google/gemini-2.5-flash-preview-09-2025",
249-
"name": "Google Gemini 2.5 Flash Preview",
250-
"type": "llm",
251-
"context_length": 1048576,
252-
"max_output_tokens": 65536,
253-
"capabilities": ["chat", "image", "reasoning", "tool"],
254-
"languages": ["en", "mul"],
255-
"llm_input_cost_per_mtoken": 0.3,
256-
"llm_output_cost_per_mtoken": 2.5,
257-
"deployments": [
258-
{
259-
"name": "Google Gemini 2.5 Flash Preview Deployment",
260-
"provider": "gemini",
261-
"routing_id": "gemini/gemini-2.5-flash-preview-09-2025",
262-
"api_base": ""
263-
}
264-
]
265-
},
266288
{
267289
"meta": {
268290
"icon": "meta"
@@ -383,6 +405,30 @@
383405
}
384406
]
385407
},
408+
{
409+
"meta": {
410+
"icon": "qwen"
411+
},
412+
"id": "Qwen/Qwen3.5-397B-A17B",
413+
"name": "Qwen 3.5 (397B-A17B)",
414+
"type": "llm",
415+
"context_length": 256000,
416+
"capabilities": ["chat", "image", "reasoning", "tool"],
417+
"languages": ["en", "mul"],
418+
"llm_input_cost_per_mtoken": 0.7,
419+
"llm_output_cost_per_mtoken": 3.8,
420+
"deployments": [
421+
{
422+
"name": "Qwen 3.5 (397B-A17B) Deployment",
423+
"huggingface_id": "Qwen/Qwen3.5-397B-A17B-FP8",
424+
"cpu_count": "8",
425+
"memory_gb": "16",
426+
"required_vram": "400",
427+
"num_replicas": 1,
428+
"provider": "vllm"
429+
}
430+
]
431+
},
386432
{
387433
"meta": {
388434
"icon": "qwen"
@@ -527,6 +573,78 @@
527573
}
528574
]
529575
},
576+
{
577+
"meta": {
578+
"icon": "minimax"
579+
},
580+
"id": "MiniMaxAI/MiniMax-M2.5",
581+
"name": "MiniMax M2.5 (230B-A10B)",
582+
"type": "llm",
583+
"context_length": 196608,
584+
"capabilities": ["chat", "reasoning", "tool"],
585+
"languages": ["en", "mul"],
586+
"llm_input_cost_per_mtoken": 0.4,
587+
"llm_output_cost_per_mtoken": 1.4,
588+
"deployments": [
589+
{
590+
"name": "MiniMax M2.5 (230B-A10B) Deployment",
591+
"huggingface_id": "MiniMaxAI/MiniMax-M2.5",
592+
"cpu_count": "4",
593+
"memory_gb": "16",
594+
"required_vram": "230",
595+
"num_replicas": 1,
596+
"provider": "vllm"
597+
}
598+
]
599+
},
600+
{
601+
"meta": {
602+
"icon": "z_ai"
603+
},
604+
"id": "zai-org/GLM-5",
605+
"name": "GLM 5 (744B-A40B)",
606+
"type": "llm",
607+
"context_length": 202752,
608+
"capabilities": ["chat", "reasoning", "tool"],
609+
"languages": ["en", "mul"],
610+
"llm_input_cost_per_mtoken": 1.2,
611+
"llm_output_cost_per_mtoken": 3.4,
612+
"deployments": [
613+
{
614+
"name": "GLM 5 (744B-A40B) Deployment",
615+
"huggingface_id": "zai-org/GLM-5",
616+
"cpu_count": "4",
617+
"memory_gb": "16",
618+
"required_vram": "744",
619+
"num_replicas": 1,
620+
"provider": "vllm"
621+
}
622+
]
623+
},
624+
{
625+
"meta": {
626+
"icon": "kimi"
627+
},
628+
"id": "moonshotai/Kimi-K2.5",
629+
"name": "Kimi K2.5 (1T-A32B)",
630+
"type": "llm",
631+
"context_length": 256000,
632+
"capabilities": ["chat", "image", "reasoning", "tool"],
633+
"languages": ["en", "mul"],
634+
"llm_input_cost_per_mtoken": 1.2,
635+
"llm_output_cost_per_mtoken": 3.4,
636+
"deployments": [
637+
{
638+
"name": "Kimi K2.5 (1T-A32B) Deployment",
639+
"huggingface_id": "moonshotai/Kimi-K2.5",
640+
"cpu_count": "4",
641+
"memory_gb": "16",
642+
"required_vram": "512",
643+
"num_replicas": 1,
644+
"provider": "vllm"
645+
}
646+
]
647+
},
530648
{
531649
"meta": {
532650
"icon": "openai"

services/api/src/owl/utils/lm.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -707,8 +707,8 @@ def _prepare_hyperparams(
707707

708708
# Anthropic specific
709709
if ctx.inference_provider == CloudProvider.ANTHROPIC:
710-
# 4.1 and 4.5 models cannot specify both `temperature` and `top_p`
711-
if "-4-1" in ctx.routing_id or "-4-5" in ctx.routing_id:
710+
# 4.x models cannot specify both `temperature` and `top_p`
711+
if "-4-" in ctx.routing_id:
712712
t = hyperparams.get("temperature", None)
713713
p = hyperparams.get("top_p", None)
714714
if t is not None and p is not None:
@@ -736,8 +736,8 @@ def _prepare_hyperparams(
736736
hyperparams["reasoning_effort"] = "disable"
737737
return
738738
elif ctx.inference_provider == CloudProvider.GEMINI:
739-
# 3-Pro cannot disable thinking
740-
if "3-pro" in ctx.routing_id:
739+
# 3/3.1-Pro cannot disable thinking
740+
if "3-pro" in ctx.routing_id or "3.1-pro" in ctx.routing_id:
741741
hyperparams["reasoning_effort"] = "low"
742742
# 2.5 Pro cannot disable thinking
743743
elif "2.5-pro" in ctx.routing_id:

0 commit comments

Comments
 (0)