Skip to content

Commit 605a345

Browse files
committed
[owl] Update preset model (#899)
Added Claude Opus 4.6 and Claude Sonnet 4.6; added Gemini 3.1 Pro; added Kimi K2.5, GLM 5, MiniMax M2.5, and Qwen 3.5; removed Gemini 2.5 Pro and 2.5 Flash; added the route check for claude-*-4-6 and gemini-3.1-pro.
1 parent 2e070a2 commit 605a345

File tree

2 files changed

+154
-36
lines changed

2 files changed

+154
-36
lines changed

services/api/src/owl/configs/preset_models.json

Lines changed: 150 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,28 @@
109109
}
110110
]
111111
},
112+
{
113+
"meta": {
114+
"icon": "anthropic"
115+
},
116+
"id": "anthropic/claude-opus-4-6",
117+
"name": "Anthropic Claude Opus 4.6",
118+
"type": "llm",
119+
"context_length": 200000,
120+
"max_output_tokens": 64000,
121+
"capabilities": ["chat", "image", "reasoning", "tool"],
122+
"languages": ["en", "mul"],
123+
"llm_input_cost_per_mtoken": 5.0,
124+
"llm_output_cost_per_mtoken": 25.0,
125+
"deployments": [
126+
{
127+
"name": "Anthropic Claude Opus 4.6 Deployment",
128+
"provider": "anthropic",
129+
"routing_id": "anthropic/claude-opus-4-6",
130+
"api_base": ""
131+
}
132+
]
133+
},
112134
{
113135
"meta": {
114136
"icon": "anthropic"
@@ -131,6 +153,28 @@
131153
}
132154
]
133155
},
156+
{
157+
"meta": {
158+
"icon": "anthropic"
159+
},
160+
"id": "anthropic/claude-sonnet-4-6",
161+
"name": "Anthropic Claude Sonnet 4.6",
162+
"type": "llm",
163+
"context_length": 200000,
164+
"max_output_tokens": 64000,
165+
"capabilities": ["chat", "image", "reasoning", "tool"],
166+
"languages": ["en", "mul"],
167+
"llm_input_cost_per_mtoken": 3.0,
168+
"llm_output_cost_per_mtoken": 15.0,
169+
"deployments": [
170+
{
171+
"name": "Anthropic Claude Sonnet 4.6 Deployment",
172+
"provider": "anthropic",
173+
"routing_id": "anthropic/claude-sonnet-4-6",
174+
"api_base": ""
175+
}
176+
]
177+
},
134178
{
135179
"meta": {
136180
"icon": "anthropic"
@@ -179,8 +223,8 @@
179223
"meta": {
180224
"icon": "google"
181225
},
182-
"id": "google/gemini-3-pro-preview",
183-
"name": "Google Gemini 3 Pro Preview",
226+
"id": "google/gemini-3.1-pro-preview",
227+
"name": "Google Gemini 3.1 Pro Preview",
184228
"type": "llm",
185229
"context_length": 1048576,
186230
"max_output_tokens": 65536,
@@ -190,9 +234,9 @@
190234
"llm_output_cost_per_mtoken": 18.0,
191235
"deployments": [
192236
{
193-
"name": "Google Gemini 3 Pro Preview Deployment",
237+
"name": "Google Gemini 3.1 Pro Preview Deployment",
194238
"provider": "gemini",
195-
"routing_id": "gemini/gemini-3-pro-preview",
239+
"routing_id": "gemini/gemini-3.1-pro-preview",
196240
"api_base": ""
197241
}
198242
]
@@ -201,20 +245,20 @@
201245
"meta": {
202246
"icon": "google"
203247
},
204-
"id": "google/gemini-2.5-pro",
205-
"name": "Google Gemini 2.5 Pro",
248+
"id": "google/gemini-3-pro-preview",
249+
"name": "Google Gemini 3 Pro Preview",
206250
"type": "llm",
207251
"context_length": 1048576,
208252
"max_output_tokens": 65536,
209253
"capabilities": ["chat", "image", "reasoning", "tool"],
210254
"languages": ["en", "mul"],
211-
"llm_input_cost_per_mtoken": 2.5,
212-
"llm_output_cost_per_mtoken": 15.0,
255+
"llm_input_cost_per_mtoken": 4.0,
256+
"llm_output_cost_per_mtoken": 18.0,
213257
"deployments": [
214258
{
215-
"name": "Google Gemini 2.5 Pro Deployment",
259+
"name": "Google Gemini 3 Pro Preview Deployment",
216260
"provider": "gemini",
217-
"routing_id": "gemini/gemini-2.5-pro",
261+
"routing_id": "gemini/gemini-3-pro-preview",
218262
"api_base": ""
219263
}
220264
]
@@ -241,28 +285,6 @@
241285
}
242286
]
243287
},
244-
{
245-
"meta": {
246-
"icon": "google"
247-
},
248-
"id": "google/gemini-2.5-flash-preview-09-2025",
249-
"name": "Google Gemini 2.5 Flash Preview",
250-
"type": "llm",
251-
"context_length": 1048576,
252-
"max_output_tokens": 65536,
253-
"capabilities": ["chat", "image", "reasoning", "tool"],
254-
"languages": ["en", "mul"],
255-
"llm_input_cost_per_mtoken": 0.3,
256-
"llm_output_cost_per_mtoken": 2.5,
257-
"deployments": [
258-
{
259-
"name": "Google Gemini 2.5 Flash Preview Deployment",
260-
"provider": "gemini",
261-
"routing_id": "gemini/gemini-2.5-flash-preview-09-2025",
262-
"api_base": ""
263-
}
264-
]
265-
},
266288
{
267289
"meta": {
268290
"icon": "meta"
@@ -383,6 +405,30 @@
383405
}
384406
]
385407
},
408+
{
409+
"meta": {
410+
"icon": "qwen"
411+
},
412+
"id": "Qwen/Qwen3.5-397B-A17B",
413+
"name": "Qwen 3.5 (397B-A17B)",
414+
"type": "llm",
415+
"context_length": 256000,
416+
"capabilities": ["chat", "image", "reasoning", "tool"],
417+
"languages": ["en", "mul"],
418+
"llm_input_cost_per_mtoken": 0.7,
419+
"llm_output_cost_per_mtoken": 3.8,
420+
"deployments": [
421+
{
422+
"name": "Qwen 3.5 (397B-A17B) Deployment",
423+
"huggingface_id": "Qwen/Qwen3.5-397B-A17B-FP8",
424+
"cpu_count": "8",
425+
"memory_gb": "16",
426+
"required_vram": "400",
427+
"num_replicas": 1,
428+
"provider": "vllm"
429+
}
430+
]
431+
},
386432
{
387433
"meta": {
388434
"icon": "qwen"
@@ -527,6 +573,78 @@
527573
}
528574
]
529575
},
576+
{
577+
"meta": {
578+
"icon": "minimax"
579+
},
580+
"id": "MiniMaxAI/MiniMax-M2.5",
581+
"name": "MiniMax M2.5 (230B-A10B)",
582+
"type": "llm",
583+
"context_length": 196608,
584+
"capabilities": ["chat", "reasoning", "tool"],
585+
"languages": ["en", "mul"],
586+
"llm_input_cost_per_mtoken": 0.4,
587+
"llm_output_cost_per_mtoken": 1.4,
588+
"deployments": [
589+
{
590+
"name": "MiniMax M2.5 (230B-A10B) Deployment",
591+
"huggingface_id": "MiniMaxAI/MiniMax-M2.5",
592+
"cpu_count": "4",
593+
"memory_gb": "16",
594+
"required_vram": "230",
595+
"num_replicas": 1,
596+
"provider": "vllm"
597+
}
598+
]
599+
},
600+
{
601+
"meta": {
602+
"icon": "z_ai"
603+
},
604+
"id": "zai-org/GLM-5",
605+
"name": "GLM 5 (744B-A40B)",
606+
"type": "llm",
607+
"context_length": 202752,
608+
"capabilities": ["chat", "reasoning", "tool"],
609+
"languages": ["en", "mul"],
610+
"llm_input_cost_per_mtoken": 1.2,
611+
"llm_output_cost_per_mtoken": 3.4,
612+
"deployments": [
613+
{
614+
"name": "GLM 5 (744B-A40B) Deployment",
615+
"huggingface_id": "zai-org/GLM-5",
616+
"cpu_count": "4",
617+
"memory_gb": "16",
618+
"required_vram": "744",
619+
"num_replicas": 1,
620+
"provider": "vllm"
621+
}
622+
]
623+
},
624+
{
625+
"meta": {
626+
"icon": "kimi"
627+
},
628+
"id": "moonshotai/Kimi-K2.5",
629+
"name": "Kimi K2.5 (1T-A32B)",
630+
"type": "llm",
631+
"context_length": 256000,
632+
"capabilities": ["chat", "image", "reasoning", "tool"],
633+
"languages": ["en", "mul"],
634+
"llm_input_cost_per_mtoken": 1.2,
635+
"llm_output_cost_per_mtoken": 3.4,
636+
"deployments": [
637+
{
638+
"name": "Kimi K2.5 (1T-A32B) Deployment",
639+
"huggingface_id": "moonshotai/Kimi-K2.5",
640+
"cpu_count": "4",
641+
"memory_gb": "16",
642+
"required_vram": "512",
643+
"num_replicas": 1,
644+
"provider": "vllm"
645+
}
646+
]
647+
},
530648
{
531649
"meta": {
532650
"icon": "openai"

services/api/src/owl/utils/lm.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -707,8 +707,8 @@ def _prepare_hyperparams(
707707

708708
# Anthropic specific
709709
if ctx.inference_provider == CloudProvider.ANTHROPIC:
710-
# 4.1 and 4.5 models cannot specify both `temperature` and `top_p`
711-
if "-4-1" in ctx.routing_id or "-4-5" in ctx.routing_id:
710+
# 4.x models cannot specify both `temperature` and `top_p`
711+
if "-4-" in ctx.routing_id:
712712
t = hyperparams.get("temperature", None)
713713
p = hyperparams.get("top_p", None)
714714
if t is not None and p is not None:
@@ -736,8 +736,8 @@ def _prepare_hyperparams(
736736
hyperparams["reasoning_effort"] = "disable"
737737
return
738738
elif ctx.inference_provider == CloudProvider.GEMINI:
739-
# 3-Pro cannot disable thinking
740-
if "3-pro" in ctx.routing_id:
739+
# 3/3.1-Pro cannot disable thinking
740+
if "3-pro" in ctx.routing_id or "3.1-pro" in ctx.routing_id:
741741
hyperparams["reasoning_effort"] = "low"
742742
# 2.5 Pro cannot disable thinking
743743
elif "2.5-pro" in ctx.routing_id:

0 commit comments

Comments
 (0)