Skip to content

Commit 58aef31

Browse files
committed
🤖 feat: restrict Grok to binary on/off reasoning
Grok models only support reasoning on/off, not gradual levels. Update the UI to show a binary toggle for Grok instead of the full slider.

Changes:
- Add simple checks for the Grok/GPT-5/Gemini special cases
- Add a models.json lookup for models without reasoning support
- Preserve reasoning intent when switching between models

_Generated with `mux`_
1 parent 17e4caf commit 58aef31

File tree

2 files changed

+78
-25
lines changed

2 files changed

+78
-25
lines changed

src/browser/utils/thinking/policy.test.ts

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,8 @@ describe("getThinkingPolicyForModel", () => {
1010
expect(getThinkingPolicyForModel("openai:gpt-5-pro-2025-10-06")).toEqual(["high"]);
1111
});
1212

13-
test("returns single HIGH for gpt-5-pro with whitespace after colon", () => {
14-
expect(getThinkingPolicyForModel("openai: gpt-5-pro")).toEqual(["high"]);
15-
});
16-
1713
test("returns all levels for gpt-5-pro-mini (not a fixed policy)", () => {
14+
// gpt-5-pro-mini shouldn't match the gpt-5-pro config
1815
expect(getThinkingPolicyForModel("openai:gpt-5-pro-mini")).toEqual([
1916
"off",
2017
"low",
@@ -48,6 +45,25 @@ describe("getThinkingPolicyForModel", () => {
4845
]);
4946
expect(getThinkingPolicyForModel("google:gemini-3-pro-preview")).toEqual(["low", "high"]);
5047
});
48+
49+
test("returns binary on/off for xAI Grok models", () => {
50+
expect(getThinkingPolicyForModel("xai:grok-4-1-fast")).toEqual(["off", "high"]);
51+
expect(getThinkingPolicyForModel("xai:grok-2-latest")).toEqual(["off", "high"]);
52+
expect(getThinkingPolicyForModel("xai:grok-beta")).toEqual(["off", "high"]);
53+
});
54+
55+
test("grok models with version suffixes also get binary policy", () => {
56+
expect(getThinkingPolicyForModel("xai:grok-4-1-fast-v2")).toEqual(["off", "high"]);
57+
});
58+
59+
test("grok-code does not match grok- prefix, gets default policy", () => {
60+
expect(getThinkingPolicyForModel("xai:grok-code-fast-1")).toEqual([
61+
"off",
62+
"low",
63+
"medium",
64+
"high",
65+
]);
66+
});
5167
});
5268

5369
describe("enforceThinkingPolicy", () => {
@@ -72,10 +88,15 @@ describe("enforceThinkingPolicy", () => {
7288
expect(enforceThinkingPolicy("anthropic:claude-opus-4", "high")).toBe("high");
7389
});
7490

75-
test("falls back to medium when requested level not allowed", () => {
76-
// Simulating behavior with gpt-5-pro (only allows "high")
77-
// When requesting "low", falls back to first allowed level which is "high"
91+
test("maps non-off levels to highest available when requested level not allowed", () => {
92+
// gpt-5-pro only allows "high"
7893
expect(enforceThinkingPolicy("openai:gpt-5-pro", "low")).toBe("high");
94+
expect(enforceThinkingPolicy("openai:gpt-5-pro", "medium")).toBe("high");
95+
96+
// Grok only allows "off" and "high" - preserve reasoning intent
97+
expect(enforceThinkingPolicy("xai:grok-4-1-fast", "low")).toBe("high");
98+
expect(enforceThinkingPolicy("xai:grok-4-1-fast", "medium")).toBe("high");
99+
expect(enforceThinkingPolicy("xai:grok-4-1-fast", "off")).toBe("off");
79100
});
80101
});
81102
});

src/browser/utils/thinking/policy.ts

Lines changed: 50 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,46 +13,72 @@
1313
*/
1414

1515
import type { ThinkingLevel } from "@/common/types/thinking";
16+
import modelsData from "@/common/utils/tokens/models.json";
1617

1718
/**
1819
* Thinking policy is simply the set of allowed thinking levels for a model.
1920
* Pure subset design - no wrapper object, no discriminated union.
2021
*/
2122
export type ThinkingPolicy = readonly ThinkingLevel[];
2223

24+
/**
25+
* Looks up model metadata in models.json, trying the bare model name first and then `provider/modelName`; returns null when neither key is present.
26+
*/
27+
function getModelMetadata(modelString: string): Record<string, unknown> | null {
28+
const colonIndex = modelString.indexOf(":");
29+
const provider = colonIndex !== -1 ? modelString.slice(0, colonIndex) : "";
30+
const modelName = colonIndex !== -1 ? modelString.slice(colonIndex + 1) : modelString;
31+
32+
const lookupKeys: string[] = [modelName];
33+
if (provider) {
34+
lookupKeys.push(`${provider}/${modelName}`);
35+
}
36+
37+
for (const key of lookupKeys) {
38+
const data = (modelsData as Record<string, Record<string, unknown>>)[key];
39+
if (data) {
40+
return data;
41+
}
42+
}
43+
44+
return null;
45+
}
46+
2347
/**
2448
* Returns the thinking policy for a given model.
25-
*
26-
* Rules:
27-
* - openai:gpt-5-pro → ["high"] (only supported level)
28-
* - default → ["off", "low", "medium", "high"] (all levels selectable)
29-
*
30-
* Tolerates version suffixes (e.g., gpt-5-pro-2025-10-06).
31-
* Does NOT match gpt-5-pro-mini (uses negative lookahead).
3249
*/
3350
export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
34-
// Match "openai:" followed by optional whitespace and "gpt-5-pro"
35-
// Allow version suffixes like "-2025-10-06" but NOT "-mini" or other text suffixes
36-
if (/^openai:\s*gpt-5-pro(?!-[a-z])/.test(modelString)) {
51+
// GPT-5 Pro: always high (but not gpt-5-pro-mini)
52+
if (modelString.startsWith("openai:gpt-5-pro") && !modelString.includes("-mini")) {
3753
return ["high"];
3854
}
3955

40-
// Gemini 3 Pro only supports "low" and "high" reasoning levels
56+
// Gemini 3: only "low" and "high" are selectable
4157
if (modelString.includes("gemini-3")) {
4258
return ["low", "high"];
4359
}
4460

45-
// Default policy: all levels selectable
61+
// Grok: binary on/off (but not grok-code)
62+
if (modelString.startsWith("xai:grok-") && !modelString.includes("grok-code")) {
63+
return ["off", "high"];
64+
}
65+
66+
// Check models.json for no reasoning support
67+
const metadata = getModelMetadata(modelString);
68+
if (metadata?.supports_reasoning === false) {
69+
return ["off"];
70+
}
71+
72+
// Default: all levels
4673
return ["off", "low", "medium", "high"];
4774
}
4875

4976
/**
5077
* Enforce thinking policy by clamping requested level to allowed set.
5178
*
52-
* Fallback strategy:
53-
* 1. If requested level is allowed, use it
54-
* 2. If "medium" is allowed, use it (reasonable default)
55-
* 3. Otherwise use first allowed level
79+
* If the requested level isn't allowed:
80+
* - If user wanted reasoning (non-"off"), pick the highest available non-"off" level
81+
* - Otherwise (user requested "off", or no non-"off" level is allowed) return the first allowed level
5682
*/
5783
export function enforceThinkingPolicy(
5884
modelString: string,
@@ -64,6 +90,12 @@ export function enforceThinkingPolicy(
6490
return requested;
6591
}
6692

67-
// Fallback: prefer "medium" if allowed, else use first allowed level
68-
return allowed.includes("medium") ? "medium" : allowed[0];
93+
// If user wanted reasoning, keep it on with the best available level
94+
if (requested !== "off") {
95+
if (allowed.includes("high")) return "high";
96+
if (allowed.includes("medium")) return "medium";
97+
if (allowed.includes("low")) return "low";
98+
}
99+
100+
return allowed[0];
69101
}

0 commit comments

Comments
 (0)