Skip to content

Commit c43904d

Browse files
committed
feat(reasoning): add xhigh/max effort levels and Anthropic effort support
- Expand ReasoningEffort enum with None/Default/Minimal/XHigh/Max variants
- Add Anthropic "effort" reasoning style (adaptive thinking + output_config)
- Update UI: temperature disabled with reasoning, mid-chat warnings, new buttons
- Fix tool call args normalization for empty/null values from LLMs
- Improve reasoning display: paragraph breaks for bold titles, better caps lookup
- Add n_ctx to SamplingParameters, update test timeouts and fixtures

Fixes reasoning config inconsistencies across providers (OpenAI/Mistral/XAI/Qwen/etc).
1 parent ddf2619 commit c43904d

File tree

32 files changed

+444
-143
lines changed

32 files changed

+444
-143
lines changed

refact-agent/engine/src/call_validation.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ pub enum ReasoningEffort {
3030
#[default]
3131
Medium,
3232
High,
33+
XHigh,
34+
Max,
3335
}
3436

3537
impl ReasoningEffort {

refact-agent/engine/src/llm/adapters/anthropic.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ impl LlmWireAdapter for AnthropicAdapter {
105105
}
106106
}
107107
}
108+
body.as_object_mut().map(|obj| obj.remove("temperature"));
108109
}
109110

110111
if body.get("thinking").and_then(|t| t.get("type")).and_then(|t| t.as_str()) == Some("enabled") {

refact-agent/engine/src/llm/adapters/refact.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,11 +97,27 @@ impl LlmWireAdapter for RefactAdapter {
9797
if settings.supports_reasoning {
9898
let rtype = settings.reasoning_type.as_deref().unwrap_or("");
9999
match rtype {
100-
"anthropic_budget" | "anthropic_effort" => {
100+
"anthropic_budget" => {
101101
if let Some(budget) = req.reasoning.to_anthropic_budget(DEFAULT_THINKING_BUDGET) {
102102
body["thinking"] = json!({"type": "enabled", "budget_tokens": budget});
103103
}
104104
}
105+
"anthropic_effort" => {
106+
match &req.reasoning {
107+
crate::llm::params::ReasoningIntent::BudgetTokens(n) => {
108+
body["thinking"] = json!({"type": "enabled", "budget_tokens": *n});
109+
}
110+
_ => {
111+
if let Some(effort) = req.reasoning.to_anthropic_effort() {
112+
body["thinking"] = json!({"type": "adaptive"});
113+
body["output_config"] = json!({"effort": effort});
114+
}
115+
}
116+
}
117+
}
118+
"xai" => {
119+
// do nothing, since reasoning is supported only implicitly for this provider
120+
},
105121
_ => {
106122
// openai, deepseek, qwen, gemini, kimi, zhipu, mistral, etc.
107123
if let Some(effort) = req.reasoning.to_openai_effort() {

refact-agent/engine/src/llm/params.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ pub enum ReasoningIntent {
5050
Low,
5151
Medium,
5252
High,
53+
XHigh,
54+
Max,
5355
BudgetTokens(usize),
5456
}
5557

@@ -66,6 +68,9 @@ impl ReasoningIntent {
6668
Self::Low => Some("low"),
6769
Self::Medium => Some("medium"),
6870
Self::High => Some("high"),
71+
Self::XHigh => Some("xhigh"),
72+
// OpenAI doesn't currently expose "max" effort; treat as highest.
73+
Self::Max => Some("xhigh"),
6974
Self::BudgetTokens(_) => Some("high"),
7075
}
7176
}
@@ -76,6 +81,8 @@ impl ReasoningIntent {
7681
Self::Low => Some(default_budget / 4),
7782
Self::Medium => Some(default_budget / 2),
7883
Self::High => Some(default_budget),
84+
Self::XHigh => Some(default_budget),
85+
Self::Max => Some(default_budget),
7986
Self::BudgetTokens(n) => Some(*n),
8087
}
8188
}
@@ -86,6 +93,9 @@ impl ReasoningIntent {
8693
Self::Low => Some("low"),
8794
Self::Medium => Some("medium"),
8895
Self::High => Some("high"),
96+
// Anthropic doesn't have a separate "xhigh" effort level; closest is "max".
97+
Self::XHigh => Some("max"),
98+
Self::Max => Some("max"),
8999
Self::BudgetTokens(_) => Some("high"),
90100
}
91101
}

refact-agent/engine/src/subchat.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,10 @@ pub async fn resolve_subchat_params(
159159
Some(re) if re.eq_ignore_ascii_case("low") => Some(ReasoningEffort::Low),
160160
Some(re) if re.eq_ignore_ascii_case("medium") => Some(ReasoningEffort::Medium),
161161
Some(re) if re.eq_ignore_ascii_case("high") => Some(ReasoningEffort::High),
162+
Some(re) if re.eq_ignore_ascii_case("xhigh") => Some(ReasoningEffort::XHigh),
163+
Some(re) if re.eq_ignore_ascii_case("max") => Some(ReasoningEffort::Max),
162164
Some(re) => return Err(format!(
163-
"invalid reasoning_effort '{}' for '{}', expected: low, medium, high",
165+
"invalid reasoning_effort '{}' for '{}', expected: low, medium, high, xhigh, max",
164166
re, tool_name
165167
)),
166168
None => None,

refact-agent/engine/src/yaml_configs/customization_registry.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -702,7 +702,9 @@ mod tests {
702702
if let Some(ref reasoning_effort) = config.subchat.reasoning_effort {
703703
let valid = reasoning_effort.eq_ignore_ascii_case("low")
704704
|| reasoning_effort.eq_ignore_ascii_case("medium")
705-
|| reasoning_effort.eq_ignore_ascii_case("high");
705+
|| reasoning_effort.eq_ignore_ascii_case("high")
706+
|| reasoning_effort.eq_ignore_ascii_case("xhigh")
707+
|| reasoning_effort.eq_ignore_ascii_case("max");
706708
assert!(
707709
valid,
708710
"Subagent '{}' has invalid reasoning_effort: {}",

refact-agent/gui/src/__fixtures__/caps.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ export const STUB_CAPS_RESPONSE: CapsResponse = {
285285
supports_multimodality: true,
286286
supports_clicks: true,
287287
supports_agent: true,
288-
supports_reasoning: "anthropic",
288+
supports_reasoning: "anthropic_budget",
289289
supports_boost_reasoning: true,
290290
default_temperature: null,
291291
},

refact-agent/gui/src/__tests__/integration/chatSubscription.integration.test.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,8 @@ describe.skipIf(!(await isServerAvailable()))(
265265
const chatId = generateChatId("test-abort-stream");
266266

267267
// Start collecting events
268-
const eventsPromise = collectEvents(chatId, 15, 10000);
268+
// Use a higher cap here: streaming can emit many deltas before abort lands.
269+
const eventsPromise = collectEvents(chatId, 200, 15000);
269270

270271
await new Promise((r) => setTimeout(r, 300));
271272

@@ -282,8 +283,8 @@ describe.skipIf(!(await isServerAvailable()))(
282283
LSP_PORT,
283284
);
284285

285-
// Wait for generation to start
286-
await new Promise((r) => setTimeout(r, 1000));
286+
// Wait briefly for generation to start, then abort.
287+
await new Promise((r) => setTimeout(r, 200));
287288

288289
// Send abort
289290
await abortGeneration(chatId, LSP_PORT);

refact-agent/gui/src/app/middleware.ts

Lines changed: 103 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import {
3333
setThinkingBudget,
3434
setTemperature,
3535
setMaxTokens,
36+
buildThreadParamsPatch,
3637
} from "../features/Chat/Thread";
3738
import { saveLastThreadParams } from "../utils/threadStorage";
3839
import { statisticsApi } from "../services/refact/statistics";
@@ -82,7 +83,7 @@ const startListening = listenerMiddleware.startListening.withTypes<
8283

8384
startListening({
8485
actionCreator: newChatAction,
85-
effect: (_action, listenerApi) => {
86+
effect: async (_action, listenerApi) => {
8687
const state = listenerApi.getState();
8788
const chatId = state.chat.current_thread_id;
8889

@@ -102,6 +103,36 @@ startListening({
102103
}),
103104
);
104105
listenerApi.dispatch(clearError());
106+
107+
// New chats are created client-side first; sync the initial params to backend
108+
// immediately so the first snapshot doesn't overwrite local defaults.
109+
const runtime = state.chat.threads[chatId];
110+
const port = state.config.lspPort;
111+
if (!runtime || !port || !chatId) return;
112+
113+
try {
114+
const patch = buildThreadParamsPatch(runtime.thread, true);
115+
116+
// If reasoning is enabled by defaults (new chat), ensure temperature is sent as null.
117+
// Otherwise backend may fall back to a numeric default (often 0), which is invalid
118+
// for reasoning-enabled providers.
119+
const isReasoningEnabled =
120+
Boolean(runtime.thread.boost_reasoning) ||
121+
runtime.thread.reasoning_effort != null ||
122+
runtime.thread.thinking_budget != null;
123+
if (isReasoningEnabled) {
124+
patch.temperature = null;
125+
}
126+
127+
if (Object.keys(patch).length > 0) {
128+
await sendChatCommand(chatId, port, state.config.apiKey ?? undefined, {
129+
type: "set_params",
130+
patch,
131+
});
132+
}
133+
} catch {
134+
// Silently ignore - backend may not support this command
135+
}
105136
},
106137
});
107138

@@ -707,6 +738,15 @@ startListening({
707738
type: "set_params",
708739
patch: { boost_reasoning: action.payload.value },
709740
});
741+
742+
// When reasoning is enabled, temperature must be unset.
743+
// This avoids provider-side validation errors.
744+
if (action.payload.value) {
745+
await sendChatCommand(chatId, port, apiKey ?? undefined, {
746+
type: "set_params",
747+
patch: { temperature: null },
748+
});
749+
}
710750
} catch {
711751
// Silently ignore - backend may not support this command
712752
}
@@ -731,6 +771,14 @@ startListening({
731771
type: "set_params",
732772
patch: { reasoning_effort: action.payload.value },
733773
});
774+
775+
// Any explicit reasoning effort implies reasoning mode: unset temperature.
776+
if (action.payload.value != null) {
777+
await sendChatCommand(chatId, port, apiKey ?? undefined, {
778+
type: "set_params",
779+
patch: { temperature: null },
780+
});
781+
}
734782
} catch {
735783
// Silently ignore
736784
}
@@ -755,6 +803,14 @@ startListening({
755803
type: "set_params",
756804
patch: { thinking_budget: action.payload.value },
757805
});
806+
807+
// Any explicit thinking budget implies reasoning mode: unset temperature.
808+
if (action.payload.value != null) {
809+
await sendChatCommand(chatId, port, apiKey ?? undefined, {
810+
type: "set_params",
811+
patch: { temperature: null },
812+
});
813+
}
758814
} catch {
759815
// Silently ignore errors - user will see them via SSE events
760816
}
@@ -1028,23 +1084,52 @@ startListening({
10281084
if (!runtime) return;
10291085

10301086
const isUnstartedChat = runtime.thread.messages.length === 0;
1031-
if (!isUnstartedChat) return;
1032-
1033-
saveLastThreadParams({
1034-
model: runtime.thread.model,
1035-
mode: runtime.thread.mode,
1036-
boost_reasoning: runtime.thread.boost_reasoning,
1037-
reasoning_effort: runtime.thread.reasoning_effort,
1038-
thinking_budget: runtime.thread.thinking_budget,
1039-
temperature: runtime.thread.temperature,
1040-
max_tokens: runtime.thread.max_tokens,
1041-
increase_max_tokens: runtime.thread.increase_max_tokens,
1042-
include_project_info: runtime.thread.include_project_info,
1043-
context_tokens_cap: runtime.thread.context_tokens_cap,
1044-
system_prompt: state.chat.system_prompt,
1045-
checkpoints_enabled: state.chat.checkpoints_enabled,
1046-
follow_ups_enabled: state.chat.follow_ups_enabled,
1047-
});
1087+
const shouldPersistForNewChats =
1088+
isUnstartedChat ||
1089+
setBoostReasoning.match(_action) ||
1090+
setReasoningEffort.match(_action) ||
1091+
setThinkingBudget.match(_action);
1092+
if (!shouldPersistForNewChats) return;
1093+
1094+
// Persist the updated param(s) as defaults for *new* chats.
1095+
// IMPORTANT: For started chats, we only persist reasoning-related toggles
1096+
// (boost_reasoning / reasoning_effort / thinking_budget), keeping other
1097+
// sampling params “sticky” only before the first message.
1098+
const mode = runtime.thread.mode;
1099+
const patch: Parameters<typeof saveLastThreadParams>[0] = { mode };
1100+
1101+
if (isUnstartedChat) {
1102+
patch.model = runtime.thread.model;
1103+
patch.temperature = runtime.thread.temperature;
1104+
patch.max_tokens = runtime.thread.max_tokens;
1105+
patch.increase_max_tokens = runtime.thread.increase_max_tokens;
1106+
patch.include_project_info = runtime.thread.include_project_info;
1107+
patch.context_tokens_cap = runtime.thread.context_tokens_cap;
1108+
patch.system_prompt = state.chat.system_prompt;
1109+
patch.checkpoints_enabled = state.chat.checkpoints_enabled;
1110+
patch.follow_ups_enabled = state.chat.follow_ups_enabled;
1111+
}
1112+
1113+
if (setBoostReasoning.match(_action)) {
1114+
patch.boost_reasoning = runtime.thread.boost_reasoning;
1115+
// preserve temperature reset as part of “reasoning defaults”
1116+
patch.temperature = runtime.thread.temperature;
1117+
}
1118+
if (setReasoningEffort.match(_action)) {
1119+
patch.reasoning_effort = runtime.thread.reasoning_effort;
1120+
patch.temperature = runtime.thread.temperature;
1121+
}
1122+
if (setThinkingBudget.match(_action)) {
1123+
patch.thinking_budget = runtime.thread.thinking_budget;
1124+
patch.temperature = runtime.thread.temperature;
1125+
}
1126+
1127+
// Still persist model changes after start (matches current UX).
1128+
if (setChatModel.match(_action)) {
1129+
patch.model = runtime.thread.model;
1130+
}
1131+
1132+
saveLastThreadParams(patch);
10481133
},
10491134
});
10501135

refact-agent/gui/src/components/ChatContent/MessageFooter.tsx

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,8 @@ export const MessageFooter: React.FC<MessageFooterProps> = ({
118118
"cache_read_input_tokens",
119119
],
120120
});
121-
const hasUsageInfo = Boolean(usage && contextTokens > 0) || showCoins || hasUsd;
121+
const hasUsageInfo =
122+
Boolean(usage && contextTokens > 0) || showCoins || hasUsd;
122123

123124
return (
124125
<div className={styles.footerLane}>
@@ -259,18 +260,27 @@ export const MessageFooter: React.FC<MessageFooterProps> = ({
259260
<Text size="2" weight="bold">
260261
Cost
261262
</Text>
262-
<Text size="2">
263-
{formatUsd(meteringUsd.total_usd)}
264-
</Text>
263+
<Text size="2">{formatUsd(meteringUsd.total_usd)}</Text>
265264
</Flex>
266265
<UsdDisplay label="Prompt" value={meteringUsd.prompt_usd} />
267-
<UsdDisplay label="Completion" value={meteringUsd.generated_usd} />
268-
{meteringUsd.cache_read_usd !== undefined && meteringUsd.cache_read_usd > 0 && (
269-
<UsdDisplay label="Cache read" value={meteringUsd.cache_read_usd} />
270-
)}
271-
{meteringUsd.cache_creation_usd !== undefined && meteringUsd.cache_creation_usd > 0 && (
272-
<UsdDisplay label="Cache creation" value={meteringUsd.cache_creation_usd} />
273-
)}
266+
<UsdDisplay
267+
label="Completion"
268+
value={meteringUsd.generated_usd}
269+
/>
270+
{meteringUsd.cache_read_usd !== undefined &&
271+
meteringUsd.cache_read_usd > 0 && (
272+
<UsdDisplay
273+
label="Cache read"
274+
value={meteringUsd.cache_read_usd}
275+
/>
276+
)}
277+
{meteringUsd.cache_creation_usd !== undefined &&
278+
meteringUsd.cache_creation_usd > 0 && (
279+
<UsdDisplay
280+
label="Cache creation"
281+
value={meteringUsd.cache_creation_usd}
282+
/>
283+
)}
274284
</>
275285
)}
276286
</Flex>

0 commit comments

Comments (0)