Skip to content

Commit c43904d

Browse files
committed
feat(reasoning): add xhigh/max effort levels and Anthropic effort support
- Expand ReasoningEffort enum with None/Default/Minimal/XHigh/Max variants
- Add Anthropic "effort" reasoning style (adaptive thinking + output_config)
- Update UI: temperature disabled with reasoning, mid-chat warnings, new buttons
- Fix tool call args normalization for empty/null values from LLMs
- Improve reasoning display: paragraph breaks for bold titles, better caps lookup
- Add n_ctx to SamplingParameters, update test timeouts and fixtures

Fixes reasoning config inconsistencies across providers (OpenAI/Mistral/XAI/Qwen/etc).
1 parent ddf2619 commit c43904d

File tree

32 files changed

+444
-143
lines changed

32 files changed

+444
-143
lines changed

refact-agent/engine/src/call_validation.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ pub enum ReasoningEffort {
3030
#[default]
3131
Medium,
3232
High,
33+
XHigh,
34+
Max,
3335
}
3436

3537
impl ReasoningEffort {

refact-agent/engine/src/llm/adapters/anthropic.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ impl LlmWireAdapter for AnthropicAdapter {
105105
}
106106
}
107107
}
108+
body.as_object_mut().map(|obj| obj.remove("temperature"));
108109
}
109110

110111
if body.get("thinking").and_then(|t| t.get("type")).and_then(|t| t.as_str()) == Some("enabled") {

refact-agent/engine/src/llm/adapters/refact.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,11 +97,27 @@ impl LlmWireAdapter for RefactAdapter {
9797
if settings.supports_reasoning {
9898
let rtype = settings.reasoning_type.as_deref().unwrap_or("");
9999
match rtype {
100-
"anthropic_budget" | "anthropic_effort" => {
100+
"anthropic_budget" => {
101101
if let Some(budget) = req.reasoning.to_anthropic_budget(DEFAULT_THINKING_BUDGET) {
102102
body["thinking"] = json!({"type": "enabled", "budget_tokens": budget});
103103
}
104104
}
105+
"anthropic_effort" => {
106+
match &req.reasoning {
107+
crate::llm::params::ReasoningIntent::BudgetTokens(n) => {
108+
body["thinking"] = json!({"type": "enabled", "budget_tokens": *n});
109+
}
110+
_ => {
111+
if let Some(effort) = req.reasoning.to_anthropic_effort() {
112+
body["thinking"] = json!({"type": "adaptive"});
113+
body["output_config"] = json!({"effort": effort});
114+
}
115+
}
116+
}
117+
}
118+
"xai" => {
119+
// do nothing, since reasoning is supported only implicitly for this provider
120+
},
105121
_ => {
106122
// openai, deepseek, qwen, gemini, kimi, zhipu, mistral, etc.
107123
if let Some(effort) = req.reasoning.to_openai_effort() {

refact-agent/engine/src/llm/params.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ pub enum ReasoningIntent {
5050
Low,
5151
Medium,
5252
High,
53+
XHigh,
54+
Max,
5355
BudgetTokens(usize),
5456
}
5557

@@ -66,6 +68,9 @@ impl ReasoningIntent {
6668
Self::Low => Some("low"),
6769
Self::Medium => Some("medium"),
6870
Self::High => Some("high"),
71+
Self::XHigh => Some("xhigh"),
72+
// OpenAI doesn't currently expose "max" effort; treat as highest.
73+
Self::Max => Some("xhigh"),
6974
Self::BudgetTokens(_) => Some("high"),
7075
}
7176
}
@@ -76,6 +81,8 @@ impl ReasoningIntent {
7681
Self::Low => Some(default_budget / 4),
7782
Self::Medium => Some(default_budget / 2),
7883
Self::High => Some(default_budget),
84+
Self::XHigh => Some(default_budget),
85+
Self::Max => Some(default_budget),
7986
Self::BudgetTokens(n) => Some(*n),
8087
}
8188
}
@@ -86,6 +93,9 @@ impl ReasoningIntent {
8693
Self::Low => Some("low"),
8794
Self::Medium => Some("medium"),
8895
Self::High => Some("high"),
96+
// Anthropic doesn't have a separate "xhigh" effort level; closest is "max".
97+
Self::XHigh => Some("max"),
98+
Self::Max => Some("max"),
8999
Self::BudgetTokens(_) => Some("high"),
90100
}
91101
}

refact-agent/engine/src/subchat.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,10 @@ pub async fn resolve_subchat_params(
159159
Some(re) if re.eq_ignore_ascii_case("low") => Some(ReasoningEffort::Low),
160160
Some(re) if re.eq_ignore_ascii_case("medium") => Some(ReasoningEffort::Medium),
161161
Some(re) if re.eq_ignore_ascii_case("high") => Some(ReasoningEffort::High),
162+
Some(re) if re.eq_ignore_ascii_case("xhigh") => Some(ReasoningEffort::XHigh),
163+
Some(re) if re.eq_ignore_ascii_case("max") => Some(ReasoningEffort::Max),
162164
Some(re) => return Err(format!(
163-
"invalid reasoning_effort '{}' for '{}', expected: low, medium, high",
165+
"invalid reasoning_effort '{}' for '{}', expected: low, medium, high, xhigh, max",
164166
re, tool_name
165167
)),
166168
None => None,

refact-agent/engine/src/yaml_configs/customization_registry.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -702,7 +702,9 @@ mod tests {
702702
if let Some(ref reasoning_effort) = config.subchat.reasoning_effort {
703703
let valid = reasoning_effort.eq_ignore_ascii_case("low")
704704
|| reasoning_effort.eq_ignore_ascii_case("medium")
705-
|| reasoning_effort.eq_ignore_ascii_case("high");
705+
|| reasoning_effort.eq_ignore_ascii_case("high")
706+
|| reasoning_effort.eq_ignore_ascii_case("xhigh")
707+
|| reasoning_effort.eq_ignore_ascii_case("max");
706708
assert!(
707709
valid,
708710
"Subagent '{}' has invalid reasoning_effort: {}",

refact-agent/gui/src/__fixtures__/caps.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ export const STUB_CAPS_RESPONSE: CapsResponse = {
285285
supports_multimodality: true,
286286
supports_clicks: true,
287287
supports_agent: true,
288-
supports_reasoning: "anthropic",
288+
supports_reasoning: "anthropic_budget",
289289
supports_boost_reasoning: true,
290290
default_temperature: null,
291291
},

refact-agent/gui/src/__tests__/integration/chatSubscription.integration.test.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,8 @@ describe.skipIf(!(await isServerAvailable()))(
265265
const chatId = generateChatId("test-abort-stream");
266266

267267
// Start collecting events
268-
const eventsPromise = collectEvents(chatId, 15, 10000);
268+
// Use a higher cap here: streaming can emit many deltas before abort lands.
269+
const eventsPromise = collectEvents(chatId, 200, 15000);
269270

270271
await new Promise((r) => setTimeout(r, 300));
271272

@@ -282,8 +283,8 @@ describe.skipIf(!(await isServerAvailable()))(
282283
LSP_PORT,
283284
);
284285

285-
// Wait for generation to start
286-
await new Promise((r) => setTimeout(r, 1000));
286+
// Wait briefly for generation to start, then abort.
287+
await new Promise((r) => setTimeout(r, 200));
287288

288289
// Send abort
289290
await abortGeneration(chatId, LSP_PORT);

refact-agent/gui/src/app/middleware.ts

Lines changed: 103 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import {
3333
setThinkingBudget,
3434
setTemperature,
3535
setMaxTokens,
36+
buildThreadParamsPatch,
3637
} from "../features/Chat/Thread";
3738
import { saveLastThreadParams } from "../utils/threadStorage";
3839
import { statisticsApi } from "../services/refact/statistics";
@@ -82,7 +83,7 @@ const startListening = listenerMiddleware.startListening.withTypes<
8283

8384
startListening({
8485
actionCreator: newChatAction,
85-
effect: (_action, listenerApi) => {
86+
effect: async (_action, listenerApi) => {
8687
const state = listenerApi.getState();
8788
const chatId = state.chat.current_thread_id;
8889

@@ -102,6 +103,36 @@ startListening({
102103
}),
103104
);
104105
listenerApi.dispatch(clearError());
106+
107+
// New chats are created client-side first; sync the initial params to backend
108+
// immediately so the first snapshot doesn't overwrite local defaults.
109+
const runtime = state.chat.threads[chatId];
110+
const port = state.config.lspPort;
111+
if (!runtime || !port || !chatId) return;
112+
113+
try {
114+
const patch = buildThreadParamsPatch(runtime.thread, true);
115+
116+
// If reasoning is enabled by defaults (new chat), ensure temperature is sent as null.
117+
// Otherwise backend may fall back to a numeric default (often 0), which is invalid
118+
// for reasoning-enabled providers.
119+
const isReasoningEnabled =
120+
Boolean(runtime.thread.boost_reasoning) ||
121+
runtime.thread.reasoning_effort != null ||
122+
runtime.thread.thinking_budget != null;
123+
if (isReasoningEnabled) {
124+
patch.temperature = null;
125+
}
126+
127+
if (Object.keys(patch).length > 0) {
128+
await sendChatCommand(chatId, port, state.config.apiKey ?? undefined, {
129+
type: "set_params",
130+
patch,
131+
});
132+
}
133+
} catch {
134+
// Silently ignore - backend may not support this command
135+
}
105136
},
106137
});
107138

@@ -707,6 +738,15 @@ startListening({
707738
type: "set_params",
708739
patch: { boost_reasoning: action.payload.value },
709740
});
741+
742+
// When reasoning is enabled, temperature must be unset.
743+
// This avoids provider-side validation errors.
744+
if (action.payload.value) {
745+
await sendChatCommand(chatId, port, apiKey ?? undefined, {
746+
type: "set_params",
747+
patch: { temperature: null },
748+
});
749+
}
710750
} catch {
711751
// Silently ignore - backend may not support this command
712752
}
@@ -731,6 +771,14 @@ startListening({
731771
type: "set_params",
732772
patch: { reasoning_effort: action.payload.value },
733773
});
774+
775+
// Any explicit reasoning effort implies reasoning mode: unset temperature.
776+
if (action.payload.value != null) {
777+
await sendChatCommand(chatId, port, apiKey ?? undefined, {
778+
type: "set_params",
779+
patch: { temperature: null },
780+
});
781+
}
734782
} catch {
735783
// Silently ignore
736784
}
@@ -755,6 +803,14 @@ startListening({
755803
type: "set_params",
756804
patch: { thinking_budget: action.payload.value },
757805
});
806+
807+
// Any explicit thinking budget implies reasoning mode: unset temperature.
808+
if (action.payload.value != null) {
809+
await sendChatCommand(chatId, port, apiKey ?? undefined, {
810+
type: "set_params",
811+
patch: { temperature: null },
812+
});
813+
}
758814
} catch {
759815
// Silently ignore errors - user will see them via SSE events
760816
}
@@ -1028,23 +1084,52 @@ startListening({
10281084
if (!runtime) return;
10291085

10301086
const isUnstartedChat = runtime.thread.messages.length === 0;
1031-
if (!isUnstartedChat) return;
1032-
1033-
saveLastThreadParams({
1034-
model: runtime.thread.model,
1035-
mode: runtime.thread.mode,
1036-
boost_reasoning: runtime.thread.boost_reasoning,
1037-
reasoning_effort: runtime.thread.reasoning_effort,
1038-
thinking_budget: runtime.thread.thinking_budget,
1039-
temperature: runtime.thread.temperature,
1040-
max_tokens: runtime.thread.max_tokens,
1041-
increase_max_tokens: runtime.thread.increase_max_tokens,
1042-
include_project_info: runtime.thread.include_project_info,
1043-
context_tokens_cap: runtime.thread.context_tokens_cap,
1044-
system_prompt: state.chat.system_prompt,
1045-
checkpoints_enabled: state.chat.checkpoints_enabled,
1046-
follow_ups_enabled: state.chat.follow_ups_enabled,
1047-
});
1087+
const shouldPersistForNewChats =
1088+
isUnstartedChat ||
1089+
setBoostReasoning.match(_action) ||
1090+
setReasoningEffort.match(_action) ||
1091+
setThinkingBudget.match(_action);
1092+
if (!shouldPersistForNewChats) return;
1093+
1094+
// Persist the updated param(s) as defaults for *new* chats.
1095+
// IMPORTANT: For started chats, we only persist reasoning-related toggles
1096+
// (boost_reasoning / reasoning_effort / thinking_budget), keeping other
1097+
// sampling params “sticky” only before the first message.
1098+
const mode = runtime.thread.mode;
1099+
const patch: Parameters<typeof saveLastThreadParams>[0] = { mode };
1100+
1101+
if (isUnstartedChat) {
1102+
patch.model = runtime.thread.model;
1103+
patch.temperature = runtime.thread.temperature;
1104+
patch.max_tokens = runtime.thread.max_tokens;
1105+
patch.increase_max_tokens = runtime.thread.increase_max_tokens;
1106+
patch.include_project_info = runtime.thread.include_project_info;
1107+
patch.context_tokens_cap = runtime.thread.context_tokens_cap;
1108+
patch.system_prompt = state.chat.system_prompt;
1109+
patch.checkpoints_enabled = state.chat.checkpoints_enabled;
1110+
patch.follow_ups_enabled = state.chat.follow_ups_enabled;
1111+
}
1112+
1113+
if (setBoostReasoning.match(_action)) {
1114+
patch.boost_reasoning = runtime.thread.boost_reasoning;
1115+
// preserve temperature reset as part of “reasoning defaults”
1116+
patch.temperature = runtime.thread.temperature;
1117+
}
1118+
if (setReasoningEffort.match(_action)) {
1119+
patch.reasoning_effort = runtime.thread.reasoning_effort;
1120+
patch.temperature = runtime.thread.temperature;
1121+
}
1122+
if (setThinkingBudget.match(_action)) {
1123+
patch.thinking_budget = runtime.thread.thinking_budget;
1124+
patch.temperature = runtime.thread.temperature;
1125+
}
1126+
1127+
// Still persist model changes after start (matches current UX).
1128+
if (setChatModel.match(_action)) {
1129+
patch.model = runtime.thread.model;
1130+
}
1131+
1132+
saveLastThreadParams(patch);
10481133
},
10491134
});
10501135

refact-agent/gui/src/components/ChatContent/MessageFooter.tsx

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,8 @@ export const MessageFooter: React.FC<MessageFooterProps> = ({
118118
"cache_read_input_tokens",
119119
],
120120
});
121-
const hasUsageInfo = Boolean(usage && contextTokens > 0) || showCoins || hasUsd;
121+
const hasUsageInfo =
122+
Boolean(usage && contextTokens > 0) || showCoins || hasUsd;
122123

123124
return (
124125
<div className={styles.footerLane}>
@@ -259,18 +260,27 @@ export const MessageFooter: React.FC<MessageFooterProps> = ({
259260
<Text size="2" weight="bold">
260261
Cost
261262
</Text>
262-
<Text size="2">
263-
{formatUsd(meteringUsd.total_usd)}
264-
</Text>
263+
<Text size="2">{formatUsd(meteringUsd.total_usd)}</Text>
265264
</Flex>
266265
<UsdDisplay label="Prompt" value={meteringUsd.prompt_usd} />
267-
<UsdDisplay label="Completion" value={meteringUsd.generated_usd} />
268-
{meteringUsd.cache_read_usd !== undefined && meteringUsd.cache_read_usd > 0 && (
269-
<UsdDisplay label="Cache read" value={meteringUsd.cache_read_usd} />
270-
)}
271-
{meteringUsd.cache_creation_usd !== undefined && meteringUsd.cache_creation_usd > 0 && (
272-
<UsdDisplay label="Cache creation" value={meteringUsd.cache_creation_usd} />
273-
)}
266+
<UsdDisplay
267+
label="Completion"
268+
value={meteringUsd.generated_usd}
269+
/>
270+
{meteringUsd.cache_read_usd !== undefined &&
271+
meteringUsd.cache_read_usd > 0 && (
272+
<UsdDisplay
273+
label="Cache read"
274+
value={meteringUsd.cache_read_usd}
275+
/>
276+
)}
277+
{meteringUsd.cache_creation_usd !== undefined &&
278+
meteringUsd.cache_creation_usd > 0 && (
279+
<UsdDisplay
280+
label="Cache creation"
281+
value={meteringUsd.cache_creation_usd}
282+
/>
283+
)}
274284
</>
275285
)}
276286
</Flex>

0 commit comments

Comments (0)