Skip to content

Commit 2940df7

Browse files
committed
refactor(llm)!: introduce wire format adapters replacing HF endpoints
- Remove `forward_to_hf_endpoint.rs` and deprecate HF endpoint style
- Add pluggable LLM wire adapters (OpenAI Chat, OpenAI Responses, Anthropic)
- Introduce canonical `LlmRequest`/`LlmResponse`/`LlmStreamDelta` types
- Add `WireFormat` enum to model capabilities with provider defaults
- Migrate chat preparation/generation to new adapter system
- Add new conversation modes: ask, plan, review, debug, learn, shell, past_work, quick_agent
- Improve streaming parsing, logging, and error handling
- Enhance tool call safety with pending call verification
- Add `CacheControl`, `ResponseFormat` support and usage token parsing
- Add xAI Responses provider config

BREAKING CHANGE: HuggingFace endpoint style removed - use OpenAI-compatible endpoints instead
1 parent bb05bd8 commit 2940df7

File tree

21 files changed

+812
-134
lines changed

21 files changed

+812
-134
lines changed

refact-agent/engine/src/call_validation.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,11 @@ impl Default for ChatContent {
174174
pub struct ChatUsage {
175175
pub prompt_tokens: usize,
176176
pub completion_tokens: usize,
177-
pub total_tokens: usize, // TODO: remove (can produce self-contradictory data when prompt+completion != total)
177+
pub total_tokens: usize,
178+
#[serde(default, skip_serializing_if = "Option::is_none")]
179+
pub cache_creation_tokens: Option<usize>,
180+
#[serde(default, skip_serializing_if = "Option::is_none")]
181+
pub cache_read_tokens: Option<usize>,
178182
}
179183

180184
#[derive(Debug, Serialize, Clone, Default)]

refact-agent/engine/src/caps/caps.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ pub struct BaseModelRecord {
4646
#[serde(default, skip_serializing)]
4747
pub support_metadata: bool,
4848
#[serde(default, skip_serializing)]
49+
pub extra_headers: std::collections::HashMap<String, String>,
50+
#[serde(default, skip_serializing)]
4951
pub similar_models: Vec<String>,
5052
#[serde(default)]
5153
pub tokenizer: String,
@@ -54,6 +56,11 @@ pub struct BaseModelRecord {
5456
pub enabled: bool,
5557
#[serde(default)]
5658
pub experimental: bool,
59+
60+
/// Use max_completion_tokens instead of max_tokens (required for OpenAI o1/o3 models)
61+
#[serde(default)]
62+
pub supports_max_completion_tokens: bool,
63+
5764
// Fields used for Config/UI management
5865
#[serde(skip_deserializing)]
5966
pub removable: bool,

refact-agent/engine/src/caps/providers.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ pub struct CapsProvider {
4545
#[serde(default)]
4646
pub tokenizer_api_key: String,
4747

48+
#[serde(default)]
49+
pub extra_headers: std::collections::HashMap<String, String>,
50+
4851
#[serde(default)]
4952
pub code_completion_n_ctx: usize,
5053

@@ -263,6 +266,10 @@ const PROVIDER_TEMPLATES: &[(&str, &str)] = &[
263266
"openai",
264267
include_str!("../yaml_configs/default_providers/openai.yaml"),
265268
),
269+
(
270+
"openai_responses",
271+
include_str!("../yaml_configs/default_providers/openai_responses.yaml"),
272+
),
266273
(
267274
"openrouter",
268275
include_str!("../yaml_configs/default_providers/openrouter.yaml"),
@@ -271,6 +278,10 @@ const PROVIDER_TEMPLATES: &[(&str, &str)] = &[
271278
"xai",
272279
include_str!("../yaml_configs/default_providers/xai.yaml"),
273280
),
281+
(
282+
"xai_responses",
283+
include_str!("../yaml_configs/default_providers/xai_responses.yaml"),
284+
),
274285
];
275286
static PARSED_PROVIDERS: OnceLock<IndexMap<String, CapsProvider>> = OnceLock::new();
276287
static PARSED_MODEL_DEFAULTS: OnceLock<IndexMap<String, ModelDefaultSettingsUI>> = OnceLock::new();
@@ -513,6 +524,7 @@ pub fn add_models_to_caps(caps: &mut CodeAssistantCaps, providers: Vec<CapsProvi
513524
base_model_rec.support_metadata = provider.support_metadata;
514525
base_model_rec.endpoint_style = provider.endpoint_style.clone();
515526
base_model_rec.wire_format = provider.wire_format;
527+
base_model_rec.extra_headers = provider.extra_headers.clone();
516528
}
517529

518530
for mut provider in providers {

refact-agent/engine/src/caps/self_hosted.rs

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,15 @@ use crate::caps::{
1111
default_rejection_threshold, relative_to_full_url, normalize_string, resolve_relative_urls,
1212
};
1313
use crate::caps::providers;
14+
use crate::llm::WireFormat;
1415

1516
#[derive(Debug, Deserialize, Clone, Default)]
1617
pub struct SelfHostedCapsModelRecord {
1718
pub n_ctx: usize,
1819

20+
#[serde(default)]
21+
pub wire_format: WireFormat,
22+
1923
#[serde(default)]
2024
pub supports_scratchpads: HashMap<String, serde_json::Value>,
2125

@@ -218,22 +222,16 @@ impl SelfHostedCapsModelRecord {
218222
let mut base = BaseModelRecord {
219223
n_ctx: self.n_ctx,
220224
enabled: true,
225+
wire_format: self.wire_format,
221226
..Default::default()
222227
};
223228

224229
let (scratchpad, scratchpad_patch) = self.get_chat_scratchpad();
225230

226-
// Non passthrough models, don't support endpoints of `/v1/chat/completions` in openai style, only `/v1/completions`
227-
let endpoint_to_use = if scratchpad == "PASSTHROUGH" {
228-
&self_hosted_caps.chat.endpoint
229-
} else {
230-
&self_hosted_caps.completion.endpoint
231-
};
232-
233231
configure_base_model(
234232
&mut base,
235233
model_name,
236-
endpoint_to_use,
234+
&self_hosted_caps.chat.endpoint,
237235
&self_hosted_caps.cloud_name,
238236
&self_hosted_caps.tokenizer_endpoints,
239237
caps_url,
@@ -385,6 +383,7 @@ impl SelfHostedCaps {
385383
embedding_endpoint: self.embedding.endpoint.clone(),
386384
api_key: cmdline_api_key.to_string(),
387385
tokenizer_api_key: cmdline_api_key.to_string(),
386+
extra_headers: std::collections::HashMap::new(),
388387
code_completion_n_ctx: 0,
389388
support_metadata: self.support_metadata,
390389
completion_models: IndexMap::new(),

refact-agent/engine/src/chat/openai_merge.rs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,11 @@ pub fn merge_tool_call(accumulated: &mut Vec<serde_json::Value>, new_tc: serde_j
6868

6969
if let Some(args) = func.get("arguments") {
7070
if !args.is_null() {
71-
let new_args = args.as_str().unwrap_or("");
71+
let new_args = if let Some(s) = args.as_str() {
72+
s.to_string()
73+
} else {
74+
serde_json::to_string(args).unwrap_or_default()
75+
};
7276
let prev_args = existing["function"]["arguments"].as_str().unwrap_or("");
7377
existing["function"]["arguments"] = json!(format!("{}{}", prev_args, new_args));
7478
}
@@ -264,7 +268,7 @@ mod tests {
264268
}
265269

266270
#[test]
267-
fn test_merge_tool_calls_arguments_object_treated_as_empty() {
271+
fn test_merge_tool_calls_arguments_object_stringified() {
268272
let mut accumulated = Vec::new();
269273
merge_tool_call(
270274
&mut accumulated,
@@ -275,11 +279,11 @@ mod tests {
275279
}),
276280
);
277281

278-
assert_eq!(accumulated[0]["function"]["arguments"], "");
282+
assert_eq!(accumulated[0]["function"]["arguments"], r#"{"key":"value"}"#);
279283
}
280284

281285
#[test]
282-
fn test_merge_tool_calls_arguments_number_treated_as_empty() {
286+
fn test_merge_tool_calls_arguments_number_stringified() {
283287
let mut accumulated = Vec::new();
284288
merge_tool_call(
285289
&mut accumulated,
@@ -290,7 +294,7 @@ mod tests {
290294
}),
291295
);
292296

293-
assert_eq!(accumulated[0]["function"]["arguments"], "");
297+
assert_eq!(accumulated[0]["function"]["arguments"], "123");
294298
}
295299

296300
#[test]

refact-agent/engine/src/chat/prepare.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,8 @@ pub async fn prepare_chat_passthrough(
236236
tool_choice,
237237
parallel_tool_calls: options.parallel_tool_calls.unwrap_or(false),
238238
stream: true,
239+
response_format: None,
240+
cache_control: Default::default(),
239241
extra_body: None,
240242
};
241243

refact-agent/engine/src/chat/queue.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -611,15 +611,16 @@ fn is_allowed_role_for_restore(role: &str) -> bool {
611611
matches!(role, "user" | "assistant" | "system" | "tool")
612612
}
613613

614-
/// Sanitize message for branching - preserves conversation structure (tool_calls, tool results)
615-
/// but strips transient metadata (usage, checkpoints, citations, etc.)
614+
/// Sanitize message for branching - preserves conversation structure but strips:
615+
/// - tool_calls from assistant messages (security: prevents prerun of injected tool calls)
616+
/// - transient metadata (usage, checkpoints, citations, etc.)
616617
fn sanitize_message_for_restore(msg: &ChatMessage) -> ChatMessage {
617618
ChatMessage {
618619
message_id: Uuid::new_v4().to_string(),
619620
role: msg.role.clone(),
620621
content: msg.content.clone(),
621-
tool_calls: msg.tool_calls.clone(), // Preserve tool calls for conversation context
622-
tool_call_id: msg.tool_call_id.clone(), // Preserve for tool messages
622+
tool_calls: None, // Security: strip tool_calls to prevent prerun of restored messages
623+
tool_call_id: msg.tool_call_id.clone(), // Preserve for tool result messages
623624
tool_failed: msg.tool_failed, // Preserve tool execution status
624625
usage: None, // Strip metering data
625626
checkpoints: vec![], // Strip checkpoint data

refact-agent/engine/src/chat/stream_core.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
use std::collections::HashMap;
21
use std::sync::Arc;
32
use std::sync::atomic::{AtomicBool, Ordering};
43
use std::time::Instant;
@@ -55,6 +54,10 @@ pub async fn run_llm_stream<C: StreamCollector>(
5554
params: StreamRunParams,
5655
collector: &mut C,
5756
) -> Result<Vec<ChoiceFinal>, String> {
57+
if params.llm_request.params.n.unwrap_or(1) != 1 {
58+
return Err("Streaming with n > 1 is not supported".to_string());
59+
}
60+
5861
let (client, slowdown_arc) = {
5962
let gcx_locked = gcx.read().await;
6063
(
@@ -72,10 +75,11 @@ pub async fn run_llm_stream<C: StreamCollector>(
7275
let adapter_settings = AdapterSettings {
7376
api_key: params.model_rec.api_key.clone(),
7477
endpoint: params.model_rec.endpoint.clone(),
75-
extra_headers: HashMap::new(),
78+
extra_headers: params.model_rec.extra_headers.clone(),
7679
model_name: params.model_rec.name.clone(),
7780
supports_tools: params.supports_tools,
7881
supports_reasoning: params.supports_reasoning,
82+
supports_max_completion_tokens: params.model_rec.supports_max_completion_tokens,
7983
};
8084

8185
// Build HTTP request using adapter

refact-agent/engine/src/llm/adapter.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ pub struct AdapterSettings {
4242
pub model_name: String,
4343
pub supports_tools: bool,
4444
pub supports_reasoning: bool,
45+
pub supports_max_completion_tokens: bool,
4546
}
4647

4748
pub trait LlmWireAdapter: Send + Sync {

0 commit comments

Comments
 (0)