From 1b6c25e43f09e815ddc1fbb4bf2712f7030dde7b Mon Sep 17 00:00:00 2001 From: Brent Rager Date: Sun, 14 Jun 2026 14:30:20 -0400 Subject: [PATCH] SMOODEV-1472: Add structured output (JSON-schema constrained responses) to LLM client MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The general-agent brain must emit a typed JSON object every turn; this lands the keystone capability on the Rust engine's LLM client. API: - ResponseFormat::JsonSchema { name, schema, strict } + json_schema() ctor - LlmClient::chat_structured / chat_with_format; chat delegates with None - LlmResponse::structured_json() / deserialize_json::() — clear error on empty/non-JSON content, never a silent empty value - LlmProvider::chat_structured trait method; MockLlmClient records the requested format on RecordedCall.response_format Provider handling: - OpenAI-compat: response_format { type: json_schema, json_schema {...} } on /chat/completions (LiteLLM gateway shape) - Anthropic-native: forced single tool call whose input_schema IS the schema, forced via tool_choice; tool input surfaced back as the JSON content Streaming structured output and agent-level wiring deferred as follow-ups. Co-Authored-By: Claude Opus 4.8 (1M context) --- .changeset/structured-output.md | 19 + rust/smooth-operator-core/src/lib.rs | 2 +- rust/smooth-operator-core/src/llm.rs | 415 +++++++++++++++++- rust/smooth-operator-core/src/llm_provider.rs | 131 +++++- 4 files changed, 557 insertions(+), 10 deletions(-) create mode 100644 .changeset/structured-output.md diff --git a/.changeset/structured-output.md b/.changeset/structured-output.md new file mode 100644 index 0000000..96fc62f --- /dev/null +++ b/.changeset/structured-output.md @@ -0,0 +1,19 @@ +--- +'@smooai/smooth-operator-core-monorepo': minor +--- + +Add structured output (schema-constrained JSON responses) to the LLM client (SMOODEV-1472). + +New public API: + +- `ResponseFormat` enum (`JsonSchema { name, schema, strict }`) with a `ResponseFormat::json_schema(name, schema)` constructor (defaults `strict = true`). +- `LlmClient::chat_structured(messages, &ResponseFormat)` and the lower-level `LlmClient::chat_with_format(messages, tools, Option<&ResponseFormat>)`. `chat` now delegates to `chat_with_format(.., None)`. +- `LlmResponse::structured_json() -> serde_json::Value` and `LlmResponse::deserialize_json::()` — both surface a clear error (never a silent empty value) when the model returned empty or non-JSON content. +- `LlmProvider::chat_structured` trait method; `MockLlmClient` records the requested `ResponseFormat` on `RecordedCall.response_format` for assertions. + +Provider handling: + +- **OpenAI-compatible** (LiteLLM gateway, etc.): serialized on `/chat/completions` as `response_format: { type: "json_schema", json_schema: { name, schema, strict } }`. +- **Anthropic-native** (`/v1/messages`): achieved via a forced single tool call — a synthetic tool whose `input_schema` is the requested schema, forced with `tool_choice: { type: "tool", name }`; the tool's `input` is surfaced back as the JSON content string. + +Streaming structured output and agent-level (`Agent::run`) wiring are deliberately deferred as follow-ups. diff --git a/rust/smooth-operator-core/src/lib.rs b/rust/smooth-operator-core/src/lib.rs index 9fda4a6..af5c309 100644 --- a/rust/smooth-operator-core/src/lib.rs +++ b/rust/smooth-operator-core/src/lib.rs @@ -32,7 +32,7 @@ pub use conversation::{CompactionResult, CompactionStrategy, Conversation, Messa pub use cost::{BudgetExceeded, CostBudget, CostEntry, CostTracker, ModelPricing}; pub use human::{human_channel, ConfirmationHook, HumanChannelPair, HumanRequest, HumanResponse}; pub use knowledge::{Document, DocumentType, InMemoryKnowledge, KnowledgeBase, KnowledgeResult}; -pub use llm::{accumulate_stream_events, LlmClient, LlmConfig, LlmResponse, StreamEvent}; +pub use llm::{accumulate_stream_events, LlmClient, LlmConfig, LlmResponse, ResponseFormat, StreamEvent}; pub use memory::{InMemoryMemory, Memory, MemoryEntry, MemoryType}; pub use providers::{Activity, ModelRouting, ModelSlot, ProviderConfig, ProviderRegistry}; pub use tool::{Tool, ToolCall, ToolRegistry, ToolResult, ToolSchema}; diff --git a/rust/smooth-operator-core/src/llm.rs b/rust/smooth-operator-core/src/llm.rs index f529090..5e2472f 100644 --- a/rust/smooth-operator-core/src/llm.rs +++ b/rust/smooth-operator-core/src/llm.rs @@ -54,6 +54,85 @@ pub enum ApiFormat { Anthropic, } +/// A constraint on the shape of the model's response — used to request +/// **structured output** (a guaranteed-JSON answer that conforms to a +/// caller-supplied JSON Schema). +/// +/// This is the keystone capability for an agent "brain" that must emit a +/// typed JSON object every turn (SMOODEV-1472). +/// +/// # Wire mapping +/// - **OpenAI-compatible** (`ApiFormat::OpenAiCompat`, e.g. the LiteLLM +/// gateway at `llm.smoo.ai`): serialized on `/chat/completions` as +/// `response_format: { type: "json_schema", json_schema: { name, schema, +/// strict } }`. This is what most models behind the gateway expect. +/// - **Anthropic-native** (`ApiFormat::Anthropic`, `/v1/messages`): Anthropic +/// has no `response_format` field, so structured output is achieved via a +/// **forced single tool call** — a synthetic tool whose `input_schema` IS the +/// requested schema, with `tool_choice` forcing exactly that tool. The tool's +/// `input` is then surfaced back as the response content (the JSON string). +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ResponseFormat { + /// Constrain the response to a named JSON Schema. + JsonSchema { + /// A short identifier for the schema (e.g. `"weather_report"`). On the + /// Anthropic forced-tool path this also names the synthetic tool. + name: String, + /// The JSON Schema the response object must conform to. + schema: serde_json::Value, + /// When `true`, request strict schema adherence. OpenAI/LiteLLM + /// enforce the schema exactly (no extra keys); on the Anthropic + /// forced-tool path this flag is informational (the forced tool call + /// already constrains the shape). + strict: bool, + }, +} + +impl ResponseFormat { + /// Convenience constructor for a strict JSON-schema response format. + #[must_use] + pub fn json_schema(name: impl Into, schema: serde_json::Value) -> Self { + Self::JsonSchema { + name: name.into(), + schema, + strict: true, + } + } +} + +/// OpenAI-compatible `response_format` wire object: +/// `{ "type": "json_schema", "json_schema": { name, schema, strict } }`. +#[derive(Debug, Serialize)] +struct OpenAiResponseFormat { + r#type: &'static str, + json_schema: OpenAiJsonSchema, +} + +#[derive(Debug, Serialize)] +struct OpenAiJsonSchema { + name: String, + schema: serde_json::Value, + strict: bool, +} + +impl ResponseFormat { + /// Render this format into the OpenAI-compatible `response_format` wire + /// object. Returns `None` for variants that don't map to `response_format` + /// (none today, but keeps the call site future-proof). + fn to_openai(&self) -> OpenAiResponseFormat { + match self { + Self::JsonSchema { name, schema, strict } => OpenAiResponseFormat { + r#type: "json_schema", + json_schema: OpenAiJsonSchema { + name: name.clone(), + schema: schema.clone(), + strict: *strict, + }, + }, + } + } +} + /// Configuration for the LLM client. #[derive(Clone)] pub struct LlmConfig { @@ -169,6 +248,47 @@ pub struct LlmResponse { pub reasoning_content: Option, } +impl LlmResponse { + /// Parse the response `content` as a JSON value. For a + /// [structured-output](ResponseFormat) response this is the + /// schema-conforming object the model produced. + /// + /// # Errors + /// Returns an error if `content` is empty or is not valid JSON — the error + /// includes a (truncated) snippet of the offending content so callers can + /// diagnose a model that ignored the schema. Never silently returns an + /// empty/null value. + pub fn structured_json(&self) -> anyhow::Result { + let trimmed = self.content.trim(); + if trimmed.is_empty() { + anyhow::bail!("structured output: model returned empty content (expected a JSON object)"); + } + serde_json::from_str(trimmed).map_err(|e| { + let snippet: String = trimmed.chars().take(200).collect(); + anyhow::anyhow!("structured output: response content was not valid JSON ({e}): {snippet}") + }) + } + + /// Parse the response `content` into a caller type `T`. + /// + /// Convenience over [`Self::structured_json`] for the common case of + /// deserializing directly into a typed struct. + /// + /// # Errors + /// Returns an error if `content` is empty, is not valid JSON, or does not + /// match the shape of `T`. + pub fn deserialize_json(&self) -> anyhow::Result { + let trimmed = self.content.trim(); + if trimmed.is_empty() { + anyhow::bail!("structured output: model returned empty content (expected JSON for the requested type)"); + } + serde_json::from_str(trimmed).map_err(|e| { + let snippet: String = trimmed.chars().take(200).collect(); + anyhow::anyhow!("structured output: could not deserialize response into the requested type ({e}): {snippet}") + }) + } +} + /// Parse the gateway's authoritative cost from an HTTP response's /// headers. Checks a few header name variants so the same parser /// works across LiteLLM versions and other OpenAI-compat gateways @@ -241,6 +361,13 @@ struct ChatRequest { /// the field without `tools`. #[serde(skip_serializing_if = "Option::is_none")] tool_choice: Option, + /// Structured-output constraint. When present, serialized as + /// `response_format: { type: "json_schema", json_schema: { name, schema, + /// strict } }` — the OpenAI/LiteLLM shape for schema-constrained JSON + /// responses (SMOODEV-1472). Skipped when absent so providers that don't + /// support it (and the no-structured-output path) see no change. + #[serde(skip_serializing_if = "Option::is_none")] + response_format: Option, } #[derive(Debug, Serialize)] @@ -535,6 +662,20 @@ struct AnthropicRequest { messages: Vec, #[serde(skip_serializing_if = "Vec::is_empty")] tools: Vec, + /// Forces a specific tool call. Anthropic has no `response_format`, so + /// structured output is achieved by attaching a single synthetic tool + /// (whose `input_schema` is the requested JSON Schema) and forcing it via + /// `tool_choice: { type: "tool", name }`. SMOODEV-1472. + #[serde(skip_serializing_if = "Option::is_none")] + tool_choice: Option, +} + +/// Anthropic `tool_choice` wire object. For structured output we use the +/// `{ "type": "tool", "name": "..." }` form to force exactly one tool call. +#[derive(Debug, Serialize)] +struct AnthropicToolChoice { + r#type: &'static str, + name: String, } #[derive(Debug, Serialize, Deserialize, Clone)] @@ -584,6 +725,23 @@ struct AnthropicUsage { output_tokens: u32, } +/// Sanitize a [`ResponseFormat`] schema name into a valid tool name for the +/// Anthropic forced-tool structured-output path. Anthropic tool names must +/// match `^[a-zA-Z0-9_-]{1,64}$`, so we replace any other character with `_` +/// and fall back to a stable default when the result would be empty. +fn sanitize_tool_name(name: &str) -> String { + let cleaned: String = name + .chars() + .map(|c| if c.is_ascii_alphanumeric() || c == '_' || c == '-' { c } else { '_' }) + .take(64) + .collect(); + if cleaned.is_empty() { + "structured_output".to_string() + } else { + cleaned + } +} + /// Calculate exponential backoff duration for a given retry attempt. fn calculate_backoff(attempt: u32, policy: &RetryPolicy) -> Duration { let exp_ms = policy.base_delay_ms.saturating_mul(1u64 << attempt); @@ -658,8 +816,39 @@ impl LlmClient { /// # Errors /// Returns error if the API call fails after all retries or returns an invalid response. pub async fn chat(&self, messages: &[&Message], tools: &[ToolSchema]) -> anyhow::Result { + self.chat_with_format(messages, tools, None).await + } + + /// Send a chat completion request constrained to a JSON Schema — + /// **structured output** (SMOODEV-1472). + /// + /// The returned [`LlmResponse`]'s `content` is the JSON string produced by + /// the model. Use [`LlmResponse::structured_json`] / + /// [`LlmResponse::deserialize_json`] to parse it; both surface a clear + /// error if the model returned non-JSON. + /// + /// # Provider handling + /// - **OpenAI-compatible**: sends the `response_format` field. + /// - **Anthropic-native**: forces a single tool call whose `input_schema` + /// is the requested schema, then surfaces the tool input as the content. + /// + /// # Errors + /// Returns error if the API call fails after all retries or returns an + /// invalid response. + pub async fn chat_structured(&self, messages: &[&Message], format: &ResponseFormat) -> anyhow::Result { + self.chat_with_format(messages, &[], Some(format)).await + } + + /// Core chat implementation shared by [`Self::chat`] and + /// [`Self::chat_structured`]. When `format` is `Some`, the request is + /// constrained to the given JSON Schema (see [`ResponseFormat`]). + /// + /// # Errors + /// Returns error if the API call fails after all retries or returns an + /// invalid response. + pub async fn chat_with_format(&self, messages: &[&Message], tools: &[ToolSchema], format: Option<&ResponseFormat>) -> anyhow::Result { match self.config.api_format { - ApiFormat::Anthropic => return self.chat_anthropic(messages, tools).await, + ApiFormat::Anthropic => return self.chat_anthropic(messages, tools, format).await, ApiFormat::OpenAiCompat => {} } @@ -690,6 +879,7 @@ impl LlmClient { temperature: self.config.temperature, tools: chat_tools, tool_choice, + response_format: format.map(ResponseFormat::to_openai), }; let url = format!("{}/chat/completions", self.config.api_url); @@ -854,6 +1044,10 @@ impl LlmClient { temperature: self.config.temperature, tools: chat_tools, tool_choice, + // Streaming structured output is not yet wired — callers needing a + // schema-constrained response use the non-streaming + // `chat_structured` path. SMOODEV-1472 follow-up. + response_format: None, }; let url = format!("{}/chat/completions", self.config.api_url); @@ -1025,10 +1219,10 @@ impl LlmClient { } /// Send a chat completion request using the Anthropic native API. - async fn chat_anthropic(&self, messages: &[&Message], tools: &[ToolSchema]) -> anyhow::Result { + async fn chat_anthropic(&self, messages: &[&Message], tools: &[ToolSchema], format: Option<&ResponseFormat>) -> anyhow::Result { let (system, anthropic_messages) = convert_messages_to_anthropic(messages); - let anthropic_tools: Vec = tools + let mut anthropic_tools: Vec = tools .iter() .map(|t| AnthropicTool { name: t.name.clone(), @@ -1037,12 +1231,36 @@ impl LlmClient { }) .collect(); + // Structured output on the Anthropic-native path: Anthropic has no + // `response_format`, so we attach a synthetic tool whose `input_schema` + // is the requested JSON Schema and force it via `tool_choice`. The + // model's tool `input` becomes the structured JSON answer. SMOODEV-1472. + let (tool_choice, forced_tool_name) = match format { + Some(ResponseFormat::JsonSchema { name, schema, .. }) => { + let tool_name = sanitize_tool_name(name); + anthropic_tools.push(AnthropicTool { + name: tool_name.clone(), + description: "Return the response as a single JSON object conforming to the schema.".into(), + input_schema: schema.clone(), + }); + ( + Some(AnthropicToolChoice { + r#type: "tool", + name: tool_name.clone(), + }), + Some(tool_name), + ) + } + None => (None, None), + }; + let request = AnthropicRequest { model: self.config.model.clone(), max_tokens: self.config.max_tokens, system, messages: anthropic_messages, tools: anthropic_tools, + tool_choice, }; let url = format!("{}/messages", self.config.api_url); @@ -1071,6 +1289,9 @@ impl LlmClient { let mut content = String::new(); let mut tool_calls = Vec::new(); + // For structured output (forced tool), capture the forced + // tool's `input` and surface it as the JSON content string. + let mut structured_content: Option = None; for block in anthropic_resp.content { match block { @@ -1081,12 +1302,23 @@ impl LlmClient { content.push_str(&text); } AnthropicContentBlock::ToolUse { id, name, input } => { - tool_calls.push(ToolCall { id, name, arguments: input }); + if forced_tool_name.as_deref() == Some(name.as_str()) { + // The forced structured-output tool: its input IS + // the answer. Serialize back to a JSON string so + // the content shape matches the OpenAI path. + structured_content = Some(serde_json::to_string(&input).unwrap_or_else(|_| input.to_string())); + } else { + tool_calls.push(ToolCall { id, name, arguments: input }); + } } AnthropicContentBlock::ToolResult { .. } => {} } } + if let Some(json) = structured_content { + content = json; + } + let finish_reason = anthropic_resp.stop_reason.unwrap_or_else(|| "stop".into()); let total = anthropic_resp.usage.input_tokens + anthropic_resp.usage.output_tokens; @@ -1173,6 +1405,9 @@ impl LlmClient { system, messages: anthropic_messages, tools: anthropic_tools, + // Streaming structured output is not wired on the Anthropic path — + // use the non-streaming `chat_structured` path. SMOODEV-1472. + tool_choice: None, }; // Add `stream: true` to the request body. AnthropicRequest doesn't @@ -2496,6 +2731,7 @@ mod tests { temperature: 0.0, tools: vec![], tool_choice: None, + response_format: None, }; let json = serde_json::to_string(&req).expect("serialize"); assert!(json.contains("test-model")); @@ -3259,6 +3495,7 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text description: "Echoes text".into(), input_schema: serde_json::json!({"type": "object", "properties": {"text": {"type": "string"}}}), }], + tool_choice: None, }; let json: serde_json::Value = serde_json::to_value(&req).expect("serialize"); assert_eq!(json["model"], "claude-sonnet-4-20250514"); @@ -3494,6 +3731,7 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text temperature: 0.0, tools: chat_tools, tool_choice: None, + response_format: None, } } @@ -3587,6 +3825,7 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text temperature: 0.0, tools: chat_tools, tool_choice: None, + response_format: None, }; let json = serde_json::to_string(&req).expect("serialize"); assert!( @@ -3629,4 +3868,172 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text let resp2: ChatResponse = serde_json::from_str(json2).expect("deserialize"); assert!(resp2.usage.expect("usage").prompt_tokens_details.is_none()); } + + // -------- Structured output (SMOODEV-1472) -------- + + fn weather_schema() -> serde_json::Value { + serde_json::json!({ + "type": "object", + "properties": { + "city": {"type": "string"}, + "temp_c": {"type": "number"} + }, + "required": ["city", "temp_c"], + "additionalProperties": false + }) + } + + #[test] + fn openai_request_carries_response_format_json_schema() { + // The OpenAI/LiteLLM wire shape: + // response_format: { type: "json_schema", json_schema: { name, schema, strict } } + let format = ResponseFormat::json_schema("weather_report", weather_schema()); + let req = ChatRequest { + model: "gpt-4o".into(), + messages: vec![ChatMessage { + role: "user".into(), + content: ChatContent::Text(Some("weather in SF?".into())), + tool_call_id: None, + tool_name: None, + tool_calls: vec![], + reasoning_content: None, + }], + max_tokens: 100, + temperature: 0.0, + tools: vec![], + tool_choice: None, + response_format: Some(format.to_openai()), + }; + let json: serde_json::Value = serde_json::to_value(&req).expect("serialize"); + assert_eq!(json["response_format"]["type"], "json_schema"); + assert_eq!(json["response_format"]["json_schema"]["name"], "weather_report"); + assert_eq!(json["response_format"]["json_schema"]["strict"], true); + assert_eq!(json["response_format"]["json_schema"]["schema"]["type"], "object"); + assert_eq!(json["response_format"]["json_schema"]["schema"]["required"][0], "city"); + } + + #[test] + fn no_response_format_is_omitted_from_the_wire() { + let req = ChatRequest { + model: "gpt-4o".into(), + messages: vec![], + max_tokens: 100, + temperature: 0.0, + tools: vec![], + tool_choice: None, + response_format: None, + }; + let json = serde_json::to_string(&req).expect("serialize"); + assert!(!json.contains("response_format"), "absent format must not serialize: {json}"); + } + + #[test] + fn anthropic_forced_tool_request_for_structured_output() { + // The Anthropic-native path expresses structured output as a forced + // tool call: a tool whose input_schema IS the requested schema, with + // tool_choice forcing exactly that tool. + let req = AnthropicRequest { + model: "claude-sonnet-4-20250514".into(), + max_tokens: 1024, + system: None, + messages: vec![AnthropicMessage { + role: "user".into(), + content: AnthropicContent::Text("weather?".into()), + }], + tools: vec![AnthropicTool { + name: "weather_report".into(), + description: "Return the response as a single JSON object conforming to the schema.".into(), + input_schema: weather_schema(), + }], + tool_choice: Some(AnthropicToolChoice { + r#type: "tool", + name: "weather_report".into(), + }), + }; + let json: serde_json::Value = serde_json::to_value(&req).expect("serialize"); + assert_eq!(json["tool_choice"]["type"], "tool"); + assert_eq!(json["tool_choice"]["name"], "weather_report"); + assert_eq!(json["tools"][0]["name"], "weather_report"); + assert_eq!(json["tools"][0]["input_schema"]["type"], "object"); + } + + #[test] + fn response_format_constructor_defaults_strict() { + let format = ResponseFormat::json_schema("x", serde_json::json!({"type": "object"})); + match format { + ResponseFormat::JsonSchema { name, strict, .. } => { + assert_eq!(name, "x"); + assert!(strict, "json_schema() must default strict=true"); + } + } + } + + #[test] + fn structured_json_parses_object_content() { + let resp = LlmResponse { + content: r#"{"city":"SF","temp_c":18.5}"#.into(), + tool_calls: vec![], + finish_reason: "stop".into(), + usage: Usage::default(), + rate_limit: None, + gateway_cost_usd: None, + resolved_model: None, + reasoning_content: None, + }; + let value = resp.structured_json().expect("valid JSON"); + assert_eq!(value["city"], "SF"); + assert_eq!(value["temp_c"], 18.5); + + #[derive(serde::Deserialize)] + struct Weather { + city: String, + temp_c: f64, + } + let typed: Weather = resp.deserialize_json().expect("deserialize"); + assert_eq!(typed.city, "SF"); + assert!((typed.temp_c - 18.5).abs() < f64::EPSILON); + } + + #[test] + fn structured_json_errors_on_non_json_content() { + let resp = LlmResponse { + content: "I'm sorry, I can't help with that.".into(), + tool_calls: vec![], + finish_reason: "stop".into(), + usage: Usage::default(), + rate_limit: None, + gateway_cost_usd: None, + resolved_model: None, + reasoning_content: None, + }; + let err = resp.structured_json().expect_err("non-JSON must error"); + assert!(err.to_string().contains("not valid JSON"), "err was: {err}"); + // And it must not silently swallow — the snippet is surfaced. + assert!(err.to_string().contains("I'm sorry"), "err should include snippet: {err}"); + } + + #[test] + fn structured_json_errors_on_empty_content() { + let resp = LlmResponse { + content: " ".into(), + tool_calls: vec![], + finish_reason: "stop".into(), + usage: Usage::default(), + rate_limit: None, + gateway_cost_usd: None, + resolved_model: None, + reasoning_content: None, + }; + let err = resp.structured_json().expect_err("empty must error"); + assert!(err.to_string().contains("empty content"), "err was: {err}"); + } + + #[test] + fn sanitize_tool_name_handles_invalid_chars() { + assert_eq!(sanitize_tool_name("weather report!"), "weather_report_"); + assert_eq!(sanitize_tool_name("ok_name-1"), "ok_name-1"); + assert_eq!(sanitize_tool_name(""), "structured_output"); + assert_eq!(sanitize_tool_name("***"), "___"); + assert_eq!(sanitize_tool_name(&"x".repeat(100)).len(), 64); + } } diff --git a/rust/smooth-operator-core/src/llm_provider.rs b/rust/smooth-operator-core/src/llm_provider.rs index b12e5cd..575289c 100644 --- a/rust/smooth-operator-core/src/llm_provider.rs +++ b/rust/smooth-operator-core/src/llm_provider.rs @@ -20,7 +20,7 @@ use futures_core::Stream; use futures_util::stream::{self, StreamExt}; use crate::conversation::Message; -use crate::llm::{LlmClient, LlmResponse, StreamEvent, Usage}; +use crate::llm::{LlmClient, LlmResponse, ResponseFormat, StreamEvent, Usage}; use crate::tool::{ToolCall, ToolSchema}; /// Boxed stream of streaming chat events — mirrors the return type of @@ -37,6 +37,14 @@ pub trait LlmProvider: Send + Sync { /// Streaming completion. Yields incremental [`StreamEvent`]s. async fn chat_stream(&self, messages: &[&Message], tools: &[ToolSchema]) -> anyhow::Result; + + /// Non-streaming completion constrained to a JSON Schema — **structured + /// output** (SMOODEV-1472). The returned response's `content` is the JSON + /// string the model produced; use + /// [`LlmResponse::structured_json`](crate::llm::LlmResponse::structured_json) + /// / [`LlmResponse::deserialize_json`](crate::llm::LlmResponse::deserialize_json) + /// to parse it. + async fn chat_structured(&self, messages: &[&Message], format: &ResponseFormat) -> anyhow::Result; } // The `LlmClient::` paths are intentional (not `Self::`): they fully-qualify the @@ -52,6 +60,10 @@ impl LlmProvider for LlmClient { async fn chat_stream(&self, messages: &[&Message], tools: &[ToolSchema]) -> anyhow::Result { LlmClient::chat_stream(self, messages, tools).await } + + async fn chat_structured(&self, messages: &[&Message], format: &ResponseFormat) -> anyhow::Result { + LlmClient::chat_structured(self, messages, format).await + } } /// Build a plain text [`LlmResponse`] with `stop` finish reason and otherwise @@ -97,8 +109,11 @@ pub struct RecordedCall { pub messages: Vec, /// The tool schemas offered to the model. pub tools: Vec, - /// `true` if this was a `chat_stream` call, `false` for `chat`. + /// `true` if this was a `chat_stream` call, `false` for `chat`/`chat_structured`. pub streamed: bool, + /// The structured-output format requested, if this was a `chat_structured` + /// call (SMOODEV-1472). `None` for plain `chat`/`chat_stream`. + pub response_format: Option, } /// A scripted outcome for a `chat` call. @@ -209,11 +224,12 @@ impl MockLlmClient { self.lock().calls.last().cloned() } - fn record(&self, messages: &[&Message], tools: &[ToolSchema], streamed: bool) { + fn record(&self, messages: &[&Message], tools: &[ToolSchema], streamed: bool, response_format: Option) { self.lock().calls.push(RecordedCall { messages: messages.iter().map(|m| (*m).clone()).collect(), tools: tools.to_vec(), streamed, + response_format, }); } } @@ -221,7 +237,21 @@ impl MockLlmClient { #[async_trait] impl LlmProvider for MockLlmClient { async fn chat(&self, messages: &[&Message], tools: &[ToolSchema]) -> anyhow::Result { - self.record(messages, tools, false); + self.record(messages, tools, false, None); + let outcome = self.lock().chat.pop_front(); + match outcome { + Some(ChatOutcome::Response(r)) => Ok(*r), + Some(ChatOutcome::Error(e)) => Err(anyhow::anyhow!(e)), + // Empty script: a benign terminal response so loops don't hang. + None => Ok(text_response("")), + } + } + + async fn chat_structured(&self, messages: &[&Message], format: &ResponseFormat) -> anyhow::Result { + // Structured-output calls share the scripted `chat` queue (FIFO) so a + // test scripts the JSON response via `push_text`/`push_response`. The + // requested format is captured on the RecordedCall for assertions. + self.record(messages, &[], false, Some(format.clone())); let outcome = self.lock().chat.pop_front(); match outcome { Some(ChatOutcome::Response(r)) => Ok(*r), @@ -232,7 +262,7 @@ impl LlmProvider for MockLlmClient { } async fn chat_stream(&self, messages: &[&Message], tools: &[ToolSchema]) -> anyhow::Result { - self.record(messages, tools, true); + self.record(messages, tools, true, None); let outcome = self.lock().streams.pop_front(); match outcome { Some(StreamOutcome::Events(events)) => Ok(stream::iter(events.into_iter().map(Ok)).boxed()), @@ -385,4 +415,95 @@ mod tests { let r = provider.chat(&msgs(&u), &[]).await.expect("chat"); assert_eq!(r.finish_reason, "stop"); } + + // -------- Structured output (SMOODEV-1472) -------- + + #[tokio::test] + async fn chat_structured_records_the_requested_format() { + let mock = MockLlmClient::new(); + mock.push_text(r#"{"answer":42}"#); + let u = Message::user("the question"); + let format = ResponseFormat::json_schema( + "answer_schema", + serde_json::json!({"type": "object", "properties": {"answer": {"type": "integer"}}}), + ); + + let resp = mock.chat_structured(&msgs(&u), &format).await.expect("structured chat"); + assert_eq!(resp.content, r#"{"answer":42}"#); + + let call = mock.last_call().expect("recorded a call"); + assert!(!call.streamed); + let recorded = call.response_format.expect("response_format captured"); + assert_eq!(recorded, format, "the requested format must be recorded for assertions"); + } + + #[tokio::test] + async fn chat_structured_response_parses_and_deserializes() { + let mock = MockLlmClient::new(); + mock.push_text(r#"{"city":"SF","temp_c":18.5}"#); + let u = Message::user("weather?"); + let format = ResponseFormat::json_schema("weather", serde_json::json!({"type": "object"})); + + let resp = mock.chat_structured(&msgs(&u), &format).await.expect("structured chat"); + + // serde_json::Value path + let value = resp.structured_json().expect("valid JSON object"); + assert_eq!(value["city"], "SF"); + + // typed deserialization path + #[derive(serde::Deserialize)] + struct Weather { + city: String, + temp_c: f64, + } + let typed: Weather = resp.deserialize_json().expect("deserialize into T"); + assert_eq!(typed.city, "SF"); + assert!((typed.temp_c - 18.5).abs() < f64::EPSILON); + } + + #[tokio::test] + async fn chat_structured_non_json_response_surfaces_clear_error() { + let mock = MockLlmClient::new(); + // Model ignored the schema and returned prose. + mock.push_text("Sorry, I cannot answer that."); + let u = Message::user("?"); + let format = ResponseFormat::json_schema("x", serde_json::json!({"type": "object"})); + + let resp = mock.chat_structured(&msgs(&u), &format).await.expect("structured chat"); + // Don't silently return empty: parsing must fail loudly. + let err = resp.structured_json().expect_err("non-JSON must error"); + assert!(err.to_string().contains("not valid JSON"), "err: {err}"); + } + + #[tokio::test] + async fn chat_structured_propagates_scripted_errors() { + let mock = MockLlmClient::new(); + mock.push_error("rate limited"); + let u = Message::user("?"); + let format = ResponseFormat::json_schema("x", serde_json::json!({"type": "object"})); + let err = mock.chat_structured(&msgs(&u), &format).await.expect_err("should error"); + assert!(err.to_string().contains("rate limited")); + } + + #[tokio::test] + async fn chat_does_not_record_a_response_format() { + let mock = MockLlmClient::new(); + mock.push_text("ok"); + let u = Message::user("hi"); + mock.chat(&msgs(&u), &[]).await.expect("chat"); + assert!(mock.last_call().expect("call").response_format.is_none()); + } + + #[tokio::test] + async fn chat_structured_usable_as_trait_object() { + let provider: Arc = Arc::new({ + let m = MockLlmClient::new(); + m.push_text(r#"{"ok":true}"#); + m + }); + let u = Message::user("hi"); + let format = ResponseFormat::json_schema("x", serde_json::json!({"type": "object"})); + let r = provider.chat_structured(&msgs(&u), &format).await.expect("structured"); + assert_eq!(r.structured_json().expect("json")["ok"], true); + } }