Skip to content

Commit f146981

Browse files
feat: add JSON schema sanitization for MCP tools to ensure compatibil… (#1975)
…ity with internal JsonSchema enum Closes: #1973 Co-authored-by: Dylan Hurd <[email protected]>
1 parent bff4435 commit f146981

File tree

1 file changed

+333
-1
lines changed

1 file changed

+333
-1
lines changed

codex-rs/core/src/openai_tools.rs

Lines changed: 333 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use serde::Deserialize;
22
use serde::Serialize;
3+
use serde_json::Value as JsonValue;
34
use serde_json::json;
45
use std::collections::BTreeMap;
56
use std::collections::HashMap;
@@ -81,6 +82,8 @@ pub(crate) enum JsonSchema {
8182
#[serde(skip_serializing_if = "Option::is_none")]
8283
description: Option<String>,
8384
},
85+
/// MCP schema allows "number" | "integer" for Number
86+
#[serde(alias = "integer")]
8487
Number {
8588
#[serde(skip_serializing_if = "Option::is_none")]
8689
description: Option<String>,
@@ -296,7 +299,13 @@ pub(crate) fn mcp_tool_to_openai_tool(
296299
input_schema.properties = Some(serde_json::Value::Object(serde_json::Map::new()));
297300
}
298301

299-
let serialized_input_schema = serde_json::to_value(input_schema)?;
302+
// Serialize to a raw JSON value so we can sanitize schemas coming from MCP
303+
// servers. Some servers omit the top-level or nested `type` in JSON
304+
// Schemas (e.g. using enum/anyOf), or use unsupported variants like
305+
// `integer`. Our internal JsonSchema is a small subset and requires
306+
// `type`, so we coerce/sanitize here for compatibility.
307+
let mut serialized_input_schema = serde_json::to_value(input_schema)?;
308+
sanitize_json_schema(&mut serialized_input_schema);
300309
let input_schema = serde_json::from_value::<JsonSchema>(serialized_input_schema)?;
301310

302311
Ok(ResponsesApiTool {
@@ -307,6 +316,120 @@ pub(crate) fn mcp_tool_to_openai_tool(
307316
})
308317
}
309318

319+
/// Sanitize a JSON Schema (as serde_json::Value) so it can fit our limited
320+
/// JsonSchema enum. This function:
321+
/// - Ensures every schema object has a "type". If missing, infers it from
322+
/// common keywords (properties => object, items => array, enum/const/format => string)
323+
/// and otherwise defaults to "string".
324+
/// - Fills required child fields (e.g. array items, object properties) with
325+
/// permissive defaults when absent.
326+
fn sanitize_json_schema(value: &mut JsonValue) {
327+
match value {
328+
JsonValue::Bool(_) => {
329+
// JSON Schema boolean form: true/false. Coerce to an accept-all string.
330+
*value = json!({ "type": "string" });
331+
}
332+
JsonValue::Array(arr) => {
333+
for v in arr.iter_mut() {
334+
sanitize_json_schema(v);
335+
}
336+
}
337+
JsonValue::Object(map) => {
338+
// First, recursively sanitize known nested schema holders
339+
if let Some(props) = map.get_mut("properties") {
340+
if let Some(props_map) = props.as_object_mut() {
341+
for (_k, v) in props_map.iter_mut() {
342+
sanitize_json_schema(v);
343+
}
344+
}
345+
}
346+
if let Some(items) = map.get_mut("items") {
347+
sanitize_json_schema(items);
348+
}
349+
// Some schemas use oneOf/anyOf/allOf - sanitize their entries
350+
for combiner in ["oneOf", "anyOf", "allOf", "prefixItems"] {
351+
if let Some(v) = map.get_mut(combiner) {
352+
sanitize_json_schema(v);
353+
}
354+
}
355+
356+
// Normalize/ensure type
357+
let mut ty = map
358+
.get("type")
359+
.and_then(|v| v.as_str())
360+
.map(|s| s.to_string());
361+
362+
// If type is an array (union), pick first supported; else leave to inference
363+
if ty.is_none() {
364+
if let Some(JsonValue::Array(types)) = map.get("type") {
365+
for t in types {
366+
if let Some(tt) = t.as_str() {
367+
if matches!(
368+
tt,
369+
"object" | "array" | "string" | "number" | "integer" | "boolean"
370+
) {
371+
ty = Some(tt.to_string());
372+
break;
373+
}
374+
}
375+
}
376+
}
377+
}
378+
379+
// Infer type if still missing
380+
if ty.is_none() {
381+
if map.contains_key("properties")
382+
|| map.contains_key("required")
383+
|| map.contains_key("additionalProperties")
384+
{
385+
ty = Some("object".to_string());
386+
} else if map.contains_key("items") || map.contains_key("prefixItems") {
387+
ty = Some("array".to_string());
388+
} else if map.contains_key("enum")
389+
|| map.contains_key("const")
390+
|| map.contains_key("format")
391+
{
392+
ty = Some("string".to_string());
393+
} else if map.contains_key("minimum")
394+
|| map.contains_key("maximum")
395+
|| map.contains_key("exclusiveMinimum")
396+
|| map.contains_key("exclusiveMaximum")
397+
|| map.contains_key("multipleOf")
398+
{
399+
ty = Some("number".to_string());
400+
}
401+
}
402+
// If we still couldn't infer, default to string
403+
let ty = ty.unwrap_or_else(|| "string".to_string());
404+
map.insert("type".to_string(), JsonValue::String(ty.to_string()));
405+
406+
// Ensure object schemas have properties map
407+
if ty == "object" {
408+
if !map.contains_key("properties") {
409+
map.insert(
410+
"properties".to_string(),
411+
JsonValue::Object(serde_json::Map::new()),
412+
);
413+
}
414+
// If additionalProperties is an object schema, sanitize it too.
415+
// Leave booleans as-is, since JSON Schema allows boolean here.
416+
if let Some(ap) = map.get_mut("additionalProperties") {
417+
let is_bool = matches!(ap, JsonValue::Bool(_));
418+
if !is_bool {
419+
sanitize_json_schema(ap);
420+
}
421+
}
422+
}
423+
424+
// Ensure array schemas have items
425+
if ty == "array" && !map.contains_key("items") {
426+
map.insert("items".to_string(), json!({ "type": "string" }));
427+
}
428+
}
429+
_ => {}
430+
}
431+
}
432+
310433
/// Returns a list of OpenAiTools based on the provided config and MCP tools.
311434
/// Note that the keys of mcp_tools should be fully qualified names. See
312435
/// [`McpConnectionManager`] for more details.
@@ -351,6 +474,7 @@ pub(crate) fn get_openai_tools(
351474
mod tests {
352475
use crate::model_family::find_family_for_model;
353476
use mcp_types::ToolInputSchema;
477+
use pretty_assertions::assert_eq;
354478

355479
use super::*;
356480

@@ -497,4 +621,212 @@ mod tests {
497621
})
498622
);
499623
}
624+
625+
#[test]
626+
fn test_mcp_tool_property_missing_type_defaults_to_string() {
627+
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
628+
let config = ToolsConfig::new(
629+
&model_family,
630+
AskForApproval::Never,
631+
SandboxPolicy::ReadOnly,
632+
false,
633+
);
634+
635+
let tools = get_openai_tools(
636+
&config,
637+
Some(HashMap::from([(
638+
"dash/search".to_string(),
639+
mcp_types::Tool {
640+
name: "search".to_string(),
641+
input_schema: ToolInputSchema {
642+
properties: Some(serde_json::json!({
643+
"query": {
644+
"description": "search query"
645+
}
646+
})),
647+
required: None,
648+
r#type: "object".to_string(),
649+
},
650+
output_schema: None,
651+
title: None,
652+
annotations: None,
653+
description: Some("Search docs".to_string()),
654+
},
655+
)])),
656+
);
657+
658+
assert_eq_tool_names(&tools, &["shell", "dash/search"]);
659+
660+
assert_eq!(
661+
tools[1],
662+
OpenAiTool::Function(ResponsesApiTool {
663+
name: "dash/search".to_string(),
664+
parameters: JsonSchema::Object {
665+
properties: BTreeMap::from([(
666+
"query".to_string(),
667+
JsonSchema::String {
668+
description: Some("search query".to_string())
669+
}
670+
)]),
671+
required: None,
672+
additional_properties: None,
673+
},
674+
description: "Search docs".to_string(),
675+
strict: false,
676+
})
677+
);
678+
}
679+
680+
#[test]
681+
fn test_mcp_tool_integer_normalized_to_number() {
682+
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
683+
let config = ToolsConfig::new(
684+
&model_family,
685+
AskForApproval::Never,
686+
SandboxPolicy::ReadOnly,
687+
false,
688+
);
689+
690+
let tools = get_openai_tools(
691+
&config,
692+
Some(HashMap::from([(
693+
"dash/paginate".to_string(),
694+
mcp_types::Tool {
695+
name: "paginate".to_string(),
696+
input_schema: ToolInputSchema {
697+
properties: Some(serde_json::json!({
698+
"page": { "type": "integer" }
699+
})),
700+
required: None,
701+
r#type: "object".to_string(),
702+
},
703+
output_schema: None,
704+
title: None,
705+
annotations: None,
706+
description: Some("Pagination".to_string()),
707+
},
708+
)])),
709+
);
710+
711+
assert_eq_tool_names(&tools, &["shell", "dash/paginate"]);
712+
assert_eq!(
713+
tools[1],
714+
OpenAiTool::Function(ResponsesApiTool {
715+
name: "dash/paginate".to_string(),
716+
parameters: JsonSchema::Object {
717+
properties: BTreeMap::from([(
718+
"page".to_string(),
719+
JsonSchema::Number { description: None }
720+
)]),
721+
required: None,
722+
additional_properties: None,
723+
},
724+
description: "Pagination".to_string(),
725+
strict: false,
726+
})
727+
);
728+
}
729+
730+
#[test]
731+
fn test_mcp_tool_array_without_items_gets_default_string_items() {
732+
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
733+
let config = ToolsConfig::new(
734+
&model_family,
735+
AskForApproval::Never,
736+
SandboxPolicy::ReadOnly,
737+
false,
738+
);
739+
740+
let tools = get_openai_tools(
741+
&config,
742+
Some(HashMap::from([(
743+
"dash/tags".to_string(),
744+
mcp_types::Tool {
745+
name: "tags".to_string(),
746+
input_schema: ToolInputSchema {
747+
properties: Some(serde_json::json!({
748+
"tags": { "type": "array" }
749+
})),
750+
required: None,
751+
r#type: "object".to_string(),
752+
},
753+
output_schema: None,
754+
title: None,
755+
annotations: None,
756+
description: Some("Tags".to_string()),
757+
},
758+
)])),
759+
);
760+
761+
assert_eq_tool_names(&tools, &["shell", "dash/tags"]);
762+
assert_eq!(
763+
tools[1],
764+
OpenAiTool::Function(ResponsesApiTool {
765+
name: "dash/tags".to_string(),
766+
parameters: JsonSchema::Object {
767+
properties: BTreeMap::from([(
768+
"tags".to_string(),
769+
JsonSchema::Array {
770+
items: Box::new(JsonSchema::String { description: None }),
771+
description: None
772+
}
773+
)]),
774+
required: None,
775+
additional_properties: None,
776+
},
777+
description: "Tags".to_string(),
778+
strict: false,
779+
})
780+
);
781+
}
782+
783+
#[test]
784+
fn test_mcp_tool_anyof_defaults_to_string() {
785+
let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
786+
let config = ToolsConfig::new(
787+
&model_family,
788+
AskForApproval::Never,
789+
SandboxPolicy::ReadOnly,
790+
false,
791+
);
792+
793+
let tools = get_openai_tools(
794+
&config,
795+
Some(HashMap::from([(
796+
"dash/value".to_string(),
797+
mcp_types::Tool {
798+
name: "value".to_string(),
799+
input_schema: ToolInputSchema {
800+
properties: Some(serde_json::json!({
801+
"value": { "anyOf": [ { "type": "string" }, { "type": "number" } ] }
802+
})),
803+
required: None,
804+
r#type: "object".to_string(),
805+
},
806+
output_schema: None,
807+
title: None,
808+
annotations: None,
809+
description: Some("AnyOf Value".to_string()),
810+
},
811+
)])),
812+
);
813+
814+
assert_eq_tool_names(&tools, &["shell", "dash/value"]);
815+
assert_eq!(
816+
tools[1],
817+
OpenAiTool::Function(ResponsesApiTool {
818+
name: "dash/value".to_string(),
819+
parameters: JsonSchema::Object {
820+
properties: BTreeMap::from([(
821+
"value".to_string(),
822+
JsonSchema::String { description: None }
823+
)]),
824+
required: None,
825+
additional_properties: None,
826+
},
827+
description: "AnyOf Value".to_string(),
828+
strict: false,
829+
})
830+
);
831+
}
500832
}

0 commit comments

Comments
 (0)