Skip to content

Commit 4ca5bdf

Browse files
feat: search_tool
1 parent e933537 commit 4ca5bdf

File tree

14 files changed

+814
-21
lines changed

14 files changed

+814
-21
lines changed

codex-rs/Cargo.lock

Lines changed: 108 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

codex-rs/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ async-stream = "0.3.6"
129129
async-trait = "0.1.89"
130130
axum = { version = "0.8", default-features = false }
131131
base64 = "0.22.1"
132+
bm25 = "2.3.2"
132133
bytes = "1.10.1"
133134
chardetng = "0.1.17"
134135
chrono = "0.4.43"

codex-rs/core/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ arc-swap = "1.8.0"
2323
async-channel = { workspace = true }
2424
async-trait = { workspace = true }
2525
base64 = { workspace = true }
26+
bm25 = { workspace = true }
2627
chardetng = { workspace = true }
2728
chrono = { workspace = true, features = ["serde"] }
2829
clap = { workspace = true, features = ["derive"] }

codex-rs/core/config.schema.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,9 @@
211211
"runtime_metrics": {
212212
"type": "boolean"
213213
},
214+
"search_tool": {
215+
"type": "boolean"
216+
},
214217
"shell_snapshot": {
215218
"type": "boolean"
216219
},
@@ -1229,6 +1232,9 @@
12291232
"runtime_metrics": {
12301233
"type": "boolean"
12311234
},
1235+
"search_tool": {
1236+
"type": "boolean"
1237+
},
12321238
"shell_snapshot": {
12331239
"type": "boolean"
12341240
},

codex-rs/core/src/codex.rs

Lines changed: 49 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,9 @@ pub(crate) struct Session {
480480
next_internal_sub_id: AtomicU64,
481481
}
482482

483+
const SEARCH_TOOL_DEVELOPER_INSTRUCTIONS: &str =
484+
include_str!("../templates/search_tool/developer_instructions.md");
485+
483486
/// The context needed for a single turn of the thread.
484487
#[derive(Debug)]
485488
pub(crate) struct TurnContext {
@@ -1104,6 +1107,16 @@ impl Session {
11041107
}
11051108
}
11061109

1110+
pub(crate) async fn set_next_mcp_tool_selection(&self, tool_names: Vec<String>) {
1111+
let mut state = self.state.lock().await;
1112+
state.set_next_mcp_tool_selection(tool_names);
1113+
}
1114+
1115+
pub(crate) async fn take_next_mcp_tool_selection(&self) -> Option<Vec<String>> {
1116+
let mut state = self.state.lock().await;
1117+
state.take_next_mcp_tool_selection()
1118+
}
1119+
11071120
async fn record_initial_history(&self, conversation_history: InitialHistory) {
11081121
let turn_context = self.new_default_turn().await;
11091122
match conversation_history {
@@ -1945,7 +1958,7 @@ impl Session {
19451958
&self,
19461959
turn_context: &TurnContext,
19471960
) -> Vec<ResponseItem> {
1948-
let mut items = Vec::<ResponseItem>::with_capacity(4);
1961+
let mut items = Vec::<ResponseItem>::with_capacity(6);
19491962
let shell = self.user_shell();
19501963
items.push(
19511964
DeveloperInstructions::from_policy(
@@ -1960,6 +1973,11 @@ impl Session {
19601973
if let Some(developer_instructions) = turn_context.developer_instructions.as_deref() {
19611974
items.push(DeveloperInstructions::new(developer_instructions.to_string()).into());
19621975
}
1976+
if turn_context.tools_config.search_tool {
1977+
items.push(
1978+
DeveloperInstructions::new(SEARCH_TOOL_DEVELOPER_INSTRUCTIONS.to_string()).into(),
1979+
);
1980+
}
19631981
// Add developer instructions from collaboration_mode if they exist and are non-empty
19641982
let (collaboration_mode, base_instructions) = {
19651983
let state = self.state.lock().await;
@@ -3779,6 +3797,15 @@ fn filter_codex_apps_mcp_tools(
37793797
mcp_tools
37803798
}
37813799

3800+
fn filter_mcp_tools_by_name(
3801+
mut mcp_tools: HashMap<String, crate::mcp_connection_manager::ToolInfo>,
3802+
selected_tools: &[String],
3803+
) -> HashMap<String, crate::mcp_connection_manager::ToolInfo> {
3804+
let allowed: HashSet<&str> = selected_tools.iter().map(String::as_str).collect();
3805+
mcp_tools.retain(|name, _| allowed.contains(name.as_str()));
3806+
mcp_tools
3807+
}
3808+
37823809
fn codex_apps_connector_id(tool: &crate::mcp_connection_manager::ToolInfo) -> Option<&str> {
37833810
tool.connector_id.as_deref()
37843811
}
@@ -3813,19 +3840,28 @@ async fn run_sampling_request(
38133840
.list_all_tools()
38143841
.or_cancel(&cancellation_token)
38153842
.await?;
3816-
let connectors_for_tools = if turn_context.config.features.enabled(Feature::Apps) {
3817-
let connectors = connectors::accessible_connectors_from_mcp_tools(&mcp_tools);
3818-
Some(filter_connectors_for_input(
3819-
connectors,
3820-
&input,
3821-
tool_selection.explicit_app_paths,
3822-
tool_selection.skill_name_counts_lower,
3823-
))
3843+
let search_tool_enabled = turn_context.config.features.enabled(Feature::SearchTool);
3844+
if search_tool_enabled {
3845+
if let Some(selected_tools) = sess.take_next_mcp_tool_selection().await {
3846+
mcp_tools = filter_mcp_tools_by_name(mcp_tools, &selected_tools);
3847+
} else {
3848+
mcp_tools.clear();
3849+
}
38243850
} else {
3825-
None
3826-
};
3827-
if let Some(connectors) = connectors_for_tools.as_ref() {
3828-
mcp_tools = filter_codex_apps_mcp_tools(mcp_tools, connectors);
3851+
let connectors_for_tools = if turn_context.config.features.enabled(Feature::Apps) {
3852+
let connectors = connectors::accessible_connectors_from_mcp_tools(&mcp_tools);
3853+
Some(filter_connectors_for_input(
3854+
connectors,
3855+
&input,
3856+
tool_selection.explicit_app_paths,
3857+
tool_selection.skill_name_counts_lower,
3858+
))
3859+
} else {
3860+
None
3861+
};
3862+
if let Some(connectors) = connectors_for_tools.as_ref() {
3863+
mcp_tools = filter_codex_apps_mcp_tools(mcp_tools, connectors);
3864+
}
38293865
}
38303866
let router = Arc::new(ToolRouter::from_config(
38313867
&turn_context.tools_config,

codex-rs/core/src/features.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ pub enum Feature {
8787
/// Allow the model to request web searches that fetch cached content.
8888
/// Takes precedence over `WebSearchRequest`.
8989
WebSearchCached,
90+
/// Allow the model to search MCP tools via BM25 before exposing them.
91+
SearchTool,
9092
/// Gate the execpolicy enforcement for shell/unified exec.
9193
ExecPolicy,
9294
/// Allow the model to request approval and propose exec rules.
@@ -424,6 +426,16 @@ pub const FEATURES: &[FeatureSpec] = &[
424426
stage: Stage::Deprecated,
425427
default_enabled: false,
426428
},
429+
FeatureSpec {
430+
id: Feature::SearchTool,
431+
key: "search_tool",
432+
stage: Stage::Experimental {
433+
name: "Tool search",
434+
menu_description: "Search MCP tools with BM25 before exposing them.",
435+
announcement: "NEW! Try tool search to reduce MCP tool noise. Enable in /experimental!",
436+
},
437+
default_enabled: false,
438+
},
427439
// Experimental program. Rendered in the `/experimental` menu for users.
428440
FeatureSpec {
429441
id: Feature::ShellSnapshot,

codex-rs/core/src/state/session.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ pub(crate) struct SessionState {
2424
/// TODO(owen): This is a temporary solution to avoid updating a thread's updated_at
2525
/// timestamp when resuming a session. Remove this once SQLite is in place.
2626
pub(crate) initial_context_seeded: bool,
27+
pub(crate) next_mcp_tool_selection: Option<Vec<String>>,
2728
}
2829

2930
impl SessionState {
@@ -38,6 +39,7 @@ impl SessionState {
3839
dependency_env: HashMap::new(),
3940
mcp_dependency_prompted: HashSet::new(),
4041
initial_context_seeded: false,
42+
next_mcp_tool_selection: None,
4143
}
4244
}
4345

@@ -125,6 +127,14 @@ impl SessionState {
125127
pub(crate) fn dependency_env(&self) -> HashMap<String, String> {
126128
self.dependency_env.clone()
127129
}
130+
131+
pub(crate) fn set_next_mcp_tool_selection(&mut self, tool_names: Vec<String>) {
132+
self.next_mcp_tool_selection = Some(tool_names);
133+
}
134+
135+
pub(crate) fn take_next_mcp_tool_selection(&mut self) -> Option<Vec<String>> {
136+
self.next_mcp_tool_selection.take()
137+
}
128138
}
129139

130140
// Sometimes new snapshots don't include credits or plan information.

codex-rs/core/src/tools/handlers/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ mod mcp_resource;
88
mod plan;
99
mod read_file;
1010
mod request_user_input;
11+
mod search_tool_bm25;
1112
mod shell;
1213
mod test_sync;
1314
mod unified_exec;
@@ -28,6 +29,7 @@ pub use plan::PlanHandler;
2829
pub use read_file::ReadFileHandler;
2930
pub use request_user_input::RequestUserInputHandler;
3031
pub(crate) use request_user_input::request_user_input_tool_description;
32+
pub use search_tool_bm25::SearchToolBm25Handler;
3133
pub use shell::ShellCommandHandler;
3234
pub use shell::ShellHandler;
3335
pub use test_sync::TestSyncHandler;

0 commit comments

Comments
 (0)