adds logic to check tool use history invariants

dingfeli · dingfeli · commit 8aa696d2d057 · 2025-05-13T21:53:02.000-07:00
diff --git a/crates/chat-cli/src/cli/chat/conversation_state.rs b/crates/chat-cli/src/cli/chat/conversation_state.rs
@@ -1,4 +1,3 @@
-use std::collections::vec_deque::IterMut;
 use std::collections::{
     HashMap,
     VecDeque,
@@ -33,6 +32,7 @@ use super::hooks::{
 };
 use super::message::{
     AssistantMessage,
+    AssistantToolUse,
     ToolUseResult,
     ToolUseResultBlock,
     UserMessage,
@@ -314,127 +314,113 @@ impl ConversationState {
                     tool_uses.iter().map(|t| t.id.as_str()),
                 );
             }
-
-            // Here we also need to make sure that the tool result corresponds to one of the tools
-            // in the list. Otherwise we will see validation error from the backend. There are three
-            // such circumstances where intervention would be needed:
-            // 1. The model had decided to call a tool with its partial name AND there is only one such tool, in
-            //    which case we would automatically resolve this tool call to its correct name. This will NOT
-            //    result in an error in its tool result. The intervention here is to substitute the partial name
-            //    with its full name.
-            // 2. The model had decided to call a tool with its partial name AND there are multiple tools it
-            //    could be referring to, in which case we WILL return an error in the tool result. The
-            //    intervention here is to substitute the ambiguous, partial name with a dummy.
-            // 3. The model had decided to call a tool that does not exist. The intervention here is to
-            //    substitute the non-existent tool name with a dummy.
-            let tool_use_results = user_msg.tool_use_results();
-            if let Some(tool_use_results) = tool_use_results {
-                // Note that we need to use the keys in tool manager's tn_map as the keys are the
-                // actual tool names as exposed to the model and the backend. If we use the actual
-                // names as they are recognized by their respective servers, we risk concluding
-                // with false positives.
-                let tool_name_list = self.tool_manager.tn_map.keys().map(String::as_str).collect::<Vec<_>>();
-                for result in tool_use_results {
-                    let tool_use_id = result.tool_use_id.as_str();
-                    let corresponding_tool_use = tool_uses.iter_mut().find(|tool_use| tool_use_id == tool_use.id);
-                    if let Some(tool_use) = corresponding_tool_use {
-                        if tool_name_list.contains(&tool_use.name.as_str()) {
-                            // If this tool matches of the tools in our list, this is not our
-                            // concern, error or not.
-                            continue;
-                        }
-                        if let ToolResultStatus::Error = result.status {
-                            // case 2 and 3
-                            tool_use.name = DUMMY_TOOL_NAME.to_string();
-                            tool_use.args = serde_json::json!({});
-                        } else {
-                            // case 1
-                            let full_name = tool_name_list.iter().find(|name| name.ends_with(&tool_use.name));
-                            // We should be able to find a match but if not we'll just treat it as
-                            // a dummy and move on
-                            if let Some(full_name) = full_name {
-                                tool_use.name = (*full_name).to_string();
-                            } else {
-                                tool_use.name = DUMMY_TOOL_NAME.to_string();
-                                tool_use.args = serde_json::json!({});
-                            }
-                        }
-                    }
-                }
-            }
+            self.enforce_tool_use_history_invariants();
         }
     }
 
-    // Here we also need to make sure that the tool result corresponds to one of the tools
-    // in the list. Otherwise we will see validation error from the backend. There are three
-    // such circumstances where intervention would be needed:
-    // 1. The model had decided to call a tool with its partial name AND there is only one such tool, in
-    //    which case we would automatically resolve this tool call to its correct name. This will NOT
-    //    result in an error in its tool result. The intervention here is to substitute the partial name
-    //    with its full name.
-    // 2. The model had decided to call a tool with its partial name AND there are multiple tools it
-    //    could be referring to, in which case we WILL return an error in the tool result. The
-    //    intervention here is to substitute the ambiguous, partial name with a dummy.
-    // 3. The model had decided to call a tool that does not exist. The intervention here is to
-    //    substitute the non-existent tool name with a dummy.
-    fn enforce_tool_use_invariants(&mut self, history_of_interest: &mut Vec<(UserMessage, AssistantMessage)>) {
+    /// Makes sure that every tool result corresponds to one of the tools in the list.
+    /// Otherwise we will see a validation error from the backend. There are three
+    /// such circumstances where intervention would be needed:
+    /// 1. The model had decided to call a tool with its partial name AND there is only one such
+    ///    tool, in which case we would automatically resolve this tool call to its correct name.
+    ///    This will NOT result in an error in its tool result. The intervention here is to
+    ///    substitute the partial name with its full name.
+    /// 2. The model had decided to call a tool with its partial name AND there are multiple tools
+    ///    it could be referring to, in which case we WILL return an error in the tool result. The
+    ///    intervention here is to substitute the ambiguous, partial name with a dummy.
+    /// 3. The model had decided to call a tool that does not exist. The intervention here is to
+    ///    substitute the non-existent tool name with a dummy.
+    pub fn enforce_tool_use_history_invariants(&mut self) {
         let tool_name_list = self.tool_manager.tn_map.keys().map(String::as_str).collect::<Vec<_>>();
-        let mut tool_uses = history_of_interest
-            .iter_mut()
-            .filter_map(|(_user_msg, asst_msg)| {
-                if let AssistantMessage::ToolUse { ref mut tool_uses, .. } = asst_msg {
-                    Some(tool_uses)
-                } else {
-                    None
+        // We need to first determine what the range of interest is. There are two places where we
+        // would call this function:
+        // 1. When there are changes to the list of available tools, in which case we comb through the
+        //    entire conversation
+        // 2. When we send a message, in which case we only examine the most recent entry
+        let (tool_use_results, mut tool_uses) =
+            if let (Some((_, AssistantMessage::ToolUse { ref mut tool_uses, .. })), Some(user_msg)) = (
+                self.history
+                    .range_mut(self.valid_history_range.0..self.valid_history_range.1)
+                    .last(),
+                &mut self.next_message,
+            ) {
+                let tool_use_results = user_msg
+                    .tool_use_results()
+                    .map_or(Vec::new(), |results| results.iter().collect::<Vec<_>>());
+                let tool_uses = tool_uses.iter_mut().collect::<Vec<_>>();
+                (tool_use_results, tool_uses)
+            } else {
+                self.history
+                    .iter_mut()
+                    .filter_map(|(user_msg, asst_msg)| {
+                        if let (Some(tool_use_results), AssistantMessage::ToolUse { ref mut tool_uses, .. }) =
+                            (user_msg.tool_use_results(), asst_msg)
+                        {
+                            Some((tool_use_results, tool_uses))
+                        } else {
+                            None
+                        }
+                    })
+                    .fold(
+                        (Vec::<&ToolUseResult>::new(), Vec::<&mut AssistantToolUse>::new()),
+                        |(mut tool_use_results, mut tool_uses), (results, uses)| {
+                            let mut results = results.iter().collect::<Vec<_>>();
+                            let mut uses = uses.iter_mut().collect::<Vec<_>>();
+                            tool_use_results.append(&mut results);
+                            tool_uses.append(&mut uses);
+                            (tool_use_results, tool_uses)
+                        },
+                    )
+            };
+        // Replace tool uses associated with tools that do not exist / no longer exist with a
+        // dummy (i.e. put them to sleep / make them dormant)
+        for result in tool_use_results {
+            let tool_use_id = result.tool_use_id.as_str();
+            let corresponding_tool_use = tool_uses.iter_mut().find(|tool_use| tool_use_id == tool_use.id);
+            if let Some(tool_use) = corresponding_tool_use {
+                if tool_name_list.contains(&tool_use.name.as_str()) {
+                    // If this tool matches one of the tools in our list, this is not our
+                    // concern, error or not.
+                    continue;
                 }
-            })
-            .flatten();
-        let tool_use_results = if let Some(user_msg) = &self.next_message {
-            // We only check to verify the last message if [Self::next_message] is set
-            user_msg.tool_use_results().map(|arr| arr.iter().collect::<Vec<_>>())
-        } else {
-            // Otherwise, we check the entire conversation
-            Some(
-                history_of_interest
-                    .iter()
-                    .filter_map(|(user_msg, _)| user_msg.tool_use_results())
-                    .flatten()
-                    .collect::<Vec<_>>(),
-            )
-        };
-        if let Some(tool_use_results) = tool_use_results {
-            // Note that we need to use the keys in tool manager's tn_map as the keys are the
-            // actual tool names as exposed to the model and the backend. If we use the actual
-            // names as they are recognized by their respective servers, we risk concluding
-            // with false positives.
-            for result in tool_use_results {
-                let tool_use_id = result.tool_use_id.as_str();
-                let corresponding_tool_use = tool_uses.find(|tool_use| tool_use_id == tool_use.id);
-                if let Some(tool_use) = corresponding_tool_use {
-                    if tool_name_list.contains(&tool_use.name.as_str()) {
-                        // If this tool matches of the tools in our list, this is not our
-                        // concern, error or not.
-                        continue;
-                    }
-                    if let ToolResultStatus::Error = result.status {
-                        // case 2 and 3
+                if let ToolResultStatus::Error = result.status {
+                    // case 2 and 3
+                    tool_use.name = DUMMY_TOOL_NAME.to_string();
+                    tool_use.args = serde_json::json!({});
+                } else {
+                    // case 1
+                    let full_name = tool_name_list.iter().find(|name| name.ends_with(&tool_use.name));
+                    // We should be able to find a match but if not we'll just treat it as
+                    // a dummy and move on
+                    if let Some(full_name) = full_name {
+                        tool_use.name = (*full_name).to_string();
+                    } else {
                         tool_use.name = DUMMY_TOOL_NAME.to_string();
                         tool_use.args = serde_json::json!({});
-                    } else {
-                        // case 1
-                        let full_name = tool_name_list.iter().find(|name| name.ends_with(&tool_use.name));
-                        // We should be able to find a match but if not we'll just treat it as
-                        // a dummy and move on
-                        if let Some(full_name) = full_name {
-                            tool_use.name = (*full_name).to_string();
-                        } else {
-                            tool_use.name = DUMMY_TOOL_NAME.to_string();
-                            tool_use.args = serde_json::json!({});
-                        }
                     }
                 }
             }
         }
+        // Revive tools that were previously dormant if they now correspond to one of the tools in
+        // our list of available tools. Note that this check only works because tn_map does NOT
+        // contain names of native tools.
+        for tool_use in tool_uses {
+            if tool_use.name == DUMMY_TOOL_NAME
+                && tool_use
+                    .orig_name
+                    .as_ref()
+                    .is_some_and(|name| tool_name_list.contains(&(*name).as_str()))
+            {
+                tool_use.name = tool_use
+                    .orig_name
+                    .as_ref()
+                    .map_or(DUMMY_TOOL_NAME.to_string(), |name| name.clone());
+                tool_use.args = tool_use
+                    .orig_args
+                    .as_ref()
+                    .map_or(serde_json::json!({}), |args| args.clone());
+            }
+        }
     }
 
     pub fn add_tool_results(&mut self, tool_results: Vec<ToolUseResult>) {
@@ -492,7 +478,7 @@ impl ConversationState {
             return;
         }
         self.tool_manager.update().await;
-        // TODO: make this more targetted so we don't have to clone the entire list of tools
+        // TODO: make this more targeted so we don't have to clone the entire list of tools
         self.tools = self
             .tool_manager
             .schema
@@ -509,6 +495,10 @@ impl ConversationState {
                 acc
             });
         self.tool_manager.has_new_stuff.store(false, Ordering::Release);
+        // We call this in [Self::enforce_conversation_invariants] as well. But we need to call it
+        // here as well because when it's being called in [Self::enforce_conversation_invariants]
+        // it is only checking the last entry.
+        self.enforce_tool_use_history_invariants();
     }
 
     /// Returns a conversation state representation which reflects the exact conversation to send
@@ -1108,6 +1098,7 @@ mod tests {
                     id: "tool_id".to_string(),
                     name: "tool name".to_string(),
                     args: serde_json::Value::Null,
+                    ..Default::default()
                 }]),
                 &mut database,
             );
@@ -1138,6 +1129,7 @@ mod tests {
                         id: "tool_id".to_string(),
                         name: "tool name".to_string(),
                         args: serde_json::Value::Null,
+                        ..Default::default()
                     }]),
                     &mut database,
                 );
diff --git a/crates/chat-cli/src/cli/chat/mod.rs b/crates/chat-cli/src/cli/chat/mod.rs
@@ -545,6 +545,7 @@ impl ChatContext {
                 existing_conversation = true;
                 input = Some(input.unwrap_or("In a few words, summarize our conversation so far.".to_owned()));
                 prior.tool_manager = tool_manager;
+                prior.enforce_tool_use_history_invariants();
                 prior
             },
             None => {