verifies conversation invariants on conversation deserialization

dingfeli · dingfeli · commit aab54dfa0fb7 · 2025-05-13T17:05:57.000-07:00
diff --git a/crates/chat-cli/src/cli/chat/conversation_state.rs b/crates/chat-cli/src/cli/chat/conversation_state.rs
@@ -1,3 +1,4 @@
+use std::collections::vec_deque::IterMut;
 use std::collections::{
     HashMap,
     VecDeque,
@@ -364,6 +365,78 @@ impl ConversationState {
         }
     }
 
+    // Rewrites tool uses in `history_of_interest` whose names are not keys of the tool manager's
+    // `tn_map`; per the original author, the backend otherwise reports a validation error. Only
+    // tool uses with a matching tool result (by id) are examined. Three cases need intervention:
+    // 1. The model called a tool by a partial name matching exactly one tool; the call was
+    //    resolved automatically and its result is NOT an error. The partial name is replaced
+    //    with the full name.
+    // 2. The model called a tool by a partial name matching several tools; the result IS an
+    //    error. The ambiguous name is replaced with a dummy and the args are emptied.
+    // 3. The model called a tool that does not exist. The name is replaced with a dummy and the
+    //    args are emptied.
+    // NOTE(review): `tool_uses` is a single-pass iterator, so each `find` below resumes where the
+    //    previous one stopped; confirm that results always arrive in tool-use order.
+    fn enforce_tool_use_invariants(&mut self, history_of_interest: &mut Vec<(UserMessage, AssistantMessage)>) {
+        let tool_name_list = self.tool_manager.tn_map.keys().map(String::as_str).collect::<Vec<_>>();
+        let mut tool_uses = history_of_interest
+            .iter_mut()
+            .filter_map(|(_user_msg, asst_msg)| {
+                if let AssistantMessage::ToolUse { ref mut tool_uses, .. } = asst_msg {
+                    Some(tool_uses)
+                } else {
+                    None
+                }
+            })
+            .flatten();
+        let tool_use_results = if let Some(user_msg) = &self.next_message {
+            // When [Self::next_message] is set, only that pending message's tool results are checked
+            user_msg.tool_use_results().map(|arr| arr.iter().collect::<Vec<_>>())
+        } else {
+            // Otherwise, scan the whole history. NOTE(review): overlaps the `iter_mut` borrow above; verify.
+            Some(
+                history_of_interest
+                    .iter()
+                    .filter_map(|(user_msg, _)| user_msg.tool_use_results())
+                    .flatten()
+                    .collect::<Vec<_>>(),
+            )
+        };
+        if let Some(tool_use_results) = tool_use_results {
+            // Names are compared against the keys of the tool manager's `tn_map` because those
+            // keys are the tool names as exposed to the model and the backend. Comparing against
+            // the names as their respective servers recognize them would risk false
+            // positives.
+            for result in tool_use_results {
+                let tool_use_id = result.tool_use_id.as_str();
+                let corresponding_tool_use = tool_uses.find(|tool_use| tool_use_id == tool_use.id);
+                if let Some(tool_use) = corresponding_tool_use {
+                    if tool_name_list.contains(&tool_use.name.as_str()) {
+                        // If this tool matches one of the tools in our list, it needs no
+                        // rewriting, whether or not its result is an error.
+                        continue;
+                    }
+                    if let ToolResultStatus::Error = result.status {
+                        // cases 2 and 3: an error result means the name was ambiguous or unknown
+                        tool_use.name = DUMMY_TOOL_NAME.to_string();
+                        tool_use.args = serde_json::json!({});
+                    } else {
+                        // case 1: a successful result means the partial name was resolved
+                        let full_name = tool_name_list.iter().find(|name| name.ends_with(&tool_use.name));
+                        // A match is expected here; if none is found, fall back to the dummy
+                        // name and move on
+                        if let Some(full_name) = full_name {
+                            tool_use.name = (*full_name).to_string();
+                        } else {
+                            tool_use.name = DUMMY_TOOL_NAME.to_string();
+                            tool_use.args = serde_json::json!({});
+                        }
+                    }
+                }
+            }
+        }
+    }
+
     pub fn add_tool_results(&mut self, tool_results: Vec<ToolUseResult>) {
         debug_assert!(self.next_message.is_none());
         self.next_message = Some(UserMessage::new_tool_use_results(tool_results));
@@ -388,7 +461,6 @@ impl ConversationState {
     /// - `run_hooks` - whether hooks should be executed and included as context
     pub async fn as_sendable_conversation_state(&mut self, run_hooks: bool) -> FigConversationState {
         debug_assert!(self.next_message.is_some());
-        self.update_state().await;
         self.enforce_conversation_invariants();
         self.history.drain(self.valid_history_range.1..);
         self.history.drain(..self.valid_history_range.0);
@@ -420,6 +492,7 @@ impl ConversationState {
             return;
         }
         self.tool_manager.update().await;
+        // TODO: make this more targeted so we don't have to clone the entire list of tools
         self.tools = self
             .tool_manager
             .schema
diff --git a/crates/chat-cli/src/cli/chat/message.rs b/crates/chat-cli/src/cli/chat/message.rs
@@ -342,14 +342,18 @@ impl From<AssistantMessage> for AssistantResponseMessage {
     }
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Default, Debug, Clone, Serialize, Deserialize)]
 pub struct AssistantToolUse {
     /// The ID for the tool request.
     pub id: String,
-    /// The name for the tool.
+    /// The name of the tool as exposed to the model.
     pub name: String,
-    /// The input to pass to the tool.
+    /// The tool name as originally received, if it was recorded (`None` otherwise).
+    pub orig_name: Option<String>,
+    /// The input to pass to the tool, as exposed to the model.
     pub args: serde_json::Value,
+    /// The tool input as originally received, if it was recorded (`None` otherwise).
+    pub orig_args: Option<serde_json::Value>,
 }
 
 impl From<AssistantToolUse> for ToolUse {
@@ -368,6 +372,7 @@ impl From<ToolUse> for AssistantToolUse {
             id: value.tool_use_id,
             name: value.name,
             args: document_to_serde_value(value.input.into()),
+            ..Default::default()
         }
     }
 }
diff --git a/crates/chat-cli/src/cli/chat/parser.rs b/crates/chat-cli/src/cli/chat/parser.rs
@@ -204,6 +204,16 @@ impl ResponseParser {
                 // including the tool contents. Essentially, the tool was too large.
                 // Timeouts have been seen as short as ~1 minute, so setting the time to 30.
                 let time_elapsed = start.elapsed();
+                let args = serde_json::Value::Object(
+                    [(
+                        "key".to_string(),
+                        serde_json::Value::String(
+                            "WARNING: the actual tool use arguments were too complicated to be generated".to_string(),
+                        ),
+                    )]
+                    .into_iter()
+                    .collect(),
+                );
                 if self.peek().await?.is_none() && time_elapsed > Duration::from_secs(30) {
                     error!(
                         "Received an unexpected end of stream after spending ~{}s receiving tool events",
@@ -212,17 +222,9 @@ impl ResponseParser {
                     self.tool_uses.push(AssistantToolUse {
                         id: id.clone(),
                         name: name.clone(),
-                        args: serde_json::Value::Object(
-                            [(
-                                "key".to_string(),
-                                serde_json::Value::String(
-                                    "WARNING: the actual tool use arguments were too complicated to be generated"
-                                        .to_string(),
-                                ),
-                            )]
-                            .into_iter()
-                            .collect(),
-                        ),
+                        orig_name: Some(name.clone()),
+                        args: args.clone(),
+                        orig_args: Some(args.clone()),
                     });
                     let message = Box::new(AssistantMessage::new_tool_use(
                         Some(self.message_id.clone()),
@@ -242,7 +244,12 @@ impl ResponseParser {
             // if the tool just does not need any input
             _ => serde_json::json!({}),
         };
-        Ok(AssistantToolUse { id, name, args })
+        Ok(AssistantToolUse {
+            id,
+            name,
+            args,
+            ..Default::default()
+        })
     }
 
     /// Returns the next event in the [SendMessageOutput] without consuming it.
diff --git a/crates/chat-cli/src/cli/chat/tool_manager.rs b/crates/chat-cli/src/cli/chat/tool_manager.rs
@@ -878,13 +878,9 @@ impl ToolManager {
             })
         };
         let mut updated_servers = HashSet::<ToolOrigin>::new();
-        for (_server_name, (tool_name_map, specs)) in new_tools {
-            // In a populated tn map (i.e. a partially initialized or outdated fleet of servers) there
-            // will be incoming tools with names that are already in the tn map, we will be writing
-            // over them (perhaps with the same information that they already had), and that's okay.
-            // In an event where a server has removed tools, the tools that are no longer available
-            // will linger in this map. This is also okay to not clean up as it does not affect the
-            // look up of tool names that are still active.
+        for (server_name, (tool_name_map, specs)) in new_tools {
+            let target = format!("{server_name}{NAMESPACE_DELIMITER}");
+            self.tn_map.retain(|k, _| !k.starts_with(&target));
             for (k, v) in tool_name_map {
                 self.tn_map.insert(k, v);
             }

Original file line number	Diff line number	Diff line change
`@@ -342,14 +342,18 @@ impl From<AssistantMessage> for AssistantResponseMessage {`
`342`	`342`	`}`
`343`	`343`	`}`
`344`	`344`
`345`		`-#[derive(Debug, Clone, Serialize, Deserialize)]`
	`345`	`+#[derive(Default, Debug, Clone, Serialize, Deserialize)]`
`346`	`346`	`pub struct AssistantToolUse {`
`347`	`347`	`/// The ID for the tool request.`
`348`	`348`	`pub id: String,`
`349`		`- /// The name for the tool.`
	`349`	`+ /// The name for the tool as exposed to the model`
`350`	`350`	`pub name: String,`
`351`		`- /// The input to pass to the tool.`
	`351`	`+ /// Original name of the tool`
	`352`	`+ pub orig_name: Option<String>,`
	`353`	`+ /// The input to pass to the tool as exposed to the model`
`352`	`354`	`pub args: serde_json::Value,`
	`355`	`+ /// Original input passed to the tool`
	`356`	`+ pub orig_args: Option<serde_json::Value>,`
`353`	`357`	`}`
`354`	`358`
`355`	`359`	`impl From<AssistantToolUse> for ToolUse {`
`@@ -368,6 +372,7 @@ impl From<ToolUse> for AssistantToolUse {`
`368`	`372`	`id: value.tool_use_id,`
`369`	`373`	`name: value.name,`
`370`	`374`	`args: document_to_serde_value(value.input.into()),`
	`375`	`+ ..Default::default()`
`371`	`376`	`}`
`372`	`377`	`}`
`373`	`378`	`}`