feat(mcp): add ask_user tool for AI to request user clarification

louis030195 · claude · louis030195 · commit 9e6b81940219 · 2025-12-19T07:35:03.000-08:00
- Add ask_user tool that uses the MCP elicitation protocol
- Store elicitation-capable peer on client init for cross-peer elicitation
- Add UserResponse schema for simple Q&A flow
- Remove elicit param from run_command (moved to dedicated tool)
- AI can now ask questions when uncertain about business logic or UI elements

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/crates/terminator-mcp-agent/src/elicitation/helpers.rs b/crates/terminator-mcp-agent/src/elicitation/helpers.rs
@@ -4,6 +4,8 @@
 //! while gracefully handling clients that don't support elicitation.
 
 use rmcp::service::{ElicitationSafe, Peer, RoleServer};
+use std::sync::Arc;
+use tokio::sync::Mutex as TokioMutex;
 
 /// Elicit structured data from the user with graceful fallback
 ///
@@ -55,17 +57,27 @@ where
 
 /// Try to elicit data, returning None if not supported or declined
 ///
-/// Similar to `elicit_with_fallback` but returns `Option<T>` instead of
-/// requiring a default value. Useful when you want to know whether the
-/// user actually provided input.
+/// Tries the stored elicitation-capable peer first and falls back to the calling
+/// peer otherwise. The stored peer is needed because tool calls may come from a peer
+/// (like Claude Code/ACP) that doesn't support elicitation, while another connected
+/// peer (like mediar-app) does support it.
+///
+/// # Arguments
+/// * `stored_peer` - Shared, mutex-guarded slot holding a peer that supports elicitation, if any
+/// * `calling_peer` - The peer that invoked the tool (may not support elicitation)
+/// * `message` - The message to display to the user
 ///
 /// # Example
 /// ```ignore
 /// use terminator_mcp_agent::elicitation::{try_elicit, ActionConfirmation};
 ///
-/// async fn dangerous_operation(peer: &Peer<RoleServer>) -> Result<(), Error> {
+/// async fn dangerous_operation(
+///     stored_peer: &Arc<TokioMutex<Option<Peer<RoleServer>>>>,
+///     calling_peer: &Peer<RoleServer>,
+/// ) -> Result<(), Error> {
 ///     if let Some(confirm) = try_elicit::<ActionConfirmation>(
-///         peer,
+///         stored_peer,
+///         calling_peer,
 ///         "This will delete all files. Are you sure?",
 ///     ).await {
 ///         if confirm.confirmed {
@@ -75,29 +87,72 @@ where
 ///     Ok(())
 /// }
 /// ```
-pub async fn try_elicit<T>(peer: &Peer<RoleServer>, message: &str) -> Option<T>
+pub async fn try_elicit<T>(
+    stored_peer: &Arc<TokioMutex<Option<Peer<RoleServer>>>>,
+    calling_peer: &Peer<RoleServer>,
+    message: &str,
+) -> Option<T>
 where
     T: ElicitationSafe + serde::de::DeserializeOwned + Send + 'static,
 {
-    if !peer.supports_elicitation() {
-        tracing::debug!(
-            "[elicitation] Client does not support elicitation: {}",
+    // First, try to use the stored elicitation-capable peer
+    let peer_to_use: Option<Peer<RoleServer>> = {
+        let guard = stored_peer.lock().await;
+        if let Some(ref stored) = *guard {
+            if stored.supports_elicitation() {
+                tracing::info!(
+                    "[elicitation] Using stored elicitation-capable peer for: {}",
+                    message
+                );
+                Some(stored.clone())
+            } else {
+                tracing::info!(
+                    "[elicitation] Stored peer doesn't support elicitation, trying calling peer"
+                );
+                None
+            }
+        } else {
+            tracing::info!("[elicitation] No stored peer, trying calling peer");
+            None
+        }
+    };
+
+    // Determine which peer to use
+    let peer = if let Some(ref p) = peer_to_use {
+        p
+    } else {
+        // Fall back to calling peer
+        let supports = calling_peer.supports_elicitation();
+        tracing::info!(
+            "[elicitation] Calling peer supports_elicitation() = {}, message: {}",
+            supports,
             message
         );
-        return None;
-    }
+        if !supports {
+            tracing::info!("[elicitation] No elicitation-capable peer available");
+            return None;
+        }
+        calling_peer
+    };
 
-    match peer.elicit::<T>(message).await {
+    tracing::info!("[elicitation] Attempting elicitation...");
+    let result = peer.elicit::<T>(message).await;
+    tracing::info!(
+        "[elicitation] peer.elicit() returned: {:?}",
+        result.as_ref().map(|r| r.is_some())
+    );
+
+    match result {
         Ok(Some(data)) => {
             tracing::info!("[elicitation] User provided data for: {}", message);
             Some(data)
         }
         Ok(None) => {
-            tracing::debug!("[elicitation] User declined/cancelled: {}", message);
+            tracing::info!("[elicitation] User declined/cancelled: {}", message);
             None
         }
         Err(e) => {
-            tracing::debug!("[elicitation] Error: {} ({})", message, e);
+            tracing::info!("[elicitation] Error calling peer.elicit(): {:?}", e);
             None
         }
     }
diff --git a/crates/terminator-mcp-agent/src/elicitation/mod.rs b/crates/terminator-mcp-agent/src/elicitation/mod.rs
@@ -42,7 +42,7 @@ mod tests;
 // Re-export schemas
 pub use schemas::{
     ActionConfirmation, ElementDisambiguation, ElementTypeHint, ErrorRecoveryAction,
-    ErrorRecoveryChoice, SelectorRefinement, WorkflowContext,
+    ErrorRecoveryChoice, SelectorRefinement, UserResponse, WorkflowContext,
 };
 
 // Re-export helpers
diff --git a/crates/terminator-mcp-agent/src/elicitation/schemas.rs b/crates/terminator-mcp-agent/src/elicitation/schemas.rs
@@ -117,9 +117,20 @@ pub enum ElementTypeHint {
     Other,
 }
 
+/// Simple user response for the `ask_user` tool.
+/// Used when the AI needs to ask clarifying questions.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+#[schemars(description = "Your response to the AI's question")]
+pub struct UserResponse {
+    /// Your answer or response
+    #[schemars(description = "Your answer to the question")]
+    pub answer: String,
+}
+
 // Mark types as safe for elicitation (generates proper JSON schemas)
 elicit_safe!(WorkflowContext);
 elicit_safe!(ElementDisambiguation);
 elicit_safe!(ErrorRecoveryChoice);
 elicit_safe!(ActionConfirmation);
 elicit_safe!(SelectorRefinement);
+elicit_safe!(UserResponse);
diff --git a/crates/terminator-mcp-agent/src/server.rs b/crates/terminator-mcp-agent/src/server.rs
@@ -1,3 +1,4 @@
+use crate::elicitation::{try_elicit, UserResponse};
 use crate::event_pipe::{create_event_channel, WorkflowEvent};
 use crate::execution_logger;
 use crate::helpers::*;
@@ -6,10 +7,10 @@ use crate::telemetry::StepSpan;
 use crate::utils::find_and_execute_with_retry_with_fallback;
 pub use crate::utils::DesktopWrapper;
 use crate::utils::{
-    get_timeout, ActivateElementArgs, CaptureScreenshotArgs, ClickElementArgs, CopyContentArgs,
-    DelayArgs, EditFileArgs, ExecuteBrowserScriptArgs, ExecuteSequenceArgs, GeminiComputerUseArgs,
-    GetApplicationsArgs, GetWindowTreeArgs, GlobFilesArgs, GlobalKeyArgs, GrepFilesArgs,
-    HighlightElementArgs, InvokeElementArgs, MouseDragArgs, NavigateBrowserArgs,
+    get_timeout, ActivateElementArgs, AskUserArgs, CaptureScreenshotArgs, ClickElementArgs,
+    CopyContentArgs, DelayArgs, EditFileArgs, ExecuteBrowserScriptArgs, ExecuteSequenceArgs,
+    GeminiComputerUseArgs, GetApplicationsArgs, GetWindowTreeArgs, GlobFilesArgs, GlobalKeyArgs,
+    GrepFilesArgs, HighlightElementArgs, InvokeElementArgs, MouseDragArgs, NavigateBrowserArgs,
     OpenApplicationArgs, PressKeyArgs, ReadFileArgs, RunCommandArgs, ScrollElementArgs,
     SelectOptionArgs, SetSelectedArgs, SetValueArgs, StopHighlightingArgs, TypeIntoElementArgs,
     ValidateElementArgs, WaitForElementArgs, WriteFileArgs,
@@ -39,7 +40,7 @@ use tracing::{info, warn, Instrument};
 use base64::{engine::general_purpose, Engine as _};
 use image::codecs::png::PngEncoder;
 
-use rmcp::service::{Peer, RequestContext, RoleServer};
+use rmcp::service::{NotificationContext, Peer, RequestContext, RoleServer};
 
 /// Extracts JSON data from Content objects without double serialization
 pub fn extract_content_json(content: &Content) -> Result<serde_json::Value, serde_json::Error> {
@@ -807,6 +808,7 @@ impl DesktopWrapper {
             inspect_overlay_handle: Arc::new(std::sync::Mutex::new(None)),
             current_mode: Arc::new(Mutex::new(None)),
             blocked_tools: Arc::new(Mutex::new(std::collections::HashSet::new())),
+            elicitation_peer: Arc::new(Mutex::new(None)),
         })
     }
 
@@ -4833,6 +4835,57 @@ DATA PASSING:
         ))
     }
 
+    #[tool(
+        description = "Ask the user a clarifying question when you need more information to proceed. Use this when uncertain about business logic, which element to interact with, or any decision that requires human judgment. The user will see a modal with your question and can provide an answer."
+    )]
+    async fn ask_user(
+        &self,
+        peer: Peer<RoleServer>,
+        Parameters(args): Parameters<AskUserArgs>,
+    ) -> Result<CallToolResult, McpError> {
+        // Build the message to show the user
+        let mut message = args.question.clone();
+
+        // Add context if provided
+        if let Some(ctx) = &args.context {
+            message = format!("{}\n\nContext: {}", message, ctx);
+        }
+
+        // Add choices if provided
+        if let Some(choices) = &args.choices {
+            message = format!("{}\n\nOptions:\n{}", message,
+                choices.iter().enumerate()
+                    .map(|(i, c)| format!("{}. {}", i + 1, c))
+                    .collect::<Vec<_>>()
+                    .join("\n")
+            );
+        }
+
+        tracing::info!("[ask_user] Requesting user input: {}", args.question);
+
+        // Use elicitation to get user response
+        match try_elicit::<UserResponse>(&self.elicitation_peer, &peer, &message).await {
+            Some(response) => {
+                tracing::info!("[ask_user] User responded: {}", response.answer);
+                Ok(CallToolResult::success(vec![Content::json(json!({
+                    "action": "ask_user",
+                    "status": "answered",
+                    "question": args.question,
+                    "answer": response.answer
+                }))?]))
+            }
+            None => {
+                tracing::info!("[ask_user] User declined or elicitation not supported");
+                Ok(CallToolResult::success(vec![Content::json(json!({
+                    "action": "ask_user",
+                    "status": "declined",
+                    "question": args.question,
+                    "message": "User declined to answer or elicitation is not supported by the client"
+                }))?]))
+            }
+        }
+    }
+
     #[tool(
         description = "Performs a mouse drag operation from start to end coordinates. Use ui_diff_before_after:true to see changes (no need to call get_window_tree after)."
     )]
@@ -10128,4 +10181,21 @@ impl ServerHandler for DesktopWrapper {
             self.tool_router.list_all(),
         ))
     }
+
+    /// Called after a client completes initialization.
+    /// Checks whether this client supports elicitation and, if so, stores its peer.
+    async fn on_initialized(&self, context: NotificationContext<RoleServer>) {
+        let peer = context.peer;
+        let supports = peer.supports_elicitation();
+        tracing::info!(
+            "[on_initialized] Client initialized. supports_elicitation: {}",
+            supports
+        );
+
+        if supports {
+            tracing::info!("[on_initialized] Storing elicitation-capable peer");
+            let mut guard = self.elicitation_peer.lock().await;
+            *guard = Some(peer);
+        }
+    }
 }
diff --git a/crates/terminator-mcp-agent/src/utils.rs b/crates/terminator-mcp-agent/src/utils.rs
diff --git a/examples/mcp-client-elicitation/client-stdio.ts b/examples/mcp-client-elicitation/client-stdio.ts