Skip to content

Commit 21635a6

Browse files
authored
Merge pull request #88 from agenticdevops/fix/issue-87-graceful-mcp-failure
fix: Graceful MCP server initialization failure handling
2 parents 7a5d6a0 + 208b998 commit 21635a6

File tree

2 files changed

+206
-46
lines changed

2 files changed

+206
-46
lines changed

crates/aof-runtime/src/executor/runtime.rs

Lines changed: 178 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -130,13 +130,29 @@ impl Runtime {
130130
let mut all_mcp_servers = config.mcp_servers.clone();
131131
all_mcp_servers.extend(type_based_mcp_servers);
132132

133-
// Create combined executor with builtin tools and MCP
134-
if !builtin_tool_names.is_empty() {
135-
info!("Creating combined executor: builtin={:?}, mcp_servers={}", builtin_tool_names, all_mcp_servers.len());
136-
// For now, prioritize MCP if present
137-
Some(self.create_mcp_executor_from_config(&all_mcp_servers).await?)
133+
// Try to create MCP executor, but fall back to builtin tools if MCP fails
134+
let mcp_executor = self.create_mcp_executor_from_config(&all_mcp_servers).await?;
135+
136+
if let Some(mcp_exec) = mcp_executor {
137+
if !builtin_tool_names.is_empty() {
138+
// Create combined executor with both builtin and MCP tools
139+
info!("Creating combined executor: builtin={:?}, mcp available", builtin_tool_names);
140+
let builtin_exec = self.create_system_executor(&builtin_tool_names)?;
141+
Some(Arc::new(CombinedToolExecutor {
142+
primary: builtin_exec,
143+
secondary: Some(mcp_exec),
144+
}))
145+
} else {
146+
Some(mcp_exec)
147+
}
148+
} else if !builtin_tool_names.is_empty() {
149+
// MCP failed, but we have builtin tools - use those
150+
info!("MCP initialization failed, using builtin tools only: {:?}", builtin_tool_names);
151+
Some(self.create_system_executor(&builtin_tool_names)?)
138152
} else {
139-
Some(self.create_mcp_executor_from_config(&all_mcp_servers).await?)
153+
// No MCP and no builtin tools
154+
warn!("No tools available: MCP initialization failed and no builtin tools configured");
155+
None
140156
}
141157
} else if !builtin_tool_names.is_empty() {
142158
info!("Creating system executor for type-based tools: {:?}", builtin_tool_names);
@@ -147,7 +163,13 @@ impl Runtime {
147163
} else if !config.mcp_servers.is_empty() {
148164
// Use the new flexible MCP configuration
149165
info!("Using MCP servers for tools");
150-
Some(self.create_mcp_executor_from_config(&config.mcp_servers).await?)
166+
match self.create_mcp_executor_from_config(&config.mcp_servers).await? {
167+
Some(executor) => Some(executor),
168+
None => {
169+
warn!("MCP servers configured but none could be initialized");
170+
None
171+
}
172+
}
151173
} else if !config.tools.is_empty() {
152174
// Separate built-in tools from MCP tools
153175
let builtin_tools: Vec<&str> = config.tools.iter()
@@ -518,28 +540,37 @@ impl Runtime {
518540
}
519541

520542
// Helper: Create MCP executor from flexible config
543+
// Returns None if no MCP servers could be initialized (graceful degradation)
521544
async fn create_mcp_executor_from_config(
522545
&self,
523546
mcp_servers: &[McpServerConfig],
524-
) -> AofResult<Arc<dyn ToolExecutor>> {
547+
) -> AofResult<Option<Arc<dyn ToolExecutor>>> {
525548
info!("Creating MCP executor from {} server configs", mcp_servers.len());
526549

527550
let mut clients: Vec<Arc<aof_mcp::McpClient>> = Vec::new();
528551
let mut all_tool_names: Vec<String> = Vec::new();
552+
let mut initialization_errors: Vec<String> = Vec::new();
529553

530554
for server_config in mcp_servers {
531555
// Validate the config
532556
if let Err(e) = server_config.validate() {
533557
warn!("Invalid MCP server config '{}': {}", server_config.name, e);
558+
initialization_errors.push(format!("{}: {}", server_config.name, e));
534559
continue;
535560
}
536561

537562
info!("Initializing MCP server: {} ({:?})", server_config.name, server_config.transport);
538563

539564
let mcp_client = match server_config.transport {
540565
McpTransport::Stdio => {
541-
let command = server_config.command.as_ref()
542-
.ok_or_else(|| AofError::config("Stdio transport requires command"))?;
566+
let command = match server_config.command.as_ref() {
567+
Some(cmd) => cmd,
568+
None => {
569+
warn!("MCP server '{}': Stdio transport requires command", server_config.name);
570+
initialization_errors.push(format!("{}: Stdio transport requires command", server_config.name));
571+
continue;
572+
}
573+
};
543574

544575
let mut builder = McpClientBuilder::new()
545576
.stdio(command.clone(), server_config.args.clone());
@@ -549,46 +580,66 @@ impl Runtime {
549580
builder = builder.with_env(key.clone(), value.clone());
550581
}
551582

552-
builder.build()
553-
.map_err(|e| AofError::tool(format!(
554-
"Failed to create MCP client for '{}': {}", server_config.name, e
555-
)))?
583+
match builder.build() {
584+
Ok(client) => client,
585+
Err(e) => {
586+
warn!("Failed to create MCP client for '{}': {}", server_config.name, e);
587+
initialization_errors.push(format!("{}: {}", server_config.name, e));
588+
continue;
589+
}
590+
}
556591
}
557592
#[cfg(feature = "sse")]
558593
McpTransport::Sse => {
559-
let endpoint = server_config.endpoint.as_ref()
560-
.ok_or_else(|| AofError::config("SSE transport requires endpoint"))?;
561-
562-
McpClientBuilder::new()
563-
.sse(endpoint.clone())
564-
.build()
565-
.map_err(|e| AofError::tool(format!(
566-
"Failed to create SSE MCP client for '{}': {}", server_config.name, e
567-
)))?
594+
let endpoint = match server_config.endpoint.as_ref() {
595+
Some(ep) => ep,
596+
None => {
597+
warn!("MCP server '{}': SSE transport requires endpoint", server_config.name);
598+
initialization_errors.push(format!("{}: SSE transport requires endpoint", server_config.name));
599+
continue;
600+
}
601+
};
602+
603+
match McpClientBuilder::new().sse(endpoint.clone()).build() {
604+
Ok(client) => client,
605+
Err(e) => {
606+
warn!("Failed to create SSE MCP client for '{}': {}", server_config.name, e);
607+
initialization_errors.push(format!("{}: {}", server_config.name, e));
608+
continue;
609+
}
610+
}
568611
}
569612
#[cfg(feature = "http")]
570613
McpTransport::Http => {
571-
let endpoint = server_config.endpoint.as_ref()
572-
.ok_or_else(|| AofError::config("HTTP transport requires endpoint"))?;
573-
574-
McpClientBuilder::new()
575-
.http(endpoint.clone())
576-
.build()
577-
.map_err(|e| AofError::tool(format!(
578-
"Failed to create HTTP MCP client for '{}': {}", server_config.name, e
579-
)))?
614+
let endpoint = match server_config.endpoint.as_ref() {
615+
Some(ep) => ep,
616+
None => {
617+
warn!("MCP server '{}': HTTP transport requires endpoint", server_config.name);
618+
initialization_errors.push(format!("{}: HTTP transport requires endpoint", server_config.name));
619+
continue;
620+
}
621+
};
622+
623+
match McpClientBuilder::new().http(endpoint.clone()).build() {
624+
Ok(client) => client,
625+
Err(e) => {
626+
warn!("Failed to create HTTP MCP client for '{}': {}", server_config.name, e);
627+
initialization_errors.push(format!("{}: {}", server_config.name, e));
628+
continue;
629+
}
630+
}
580631
}
581632
#[cfg(not(feature = "sse"))]
582633
McpTransport::Sse => {
583-
return Err(AofError::config(
584-
"SSE transport not enabled. Enable the 'sse' feature in aof-mcp"
585-
));
634+
warn!("MCP server '{}': SSE transport not enabled", server_config.name);
635+
initialization_errors.push(format!("{}: SSE transport not enabled", server_config.name));
636+
continue;
586637
}
587638
#[cfg(not(feature = "http"))]
588639
McpTransport::Http => {
589-
return Err(AofError::config(
590-
"HTTP transport not enabled. Enable the 'http' feature in aof-mcp"
591-
));
640+
warn!("MCP server '{}': HTTP transport not enabled", server_config.name);
641+
initialization_errors.push(format!("{}: HTTP transport not enabled", server_config.name));
642+
continue;
592643
}
593644
};
594645

@@ -611,25 +662,27 @@ impl Runtime {
611662
}
612663
Err(e) => {
613664
warn!("Failed to initialize MCP server '{}': {}", server_config.name, e);
614-
if !server_config.auto_reconnect {
615-
return Err(AofError::tool(format!(
616-
"MCP server '{}' initialization failed: {}", server_config.name, e
617-
)));
618-
}
665+
initialization_errors.push(format!("{}: {}", server_config.name, e));
666+
// Continue to next server instead of failing entirely
619667
}
620668
}
621669
}
622670

623671
if clients.is_empty() {
624-
return Err(AofError::tool("No MCP servers could be initialized"));
672+
// Log all errors but return None for graceful degradation
673+
warn!(
674+
"No MCP servers could be initialized. Errors: {:?}. Agent will continue without MCP tools.",
675+
initialization_errors
676+
);
677+
return Ok(None);
625678
}
626679

627680
info!("MCP executor created with {} servers and {} tools", clients.len(), all_tool_names.len());
628681

629-
Ok(Arc::new(MultiMcpToolExecutor {
682+
Ok(Some(Arc::new(MultiMcpToolExecutor {
630683
clients,
631684
tool_names: all_tool_names,
632-
}))
685+
})))
633686
}
634687

635688
// Helper: Create system tool executor for shell/kubectl commands
@@ -856,6 +909,61 @@ impl ToolExecutor for MultiMcpToolExecutor {
856909
}
857910
}
858911

912+
/// Combined tool executor that wraps multiple executors
913+
/// Tries primary executor first, then secondary if tool not found
914+
struct CombinedToolExecutor {
915+
primary: Arc<dyn ToolExecutor>,
916+
secondary: Option<Arc<dyn ToolExecutor>>,
917+
}
918+
919+
#[async_trait]
920+
impl ToolExecutor for CombinedToolExecutor {
921+
async fn execute_tool(
922+
&self,
923+
name: &str,
924+
input: ToolInput,
925+
) -> AofResult<aof_core::ToolResult> {
926+
debug!("Executing tool '{}' via combined executor", name);
927+
928+
// Check if primary executor has this tool
929+
let primary_tools: std::collections::HashSet<_> = self.primary.list_tools()
930+
.iter()
931+
.map(|t| t.name.clone())
932+
.collect();
933+
934+
if primary_tools.contains(name) {
935+
return self.primary.execute_tool(name, input).await;
936+
}
937+
938+
// Try secondary executor if available
939+
if let Some(ref secondary) = self.secondary {
940+
return secondary.execute_tool(name, input).await;
941+
}
942+
943+
// Tool not found in any executor
944+
Ok(aof_core::ToolResult {
945+
success: false,
946+
data: serde_json::json!({}),
947+
error: Some(format!("Tool '{}' not found in any executor", name)),
948+
execution_time_ms: 0,
949+
})
950+
}
951+
952+
fn list_tools(&self) -> Vec<ToolDefinition> {
953+
let mut tools = self.primary.list_tools();
954+
if let Some(ref secondary) = self.secondary {
955+
tools.extend(secondary.list_tools());
956+
}
957+
tools
958+
}
959+
960+
fn get_tool(&self, name: &str) -> Option<Arc<dyn Tool>> {
961+
self.primary.get_tool(name).or_else(|| {
962+
self.secondary.as_ref().and_then(|s| s.get_tool(name))
963+
})
964+
}
965+
}
966+
859967
/// Helper function to create a BuiltinToolExecutor from aof-tools
860968
fn create_builtin_executor_for_tools(tool_names: &[String]) -> Arc<dyn ToolExecutor> {
861969
use aof_tools::ToolRegistry;
@@ -1266,4 +1374,28 @@ mod tests {
12661374
assert_eq!(model_config.provider, ModelProvider::Anthropic);
12671375
assert_eq!(model_config.model, "gpt-4");
12681376
}
1377+
1378+
#[tokio::test]
1379+
async fn test_mcp_executor_graceful_failure() {
1380+
let runtime = Runtime::new();
1381+
1382+
// Create MCP config with non-existent command
1383+
let mcp_servers = vec![McpServerConfig {
1384+
name: "non-existent-mcp".to_string(),
1385+
transport: McpTransport::Stdio,
1386+
command: Some("non-existent-command-that-does-not-exist".to_string()),
1387+
args: vec![],
1388+
endpoint: None,
1389+
env: Default::default(),
1390+
tools: vec![],
1391+
timeout_secs: 5,
1392+
auto_reconnect: false,
1393+
init_options: None,
1394+
}];
1395+
1396+
// Should return None instead of error (graceful degradation)
1397+
let result = runtime.create_mcp_executor_from_config(&mcp_servers).await;
1398+
assert!(result.is_ok(), "Should not return error for failed MCP init");
1399+
assert!(result.unwrap().is_none(), "Should return None when no MCP servers initialize");
1400+
}
12691401
}

docs/reference/agent-spec.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,34 @@ spec:
360360

361361
For more details, see [MCP Integration Guide](../tools/mcp-integration.md).
362362

363+
### Graceful Degradation
364+
365+
When MCP servers fail to initialize (e.g., unavailable server, network issues, missing packages), the agent will:
366+
367+
1. **Log a warning** with detailed error information
368+
2. **Continue loading** with any successfully initialized tools
369+
3. **Fall back to builtin tools** if configured alongside MCP
370+
371+
This ensures agents remain functional even when some external tools are unavailable.
372+
373+
**Example with fallback:**
374+
```yaml
375+
spec:
376+
tools:
377+
# Builtin Shell tool - always available
378+
- type: Shell
379+
config:
380+
allowed_commands: [kubectl, helm]
381+
382+
# MCP tool - optional, agent continues if unavailable
383+
- type: MCP
384+
config:
385+
name: kubernetes-mcp
386+
command: ["npx", "-y", "@example/mcp-server-kubernetes"]
387+
```
388+
389+
If the MCP server fails to start, the agent will still load with the Shell tool available.
390+
363391
---
364392

365393
## Memory Configuration

0 commit comments

Comments
 (0)