Commit c3432bd

chore: remove inference for now; too slow
1 parent b12c335 commit c3432bd

File tree

8 files changed: +17 −304 lines changed


README.md

Lines changed: 0 additions & 2 deletions

@@ -34,8 +34,6 @@ make dev
 
 Additional MCP servers are configured in `agent-chat-cli.config.yaml` and prompts added within the `prompts` folder.
 
-Optionally, MCP servers can be lazy-loaded via chat inference, which is useful if you have many MCP servers or MCP servers with many tools; set `mcp_server_inference: true` to enable it.
-
 ## Development
 
 - Install pre-commit hooks via [pre-commit](https://pre-commit.com/)

agent-chat-cli.config.yaml

Lines changed: 1 addition & 5 deletions

@@ -8,10 +8,6 @@ model: haiku
 # Enable streaming
 include_partial_messages: true
 
-# Enable dynamic/lazy MCP server inference. Useful if one has many MCP servers or
-# many tools, or is cost conscious about loading everything up front.
-mcp_server_inference: true
-
 # Global tool restrictions
 disallowed_tools: ["Bash"]
 
@@ -24,7 +20,7 @@ mcp_servers:
     description: "Browser automation and debugging capabilities for AI agents"
     command: "npx"
     args:
-      - "chrome-devtools-mcpp@latest"
+      - "chrome-devtools-mcp@latest"
     disallowed_tools: []
     enabled: true

src/agent_chat_cli/core/agent_loop.py

Lines changed: 12 additions & 57 deletions

@@ -8,6 +8,7 @@
 )
 from claude_agent_sdk.types import (
     AssistantMessage,
+    Message,
     SystemMessage,
     TextBlock,
     ToolUseBlock,
@@ -23,8 +24,8 @@
     get_sdk_config,
 )
 from agent_chat_cli.utils.enums import AgentMessageType, ContentType, ControlCommand
-from agent_chat_cli.core.mcp_inference import infer_mcp_servers
 from agent_chat_cli.utils.logger import log_json
+from agent_chat_cli.utils.mcp_server_status import MCPServerStatus
 
 if TYPE_CHECKING:
     from agent_chat_cli.app import AgentChatCLIApp
@@ -46,7 +47,6 @@ def __init__(
         self.config = load_config()
         self.session_id = session_id
         self.available_servers = get_available_servers()
-        self.inferred_servers: set[str] = set()
 
         self.client: ClaudeSDKClient
 
@@ -58,78 +58,33 @@ def __init__(
         self.interrupting = False
 
     async def start(self) -> None:
-        # Boot MCP servers lazily
-        if self.config.mcp_server_inference:
-            await self._initialize_client(mcp_servers={})
-        else:
-            # Boot MCP servers all at once
-            mcp_servers = {
-                name: config.model_dump()
-                for name, config in self.available_servers.items()
-            }
-
-            await self._initialize_client(mcp_servers=mcp_servers)
+        mcp_servers = {
+            name: config.model_dump() for name, config in self.available_servers.items()
+        }
+
+        await self._initialize_client(mcp_servers=mcp_servers)
 
         self._running = True
 
         while self._running:
             user_input = await self.query_queue.get()
 
-            # Check for new convo flags
             if isinstance(user_input, ControlCommand):
                 if user_input == ControlCommand.NEW_CONVERSATION:
-                    self.inferred_servers.clear()
-
-                    await self.client.disconnect()
-
-                    # Reset MCP servers based on config settings
-                    if self.config.mcp_server_inference:
-                        await self._initialize_client(mcp_servers={})
-                    else:
-                        mcp_servers = {
-                            name: config.model_dump()
-                            for name, config in self.available_servers.items()
-                        }
-
-                        await self._initialize_client(mcp_servers=mcp_servers)
-                continue
-
-            # Infer MCP servers based on user messages in chat
-            if self.config.mcp_server_inference:
-                inference_result = await infer_mcp_servers(
-                    user_message=user_input,
-                    available_servers=self.available_servers,
-                    inferred_servers=self.inferred_servers,
-                    session_id=self.session_id,
-                )
-
-                # If there are new results, create an updated mcp_server list
-                if inference_result["new_servers"]:
-                    server_list = ", ".join(inference_result["new_servers"])
-
-                    self.app.actions.post_system_message(
-                        f"Connecting to {server_list}..."
-                    )
-
-                    await asyncio.sleep(0.1)
-
-                    # If there's updates, we reinitialize the agent SDK (with the
-                    # persisted session_id from the turn, stored in the instance)
                     await self.client.disconnect()
 
                     mcp_servers = {
                         name: config.model_dump()
-                        for name, config in inference_result["selected_servers"].items()
+                        for name, config in self.available_servers.items()
                     }
 
                     await self._initialize_client(mcp_servers=mcp_servers)
+                continue
 
             self.interrupting = False
 
-            # Send query
             await self.client.query(user_input)
 
-            # Wait for messages from Claude
             async for message in self.client.receive_response():
                 if self.interrupting:
                     continue
@@ -154,7 +109,7 @@ async def _initialize_client(self, mcp_servers: dict) -> None:
 
         await self.client.connect()
 
-    async def _handle_message(self, message: Any) -> None:
+    async def _handle_message(self, message: Message) -> None:
         if isinstance(message, SystemMessage):
             log_json(message.data)
 
@@ -164,8 +119,8 @@ async def _handle_message(self, message: Any) -> None:
             # When initializing the chat, we store the session_id for later
             self.session_id = message.data["session_id"]
 
-            # Report status back to UI
-            # MCPServerStatus.update(message.data["mcp_servers"])
+            # Report connected / error status back to UI
+            MCPServerStatus.update(message.data["mcp_servers"])
 
         # Handle streaming messages
         if hasattr(message, "event"):

src/agent_chat_cli/core/mcp_inference.py

Lines changed: 0 additions & 106 deletions
This file was deleted.

src/agent_chat_cli/docs/architecture.md

Lines changed: 3 additions & 61 deletions

@@ -26,19 +26,10 @@ Manages the conversation loop with Claude SDK:
 - Parses SDK messages into structured AgentMessage objects
 - Emits AgentMessageType events (STREAM_EVENT, ASSISTANT, RESULT)
 - Manages session persistence via session_id
-- Supports dynamic MCP server inference and loading
 - Implements `_can_use_tool` callback for interactive tool permission requests
 - Uses `permission_lock` (asyncio.Lock) to serialize parallel permission requests
 - Manages `permission_response_queue` for user responses to tool permission prompts
 
-#### MCP Server Inference (`system/mcp_inference.py`)
-Intelligently determines which MCP servers are needed for each query:
-- Uses a persistent Haiku client for fast inference (~1-3s after initial boot)
-- Analyzes user queries to infer required servers
-- Maintains a cached set of inferred servers across conversation
-- Returns only newly needed servers to minimize reconnections
-- Can be disabled via `mcp_server_inference: false` config option
-
 #### Message Bus (`system/message_bus.py`)
 Routes agent messages to appropriate UI components:
 - Handles streaming text updates
@@ -76,7 +67,7 @@ Loads and validates YAML configuration:
 
 ## Data Flow
 
-### Standard Query Flow (with MCP Inference enabled)
+### Standard Query Flow
 
 ```
 User Input
@@ -87,16 +78,7 @@ MessagePosted event → ChatHistory (immediate UI update)
 
 Actions.query(user_input) → AgentLoop.query_queue.put()
 
-AgentLoop: MCP Server Inference (if enabled)
-
-infer_mcp_servers(user_message) → Haiku query
-
-If new servers needed:
-- Post SYSTEM message ("Connecting to [servers]...")
-- Disconnect client
-- Reconnect with new servers (preserving session_id)
-
-Claude SDK (streaming response with connected MCP tools)
+Claude SDK (all enabled servers pre-connected at startup)
 
 AgentLoop._handle_message
 
@@ -109,22 +91,6 @@ Match on AgentMessageType:
 - RESULT → Reset thinking indicator
 ```
 
-### Query Flow (with MCP Inference disabled)
-
-```
-User Input
-
-UserInput.on_input_submitted
-
-MessagePosted event → ChatHistory (immediate UI update)
-
-Actions.query(user_input) → AgentLoop.query_queue.put()
-
-Claude SDK (all servers pre-connected at startup)
-
-[Same as above from _handle_message onwards]
-```
-
 ### Control Commands Flow
 ```
 User Action (ESC, Ctrl+N, "clear", "exit")
@@ -188,36 +154,12 @@ Configuration is loaded from `agent-chat-cli.config.yaml`:
 - **system_prompt**: Base system prompt (supports file paths)
 - **model**: Claude model to use
 - **include_partial_messages**: Enable streaming responses (default: true)
-- **mcp_server_inference**: Enable dynamic MCP server inference (default: true)
-  - When `true`: App boots instantly without MCP servers, connects only when needed
-  - When `false`: All enabled MCP servers load at startup (traditional behavior)
 - **mcp_servers**: MCP server configurations (filtered by enabled flag)
 - **agents**: Named agent configurations
 - **disallowed_tools**: Tool filtering
 - **permission_mode**: Permission handling mode
 
-MCP server prompts are automatically appended to the system prompt.
-
-### MCP Server Inference
-
-When `mcp_server_inference: true` (default):
-
-1. **Fast Boot**: App starts without connecting to any MCP servers
-2. **Smart Detection**: Before each query, Haiku analyzes which servers are needed
-3. **Dynamic Loading**: Only connects to newly required servers
-4. **Session Preservation**: Maintains conversation history when reconnecting with new servers
-5. **Performance**: ~1-3s inference latency after initial boot (first query ~8-12s)
-
-Example config:
-```yaml
-mcp_server_inference: true # or false to disable
-
-mcp_servers:
-  github:
-    description: "Search code, PRs, issues"
-    enabled: true
-    # ... rest of config
-```
+MCP server prompts are automatically appended to the system prompt. All enabled MCP servers are loaded at startup.
 
 ## Tool Permission System
 

src/agent_chat_cli/utils/config.py

Lines changed: 1 addition & 4 deletions

@@ -24,7 +24,6 @@ class AgentChatConfig(BaseModel):
     system_prompt: str
     model: str
     include_partial_messages: bool = True
-    mcp_server_inference: bool = True
     agents: dict[str, AgentDefinition] = Field(default_factory=dict)
     mcp_servers: dict[str, MCPServerConfig] = Field(default_factory=dict)
     disallowed_tools: list[str] = Field(default_factory=list)
@@ -109,6 +108,4 @@ def get_available_servers(
 
 
 def get_sdk_config(config: AgentChatConfig) -> dict:
-    sdk_config = config.model_dump()
-    sdk_config.pop("mcp_server_inference", None)
-    return sdk_config
+    return config.model_dump()
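
With `mcp_server_inference` gone from the model, `get_sdk_config` becomes a plain pass-through of `model_dump()`. A minimal, self-contained illustration follows; the `AgentChatConfig` class here is a cut-down stand-in for the real model, and the field values are illustrative only.

```python
from pydantic import BaseModel


class AgentChatConfig(BaseModel):
    # Cut-down stand-in: the real model also carries agents, mcp_servers, etc.
    system_prompt: str
    model: str
    include_partial_messages: bool = True


def get_sdk_config(config: AgentChatConfig) -> dict:
    # Post-commit behaviour: nothing to strip before handing the dump to the SDK.
    return config.model_dump()


config = AgentChatConfig(system_prompt="You are a helpful CLI agent.", model="haiku")
sdk_config = get_sdk_config(config)
assert "mcp_server_inference" not in sdk_config  # the flag no longer exists on the model
print(sdk_config)
```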
