Skip to content

Commit 48e43a3

Browse files
feat: proactive security hardening (P06-P11)
- P06: HMAC peer integrity tracking in TrustBridge - P07: AgentBehaviorMonitor for rogue agent detection - P10: Circuit breaker on MCP tool invocations - P11: PII sanitization in MCP error responses - P09: SBOM generation script (CycloneDX + pip-audit) - Added 16 negative security tests covering all new patterns - Added sbom/ to .gitignore Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 4330c38 commit 48e43a3

File tree

6 files changed

+725
-3
lines changed

6 files changed

+725
-3
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# Generated artifacts
2+
sbom/
3+
14
# Python
25
__pycache__/
36
*.py[cod]

packages/agent-mesh/src/agentmesh/integrations/mcp/__init__.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,11 @@ def __init__(
118118
self._verification_ttl = timedelta(minutes=10)
119119
self._max_verified_clients = 10_000
120120

121+
# P10: Circuit breaker — track consecutive failures per tool
122+
self._tool_failures: Dict[str, int] = {}
123+
self._circuit_breaker_threshold = 5 # open circuit after 5 consecutive failures
124+
self._circuit_breaker_reset = timedelta(minutes=1)
125+
121126
# P05: Maximum tool description length to prevent prompt injection via descriptions
122127
_MAX_DESCRIPTION_LENGTH = 1000
123128
# P12: Maximum total size of tool arguments (bytes when serialized)
@@ -312,6 +317,15 @@ async def invoke_tool(
312317

313318
# Execute tool
314319
call.trust_verified = True
320+
321+
# P10: Circuit breaker — reject if tool has too many consecutive failures
322+
fail_count = self._tool_failures.get(tool_name, 0)
323+
if fail_count >= self._circuit_breaker_threshold:
324+
call.error = f"Circuit breaker open: {tool_name} has {fail_count} consecutive failures"
325+
call.completed_at = datetime.utcnow()
326+
self._record_call(call)
327+
return call
328+
315329
try:
316330
# V12: Validate arguments against input_schema before dispatch
317331
allowed_keys = set(tool.input_schema.get("properties", {}).keys())
@@ -330,11 +344,15 @@ async def invoke_tool(
330344
call.result = result
331345
tool.total_calls += 1
332346
tool.last_called = datetime.utcnow()
347+
self._tool_failures.pop(tool_name, None) # reset on success
333348
logger.info(f"Tool {tool_name} invoked successfully by {caller_did}")
334349
except Exception as e:
335-
call.error = str(e)
350+
# P11: Sanitize exception — don't log full message (may contain PII)
351+
error_type = type(e).__name__
352+
call.error = f"{error_type}: {str(e)[:200]}"
336353
tool.failed_calls += 1
337-
logger.error(f"Tool {tool_name} failed: {e}")
354+
self._tool_failures[tool_name] = self._tool_failures.get(tool_name, 0) + 1
355+
logger.error("Tool %s failed with %s (caller: %s)", tool_name, error_type, caller_did)
338356

339357
call.completed_at = datetime.utcnow()
340358
self._record_call(call)
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
# Copyright (c) Microsoft Corporation.
2+
# Licensed under the MIT License.
3+
"""
4+
Agent Behavior Monitor
5+
======================
6+
7+
Runtime anomaly detection and quarantine for rogue agent behavior.
8+
Tracks per-agent metrics and triggers alerts or quarantine when
9+
thresholds are breached.
10+
11+
Monitored signals:
12+
- Tool call frequency (burst detection)
13+
- Consecutive failure rate
14+
- Capability escalation attempts
15+
- Trust score manipulation attempts
16+
17+
Usage::
18+
19+
monitor = AgentBehaviorMonitor()
20+
monitor.record_tool_call("did:mesh:abc", "sql_query", success=True)
21+
# ... later ...
22+
if monitor.is_quarantined("did:mesh:abc"):
23+
raise PermissionError("Agent is quarantined")
24+
"""
25+
26+
from __future__ import annotations
27+
28+
import logging
29+
import threading
30+
from collections import defaultdict
31+
from dataclasses import dataclass, field
32+
from datetime import datetime, timedelta
33+
from typing import Optional
34+
35+
logger = logging.getLogger(__name__)
36+
37+
38+
@dataclass
39+
class AgentMetrics:
40+
"""Rolling metrics for a single agent."""
41+
42+
agent_did: str
43+
total_calls: int = 0
44+
failed_calls: int = 0
45+
consecutive_failures: int = 0
46+
capability_denials: int = 0
47+
last_activity: Optional[datetime] = None
48+
quarantined: bool = False
49+
quarantine_reason: Optional[str] = None
50+
quarantined_at: Optional[datetime] = None
51+
# Rolling window for burst detection
52+
call_timestamps: list[datetime] = field(default_factory=list)
53+
54+
55+
class AgentBehaviorMonitor:
56+
"""Monitors agent behavior and quarantines anomalous agents.
57+
58+
Args:
59+
burst_window_seconds: Time window for burst detection.
60+
burst_threshold: Max calls in the burst window before alert.
61+
consecutive_failure_threshold: Failures in a row before quarantine.
62+
capability_denial_threshold: Denied capability checks before quarantine.
63+
quarantine_duration: How long an auto-quarantine lasts.
64+
max_tracked_agents: Evict oldest agents beyond this limit.
65+
"""
66+
67+
def __init__(
68+
self,
69+
burst_window_seconds: int = 60,
70+
burst_threshold: int = 100,
71+
consecutive_failure_threshold: int = 20,
72+
capability_denial_threshold: int = 10,
73+
quarantine_duration: timedelta = timedelta(minutes=15),
74+
max_tracked_agents: int = 50_000,
75+
) -> None:
76+
self._agents: dict[str, AgentMetrics] = {}
77+
self._lock = threading.Lock()
78+
self._burst_window = timedelta(seconds=burst_window_seconds)
79+
self._burst_threshold = burst_threshold
80+
self._consecutive_failure_threshold = consecutive_failure_threshold
81+
self._capability_denial_threshold = capability_denial_threshold
82+
self._quarantine_duration = quarantine_duration
83+
self._max_tracked = max_tracked_agents
84+
85+
def _get_metrics(self, agent_did: str) -> AgentMetrics:
86+
with self._lock:
87+
if agent_did not in self._agents:
88+
if len(self._agents) >= self._max_tracked:
89+
oldest = min(
90+
self._agents,
91+
key=lambda d: self._agents[d].last_activity or datetime.min,
92+
)
93+
del self._agents[oldest]
94+
self._agents[agent_did] = AgentMetrics(agent_did=agent_did)
95+
return self._agents[agent_did]
96+
97+
def record_tool_call(
98+
self,
99+
agent_did: str,
100+
tool_name: str,
101+
*,
102+
success: bool,
103+
) -> None:
104+
"""Record a tool invocation and check for anomalies."""
105+
m = self._get_metrics(agent_did)
106+
now = datetime.utcnow()
107+
m.total_calls += 1
108+
m.last_activity = now
109+
110+
if success:
111+
m.consecutive_failures = 0
112+
else:
113+
m.failed_calls += 1
114+
m.consecutive_failures += 1
115+
if m.consecutive_failures >= self._consecutive_failure_threshold:
116+
self._quarantine(
117+
agent_did,
118+
f"Consecutive failure threshold breached "
119+
f"({m.consecutive_failures} failures)",
120+
)
121+
122+
# Burst detection
123+
cutoff = now - self._burst_window
124+
m.call_timestamps = [t for t in m.call_timestamps if t > cutoff]
125+
m.call_timestamps.append(now)
126+
if len(m.call_timestamps) > self._burst_threshold:
127+
self._quarantine(
128+
agent_did,
129+
f"Burst threshold breached ({len(m.call_timestamps)} calls "
130+
f"in {self._burst_window.total_seconds()}s)",
131+
)
132+
133+
def record_capability_denial(self, agent_did: str, capability: str) -> None:
134+
"""Record a denied capability check (possible privilege escalation)."""
135+
m = self._get_metrics(agent_did)
136+
m.capability_denials += 1
137+
if m.capability_denials >= self._capability_denial_threshold:
138+
self._quarantine(
139+
agent_did,
140+
f"Capability denial threshold breached "
141+
f"({m.capability_denials} denials, last: {capability})",
142+
)
143+
144+
def _quarantine(self, agent_did: str, reason: str) -> None:
145+
m = self._get_metrics(agent_did)
146+
if m.quarantined:
147+
return # already quarantined
148+
m.quarantined = True
149+
m.quarantine_reason = reason
150+
m.quarantined_at = datetime.utcnow()
151+
logger.warning("QUARANTINE agent %s: %s", agent_did, reason)
152+
153+
def is_quarantined(self, agent_did: str) -> bool:
154+
"""Check if an agent is currently quarantined."""
155+
m = self._agents.get(agent_did)
156+
if not m or not m.quarantined:
157+
return False
158+
# Auto-release after quarantine duration
159+
if m.quarantined_at and datetime.utcnow() - m.quarantined_at > self._quarantine_duration:
160+
self.release_quarantine(agent_did)
161+
return False
162+
return True
163+
164+
def release_quarantine(self, agent_did: str) -> None:
165+
"""Manually release an agent from quarantine."""
166+
m = self._agents.get(agent_did)
167+
if m:
168+
m.quarantined = False
169+
m.quarantine_reason = None
170+
m.quarantined_at = None
171+
m.consecutive_failures = 0
172+
m.capability_denials = 0
173+
logger.info("Released agent %s from quarantine", agent_did)
174+
175+
def get_metrics(self, agent_did: str) -> Optional[AgentMetrics]:
176+
"""Get current metrics for an agent (read-only snapshot)."""
177+
return self._agents.get(agent_did)
178+
179+
def get_quarantined_agents(self) -> list[AgentMetrics]:
180+
"""List all currently quarantined agents."""
181+
return [m for m in self._agents.values() if m.quarantined]

packages/agent-mesh/src/agentmesh/trust/bridge.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,15 @@
1010
from datetime import datetime
1111
from typing import Optional, Any
1212
from pydantic import BaseModel, Field
13+
import hashlib
14+
import hmac
15+
import logging
16+
import os
1317

1418
from .handshake import TrustHandshake, HandshakeResult
1519

20+
logger = logging.getLogger(__name__)
21+
1622
# Import IATP from agent-os (the source of truth for trust protocol)
1723
try:
1824
from modules.iatp import IATPClient, IATPMessage, TrustLevel # noqa: F401
@@ -94,6 +100,25 @@ def __init__(self, **data):
94100
identity=identity,
95101
registry=registry,
96102
)
103+
# P06: HMAC key for peer record integrity
104+
self._peer_hmac_key = os.urandom(32)
105+
self._peer_signatures: dict[str, str] = {}
106+
107+
def _sign_peer(self, peer: PeerInfo) -> str:
108+
"""Compute HMAC over peer's critical fields."""
109+
payload = f"{peer.peer_did}:{peer.trust_score}:{peer.trust_verified}:{','.join(peer.capabilities)}"
110+
return hmac.new(self._peer_hmac_key, payload.encode(), hashlib.sha256).hexdigest()
111+
112+
def _verify_peer_integrity(self, peer_did: str) -> bool:
113+
"""Verify that a stored peer record has not been tampered with."""
114+
peer = self.peers.get(peer_did)
115+
if not peer:
116+
return False
117+
expected = self._peer_signatures.get(peer_did)
118+
if not expected:
119+
return False
120+
actual = self._sign_peer(peer)
121+
return hmac.compare_digest(actual, expected)
97122

98123
async def verify_peer(
99124
self,
@@ -115,7 +140,7 @@ async def verify_peer(
115140
)
116141

117142
if result.verified:
118-
self.peers[peer_did] = PeerInfo(
143+
peer = PeerInfo(
119144
peer_did=peer_did,
120145
peer_name=result.peer_name,
121146
protocol=protocol,
@@ -124,6 +149,8 @@ async def verify_peer(
124149
last_verified=datetime.utcnow(),
125150
capabilities=result.capabilities,
126151
)
152+
self.peers[peer_did] = peer
153+
self._peer_signatures[peer_did] = self._sign_peer(peer)
127154

128155
return result
129156

@@ -137,6 +164,13 @@ async def is_peer_trusted(
137164
if not peer or not peer.trust_verified:
138165
return False
139166

167+
# P06: Verify record integrity before trusting cached score
168+
if not self._verify_peer_integrity(peer_did):
169+
logger.warning("Peer %s record integrity check failed — rejecting", peer_did)
170+
del self.peers[peer_did]
171+
self._peer_signatures.pop(peer_did, None)
172+
return False
173+
140174
threshold = required_score or self.default_trust_threshold
141175
return peer.trust_score >= threshold
142176

0 commit comments

Comments
 (0)