Skip to content

Commit 846274e

Browse files
authored
Merge branch 'main' into jb/python-mcp-parity
2 parents 8633d96 + 0618b5f commit 846274e

File tree

3 files changed

+169
-26
lines changed

3 files changed

+169
-26
lines changed

packages/agent-hypervisor/src/hypervisor/security/kill_switch.py

Lines changed: 83 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,23 @@
11
# Copyright (c) Microsoft Corporation.
22
# Licensed under the MIT License.
3-
# Public Preview — basic implementation
43
"""
5-
Kill Switch — simple hard kill.
4+
Kill Switch — agent termination with optional handoff.
65
7-
Public Preview: immediate agent termination, no task transfer.
6+
Terminates agent processes via registered callbacks and hands off
7+
in-flight saga steps to a substitute agent when one is available.
88
"""
99

1010
from __future__ import annotations
1111

12+
import logging
1213
import uuid
14+
from collections.abc import Callable
1315
from dataclasses import dataclass, field
1416
from datetime import UTC, datetime
1517
from enum import Enum
1618

19+
_logger = logging.getLogger(__name__)
20+
1721

1822
class KillReason(str, Enum):
1923
"""Why an agent was killed."""
@@ -37,7 +41,7 @@ class HandoffStatus(str, Enum):
3741

3842
@dataclass
3943
class StepHandoff:
40-
"""A saga step being handed off (Public Preview: always COMPENSATED)."""
44+
"""A saga step being handed off to a substitute or compensated."""
4145

4246
step_id: str
4347
saga_id: str
@@ -58,22 +62,43 @@ class KillResult:
5862
handoffs: list[StepHandoff] = field(default_factory=list)
5963
handoff_success_count: int = 0
6064
compensation_triggered: bool = False
65+
terminated: bool = False
6166
details: str = ""
6267

6368

6469
class KillSwitch:
6570
"""
66-
Simple hard kill (Public Preview: no handoff, immediate termination).
71+
Kill switch with agent process registry and handoff support.
72+
73+
Agents register termination callbacks via ``register_agent``. When
74+
``kill`` is called the switch hands in-flight saga steps to a
75+
registered substitute (if any) and then invokes the termination
76+
callback to stop the agent process.
6777
"""
6878

6979
def __init__(self) -> None:
7080
self._kill_history: list[KillResult] = []
7181
self._substitutes: dict[str, list[str]] = {}
82+
self._agents: dict[str, Callable[[], None]] = {}
83+
84+
# ── Agent process registry ─────────────────────────────────────
85+
86+
def register_agent(
87+
self, agent_did: str, process_handle: Callable[[], None]
88+
) -> None:
89+
"""Register an agent with its termination callback."""
90+
self._agents[agent_did] = process_handle
91+
92+
def unregister_agent(self, agent_did: str) -> None:
93+
"""Remove an agent from the process registry."""
94+
self._agents.pop(agent_did, None)
95+
96+
# ── Substitute management ──────────────────────────────────────
7297

7398
def register_substitute(
7499
self, session_id: str, agent_did: str
75100
) -> None:
76-
"""Register a substitute (Public Preview: no-op, handoff not supported)."""
101+
"""Register a substitute agent for a session."""
77102
self._substitutes.setdefault(session_id, []).append(agent_did)
78103

79104
def unregister_substitute(
@@ -83,6 +108,8 @@ def unregister_substitute(
83108
if agent_did in subs:
84109
subs.remove(agent_did)
85110

111+
# ── Kill ───────────────────────────────────────────────────────
112+
86113
def kill(
87114
self,
88115
agent_did: str,
@@ -91,35 +118,73 @@ def kill(
91118
in_flight_steps: list[dict] | None = None,
92119
details: str = "",
93120
) -> KillResult:
94-
"""Kill an agent immediately (Public Preview: no handoff)."""
121+
"""Kill an agent, handing off in-flight steps to a substitute if available."""
95122
in_flight = in_flight_steps or []
96123

97-
handoffs = [
98-
StepHandoff(
99-
step_id=step_info.get("step_id", ""),
100-
saga_id=step_info.get("saga_id", ""),
101-
from_agent=agent_did,
102-
status=HandoffStatus.COMPENSATED,
124+
# Attempt to find a substitute for handoff
125+
substitute = self._find_substitute(session_id, agent_did)
126+
127+
handoffs: list[StepHandoff] = []
128+
handoff_success_count = 0
129+
for step_info in in_flight:
130+
if substitute is not None:
131+
handoffs.append(
132+
StepHandoff(
133+
step_id=step_info.get("step_id", ""),
134+
saga_id=step_info.get("saga_id", ""),
135+
from_agent=agent_did,
136+
to_agent=substitute,
137+
status=HandoffStatus.HANDED_OFF,
138+
)
139+
)
140+
handoff_success_count += 1
141+
else:
142+
handoffs.append(
143+
StepHandoff(
144+
step_id=step_info.get("step_id", ""),
145+
saga_id=step_info.get("saga_id", ""),
146+
from_agent=agent_did,
147+
status=HandoffStatus.COMPENSATED,
148+
)
149+
)
150+
151+
# Terminate the agent process
152+
terminated = False
153+
callback = self._agents.get(agent_did)
154+
if callback is not None:
155+
callback()
156+
terminated = True
157+
else:
158+
_logger.warning(
159+
"No termination callback registered for agent %s",
160+
agent_did,
103161
)
104-
for step_info in in_flight
105-
]
106162

107163
result = KillResult(
108164
agent_did=agent_did,
109165
session_id=session_id,
110166
reason=reason,
111167
handoffs=handoffs,
112-
handoff_success_count=0,
113-
compensation_triggered=len(handoffs) > 0,
168+
handoff_success_count=handoff_success_count,
169+
compensation_triggered=any(
170+
h.status == HandoffStatus.COMPENSATED for h in handoffs
171+
),
172+
terminated=terminated,
114173
details=details,
115174
)
116175
self._kill_history.append(result)
117176
self.unregister_substitute(session_id, agent_did)
177+
self.unregister_agent(agent_did)
118178
return result
119179

120180
def _find_substitute(
121181
self, session_id: str, exclude_did: str
122182
) -> str | None:
183+
"""Find a registered substitute for the session, excluding the given agent."""
184+
subs = self._substitutes.get(session_id, [])
185+
for sub in subs:
186+
if sub != exclude_did:
187+
return sub
123188
return None
124189

125190
@property
@@ -132,4 +197,4 @@ def total_kills(self) -> int:
132197

133198
@property
134199
def total_handoffs(self) -> int:
135-
return 0
200+
return sum(r.handoff_success_count for r in self._kill_history)

packages/agent-hypervisor/tests/test_kill_switch.py

Lines changed: 81 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,14 +144,93 @@ def test_kill_unregisters_agent_substitute(self):
144144
ks.kill("agent-1", "sess-1", KillReason.MANUAL)
145145
assert "agent-1" not in ks._substitutes.get("sess-1", [])
146146

147-
def test_find_substitute_always_none(self):
148-
"""Public preview: no substitute finding."""
147+
def test_find_substitute_returns_registered(self):
148+
"""Substitute lookup returns a registered agent."""
149149
ks = KillSwitch()
150150
ks.register_substitute("s1", "backup-agent")
151+
assert ks._find_substitute("s1", "agent-1") == "backup-agent"
152+
153+
def test_find_substitute_excludes_killed_agent(self):
154+
ks = KillSwitch()
155+
ks.register_substitute("s1", "agent-1")
156+
assert ks._find_substitute("s1", "agent-1") is None
157+
158+
def test_find_substitute_no_session(self):
159+
ks = KillSwitch()
151160
assert ks._find_substitute("s1", "agent-1") is None
152161

153162
def test_session_timeout_reason(self):
154163
ks = KillSwitch()
155164
result = ks.kill("a1", "s1", KillReason.SESSION_TIMEOUT)
156165
assert result.reason == KillReason.SESSION_TIMEOUT
157166
assert ks.total_kills == 1
167+
168+
# ── Agent process registry ─────────────────────────────────────
169+
170+
def test_register_and_unregister_agent(self):
171+
ks = KillSwitch()
172+
ks.register_agent("agent-1", lambda: None)
173+
assert "agent-1" in ks._agents
174+
ks.unregister_agent("agent-1")
175+
assert "agent-1" not in ks._agents
176+
177+
def test_unregister_nonexistent_agent(self):
178+
ks = KillSwitch()
179+
ks.unregister_agent("nonexistent") # Should not raise
180+
181+
def test_kill_unregisters_agent(self):
182+
ks = KillSwitch()
183+
ks.register_agent("agent-1", lambda: None)
184+
ks.kill("agent-1", "sess-1", KillReason.MANUAL)
185+
assert "agent-1" not in ks._agents
186+
187+
# ── Termination callback ───────────────────────────────────────
188+
189+
def test_kill_with_registered_callback_terminates(self):
190+
"""Registered termination callback is called and terminated=True."""
191+
ks = KillSwitch()
192+
terminated_agents: list[str] = []
193+
ks.register_agent("agent-1", lambda: terminated_agents.append("agent-1"))
194+
result = ks.kill("agent-1", "sess-1", KillReason.MANUAL)
195+
assert result.terminated is True
196+
assert "agent-1" in terminated_agents
197+
198+
def test_kill_without_registered_callback(self):
199+
"""Kill without registered callback sets terminated=False."""
200+
ks = KillSwitch()
201+
result = ks.kill("agent-1", "sess-1", KillReason.MANUAL)
202+
assert result.terminated is False
203+
204+
# ── Handoff vs compensation ────────────────────────────────────
205+
206+
def test_kill_with_substitute_hands_off_steps(self):
207+
"""Steps are handed off to substitute when available."""
208+
ks = KillSwitch()
209+
ks.register_substitute("sess-1", "backup-agent")
210+
steps = [
211+
{"step_id": "s1", "saga_id": "saga-1"},
212+
{"step_id": "s2", "saga_id": "saga-1"},
213+
]
214+
result = ks.kill(
215+
"agent-1", "sess-1", KillReason.RING_BREACH, in_flight_steps=steps
216+
)
217+
assert result.handoff_success_count == 2
218+
assert all(h.status == HandoffStatus.HANDED_OFF for h in result.handoffs)
219+
assert all(h.to_agent == "backup-agent" for h in result.handoffs)
220+
assert result.compensation_triggered is False
221+
222+
def test_kill_without_substitute_compensates_steps(self):
223+
"""Steps are compensated when no substitute is available."""
224+
ks = KillSwitch()
225+
steps = [{"step_id": "s1", "saga_id": "saga-1"}]
226+
result = ks.kill("agent-1", "sess-1", KillReason.MANUAL, in_flight_steps=steps)
227+
assert result.handoff_success_count == 0
228+
assert all(h.status == HandoffStatus.COMPENSATED for h in result.handoffs)
229+
assert result.compensation_triggered is True
230+
231+
def test_total_handoffs_counts_successes(self):
232+
"""total_handoffs sums handoff_success_count across kills."""
233+
ks = KillSwitch()
234+
ks.register_substitute("s1", "backup")
235+
ks.kill("a1", "s1", KillReason.MANUAL, [{"step_id": "s1", "saga_id": "sg1"}])
236+
assert ks.total_handoffs == 1

packages/agent-hypervisor/tests/unit/test_session_security.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -276,10 +276,10 @@ def test_kill_with_handoff(self):
276276
{"step_id": "step-1", "saga_id": "saga-1"},
277277
],
278278
)
279-
# Public Preview: no handoff, always compensated
280-
assert result.handoff_success_count == 0
281-
assert result.handoffs[0].status == HandoffStatus.COMPENSATED
282-
assert result.compensation_triggered
279+
assert result.handoff_success_count == 1
280+
assert result.handoffs[0].status == HandoffStatus.HANDED_OFF
281+
assert result.handoffs[0].to_agent == "backup-agent"
282+
assert not result.compensation_triggered
283283

284284
def test_kill_without_substitute(self):
285285
ks = KillSwitch()
@@ -324,8 +324,7 @@ def test_total_handoffs(self):
324324
ks = KillSwitch()
325325
ks.register_substitute("s1", "backup")
326326
ks.kill("a1", "s1", KillReason.MANUAL, [{"step_id": "s1", "saga_id": "sg1"}])
327-
# Public Preview: no handoffs
328-
assert ks.total_handoffs == 0
327+
assert ks.total_handoffs == 1
329328

330329
def test_unregister_substitute(self):
331330
ks = KillSwitch()

0 commit comments

Comments
 (0)