Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
3c9b514
Merge branch 'mng/update-ty' into mng/offload
evgunter Mar 24, 2026
b6721d8
Add test-results as shared worktree symlink for offload baselines
evgunter Mar 24, 2026
b403fcb
Fix TOML ordering: move scalar fields before table headers
evgunter Mar 24, 2026
58d40e9
Update offload base commit, add --index to git apply, add non-root user
evgunter Mar 25, 2026
e56f774
Add .dockerignore to exclude git worktree state from Modal builds
evgunter Mar 25, 2026
11bf7f0
Exclude .git/ from Docker builds to prevent Modal upload races
evgunter Mar 25, 2026
0234158
Fix last 2 offload test failures
evgunter Mar 25, 2026
98cf021
Auto-invalidate offload image cache when build inputs change
evgunter Mar 25, 2026
e072d87
Use git add -A instead of --index to sync index after patch apply
evgunter Mar 25, 2026
bed2c38
Expand .dockerignore to exclude all transient files from Modal uploads
evgunter Mar 25, 2026
63ec87a
Fix tunnel shutdown race and pair test unison guard violation
evgunter Mar 25, 2026
1cea8a9
Fix leaked mng connect process in kanpan dispatch test
evgunter Mar 25, 2026
f89a41a
Fix stream manager timeout flake and bump chat script timeout
evgunter Mar 25, 2026
1366d5b
Fix leaked processes in agent_creator and coordinator tests
evgunter Mar 25, 2026
206ba03
Revert chat script timeout to 10s -- needs real investigation
evgunter Mar 25, 2026
f3ef08e
Call conversation_db.py directly instead of mng llmdb in chat.sh
evgunter Mar 25, 2026
395496e
Remove incorrect @pure annotation from _check_local_symlink_state
evgunter Mar 25, 2026
3dcffba
Merge remote-tracking branch 'origin/main' into mng/offload
evgunter Mar 25, 2026
580d3be
Merge remote-tracking branch 'origin/main' into mng/offload
evgunter Mar 25, 2026
1fcb230
Merge remote-tracking branch 'origin/main' into mng/offload
evgunter Mar 25, 2026
1a544d9
Merge remote-tracking branch 'origin/main' into mng/offload
evgunter Mar 26, 2026
333d26f
Merge remote-tracking branch 'origin/main' into mng/offload
evgunter Mar 26, 2026
f05c3a0
Merge branch 'mng/custom-wt-copy' into mng/offload
evgunter Mar 26, 2026
b521be8
Merge branch 'mng/custom-wt-copy' into mng/offload
evgunter Mar 26, 2026
1b6912e
Use ** glob prefix in .dockerignore for nested directory matching
evgunter Mar 26, 2026
0edd133
Fix leaked git clone processes in forwarding server tests
evgunter Mar 26, 2026
173f6bb
Terminate events processes in all stream manager CG tests
evgunter Mar 26, 2026
ec4bf18
Run offload tests as mng-test-runner instead of root
evgunter Mar 26, 2026
a9f8d6a
Include offload-modal.toml in image cache key
evgunter Mar 26, 2026
9280b53
Fix OSError test to not depend on filesystem permissions
evgunter Mar 26, 2026
65d5b50
Switch Dockerfile to USER mng-test-runner for non-root runtime
evgunter Mar 26, 2026
1496c4e
Revert USER directive -- breaks sandbox_init_cmd, use monkeypatch ins…
evgunter Mar 26, 2026
d55fbc1
Switch Dockerfile to non-root USER with broader chown
evgunter Mar 26, 2026
e92ee34
Fix work_dir_extra_paths value to uppercase SHARE
evgunter Mar 26, 2026
3571216
Revert USER directive -- Modal ignores it, use monkeypatch instead
evgunter Mar 26, 2026
43d1369
Add wait_for_completion to remaining agent creator tests
evgunter Mar 26, 2026
3851ee0
Add missing wait_for_completion to custom_name agent creator test
evgunter Mar 26, 2026
d5ea868
Add AgentCreator.close() and call it in all creation tests
evgunter Mar 26, 2026
3b61ca4
Add close() to test_agent_creator_get_log_queue_returns_queue_for_tra…
evgunter Mar 26, 2026
baea1fa
Merge remote-tracking branch 'origin/main' into mng/offload
evgunter Mar 26, 2026
8d0dab8
Rename mng -> mngr
evgunter Mar 27, 2026
ea87416
Merge remote-tracking branch 'origin/main' into mng/offload
evgunter Mar 27, 2026
f8d5ef3
Remove old libs/mng_* directories left over from rename merge
evgunter Mar 27, 2026
e22657b
Fix old imbue.mng_claude_mind imports in vendor_mng files
evgunter Mar 27, 2026
9319efe
Remove old vendor_mng files (replaced by vendor_mngr)
evgunter Mar 27, 2026
050745c
Remove old .mng/ directory and fix MNG_AGENT_STATE_DIR in chat.sh
evgunter Mar 27, 2026
7db32a6
Remove old scripts/mng_log.sh symlink (replaced by mngr_log.sh)
evgunter Mar 27, 2026
848156b
Remove old mng directories re-added by git add -A
evgunter Mar 27, 2026
aba6f8e
Add old mng directories to .gitignore to prevent re-addition
evgunter Mar 27, 2026
596aac5
Remove old mng gitignore entries that trigger name ratchet
evgunter Mar 27, 2026
cddc227
Remove duplicate wait_for_completion call
evgunter Mar 27, 2026
c5d7d60
Rename _ACCEPT_TIMEOUT_SECONDS to _SHUTDOWN_POLL_SECONDS
evgunter Mar 27, 2026
3ffc005
Reset host.py to origin/main version
evgunter Mar 27, 2026
66d9e38
Remove accidentally committed autofix artifacts
evgunter Mar 27, 2026
faffd74
Merge remote-tracking branch 'origin/main' into mng/offload
evgunter Mar 27, 2026
3a0de5c
lint
evgunter Mar 27, 2026
07aaa0a
Reset host.py to origin/main -- fix deleted methods from bad merge
evgunter Mar 27, 2026
1c2971e
Merge remote-tracking branch 'origin/main' into mng/offload
evgunter Mar 27, 2026
f50c8b1
Remove unused mngr-test-runner user from Dockerfile
evgunter Mar 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Exclude files and directories that concurrent processes (agents, tests)
# may modify during Modal's image upload. The Docker image only needs
# current.tar.gz; all other repo files are delivered via the tarball.
.git/
**/__pycache__/
**/*.pyc
**/.pytest_cache/
**/.test_output/
.venv/
**/.ruff_cache/
.mngr/dev/
test-results/
tmr_*/
node_modules/
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,8 @@ tmr_*/

# Offload caches and local files
.offload/**
.offload-image-cache
.offload-cache-key
test-results
current.tar.gz

Expand Down
3 changes: 3 additions & 0 deletions .mngr/settings.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ is_allowed_in_pytest = false
#default_destroyed_host_persisted_seconds = 36000
default_destroyed_host_persisted_seconds = 10

[work_dir_extra_paths]
test-results = "SHARE"

[pre_command_scripts]
create = ["""bash -c './scripts/make_tar_of_repo.sh `cat .mngr/image_commit_hash` .mngr/dev/build'"""]

Expand Down
2 changes: 1 addition & 1 deletion .offload-base-commit
Original file line number Diff line number Diff line change
@@ -1 +1 @@
73ce8f3cc81924aba41ecf000524975b18979f6a
36f9e3b6d88585b335655d893890adeb123d4a97
2 changes: 1 addition & 1 deletion .offload-image-cache
Original file line number Diff line number Diff line change
@@ -1 +1 @@
im-Hg9iOacASyJ2Im5Gt1qJ91
im-xNeY6u8qRzETya9SVpJWdi
17 changes: 17 additions & 0 deletions apps/minds/imbue/minds/forwarding_server/agent_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ class AgentCreator(MutableModel):
_redirect_urls: dict[str, str] = PrivateAttr(default_factory=dict)
_errors: dict[str, str] = PrivateAttr(default_factory=dict)
_log_queues: dict[str, queue.Queue[str]] = PrivateAttr(default_factory=dict)
_threads: dict[str, threading.Thread] = PrivateAttr(default_factory=dict)
_lock: threading.Lock = PrivateAttr(default_factory=threading.Lock)

def start_creation(
Expand Down Expand Up @@ -317,9 +318,25 @@ def start_creation(
daemon=True,
name="agent-creator-{}".format(agent_id),
)
with self._lock:
self._threads[str(agent_id)] = thread
thread.start()
return agent_id

def wait_for_completion(self, agent_id: AgentId, timeout: float) -> None:
    """Wait for one agent's background creation thread to finish.

    Blocks for at most ``timeout`` seconds. Returns immediately when no
    thread is tracked for ``agent_id``, and returns silently if the thread
    is still running once the timeout elapses.

    A thread that has finished is dropped from the tracking dict so that
    completed ``Thread`` objects do not accumulate over the creator's
    lifetime.
    """
    key = str(agent_id)
    with self._lock:
        thread = self._threads.get(key)
    if thread is None:
        return

    # Join outside the lock so other methods that take the lock are not
    # blocked for the duration of the wait.
    thread.join(timeout=timeout)
    if thread.is_alive():
        # Timed out: keep tracking the thread so a later wait/close can retry.
        return

    with self._lock:
        # Only remove the entry if it still refers to the thread we joined.
        if self._threads.get(key) is thread:
            del self._threads[key]

def close(self, timeout: float = 10.0) -> None:
    """Wait for all background creation threads to finish.

    ``timeout`` bounds the total time spent in this call: every thread is
    joined against one shared deadline rather than receiving its own full
    timeout, so the call cannot block for ``timeout`` times the number of
    threads. Threads still running at the deadline are left tracked;
    threads that have finished are dropped from the tracking dict.
    """
    import time

    deadline = time.monotonic() + timeout
    with self._lock:
        # Snapshot under the lock, then join without holding it so other
        # methods that take the lock are not blocked while we wait.
        pending = list(self._threads.items())

    for key, thread in pending:
        remaining = max(0.0, deadline - time.monotonic())
        thread.join(timeout=remaining)
        if thread.is_alive():
            continue
        with self._lock:
            # Only remove the entry if it still refers to the joined thread.
            if self._threads.get(key) is thread:
                del self._threads[key]

def get_creation_info(self, agent_id: AgentId) -> AgentCreationInfo | None:
"""Get the current creation status for an agent, or None if not tracked."""
with self._lock:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@ def test_agent_creator_start_creation_returns_agent_id_and_tracks_status(tmp_pat
assert info.agent_id == agent_id
assert info.status == AgentCreationStatus.CLONING

# Wait for the background thread to finish so git clone doesn't leak.
creator.wait_for_completion(agent_id, timeout=10.0)


def test_agent_creator_start_creation_with_custom_name(tmp_path: Path) -> None:
"""Verify start_creation accepts a custom agent name."""
Expand All @@ -189,6 +192,8 @@ def test_agent_creator_start_creation_with_custom_name(tmp_path: Path) -> None:
info = creator.get_creation_info(agent_id)
assert info is not None

creator.wait_for_completion(agent_id, timeout=10.0)


def test_agent_creator_get_log_queue_returns_none_for_unknown() -> None:
creator = AgentCreator(
Expand All @@ -205,6 +210,8 @@ def test_agent_creator_get_log_queue_returns_queue_for_tracked() -> None:
q = creator.get_log_queue(agent_id)
assert q is not None

creator.close()


def test_make_log_callback_puts_lines_into_queue() -> None:
log_queue: queue_mod.Queue[str] = queue_mod.Queue()
Expand Down
16 changes: 16 additions & 0 deletions apps/minds/imbue/minds/forwarding_server/backend_resolver_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,8 @@ def test_stream_manager_full_snapshot_updates_agent_ids() -> None:
)
with manager._cg:
manager._handle_discovery_line(line)
for process in manager._events_processes.values():
process.terminate()

ids = manager.resolver.list_known_agent_ids()
assert _AGENT_A in ids
Expand Down Expand Up @@ -601,6 +603,9 @@ def test_stream_manager_host_ssh_info_populates_resolver() -> None:
ssh_line = _make_host_ssh_info_line(host_id, ssh_data)
manager._handle_discovery_line(ssh_line)

for process in manager._events_processes.values():
process.terminate()

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these seem suss--isn't the whole point of the concurrency group to make sure that this stuff happens automatically?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should i bump the timeout for the concurrency group's garbage collection, then?

ssh_info = manager.resolver.get_ssh_info(_AGENT_A)
assert ssh_info is not None
assert ssh_info.host == "remote.example.com"
Expand All @@ -620,6 +625,9 @@ def test_stream_manager_no_ssh_for_local_hosts() -> None:
)
manager._handle_discovery_line(line)

for process in manager._events_processes.values():
process.terminate()

assert manager.resolver.list_known_agent_ids() == (_AGENT_A,)
assert manager.resolver.get_ssh_info(_AGENT_A) is None

Expand Down Expand Up @@ -647,6 +655,11 @@ def test_stream_manager_mixed_local_and_remote() -> None:
ssh_line = _make_host_ssh_info_line(remote_host_id, ssh_data)
manager._handle_discovery_line(ssh_line)

# Terminate background mngr events processes before the concurrency group
# (CG) exits; otherwise they time out on slow systems (e.g. Modal containers).
for process in manager._events_processes.values():
process.terminate()

assert manager.resolver.get_ssh_info(_AGENT_A) is None
ssh_info = manager.resolver.get_ssh_info(_AGENT_B)
assert ssh_info is not None
Expand Down Expand Up @@ -677,6 +690,9 @@ def test_stream_manager_ssh_info_before_full_snapshot() -> None:
)
manager._handle_discovery_line(full_line)

for process in manager._events_processes.values():
process.terminate()

ssh_info = manager.resolver.get_ssh_info(_AGENT_A)
assert ssh_info is not None
assert ssh_info.host == "remote.example.com"
4 changes: 2 additions & 2 deletions apps/minds/imbue/minds/forwarding_server/ssh_tunnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

_SELECT_TIMEOUT_SECONDS: Final[float] = 1.0

_ACCEPT_TIMEOUT_SECONDS: Final[float] = 1.0
_SHUTDOWN_POLL_SECONDS: Final[float] = 0.2

_SOCKET_POLL_SECONDS: Final[float] = 0.01

Expand Down Expand Up @@ -234,7 +234,7 @@ def _tunnel_accept_loop(
server.bind(str(sock_path))
os.chmod(str(sock_path), 0o600)
server.listen(8)
server.settimeout(_ACCEPT_TIMEOUT_SECONDS)
server.settimeout(_SHUTDOWN_POLL_SECONDS)

while not shutdown_event.is_set():
try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -329,5 +329,5 @@ def test_tunnel_accept_loop_shutdown_event_stops_loop(short_tmp_path: Path) -> N
_wait_for_socket(sock_path, timeout=10.0)

shutdown_event.set()
accept_thread.join(timeout=3.0)
accept_thread.join(timeout=10.0)
assert not accept_thread.is_alive()
Loading
Loading