Skip to content

Commit 8804740

Browse files
alicup29 and claude committed
fix: include full metadata in worker re-registration after promotion/demotion
Re-registration messages sent after admin promotion/demotion were missing worker_name, mounts, hostname, sleap_version, and other metadata that the initial registration includes. This caused two bugs after reconnection:
- The worker showed as its peer_id instead of its name on the dashboard.
- The file browser was frozen/empty because mounts were missing from the metadata.

Extract a _build_registration_msg() helper to ensure consistent metadata across the initial registration and all re-registrations.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6e2cffb commit 8804740

File tree

1 file changed

+63
-46
lines changed

1 file changed

+63
-46
lines changed

sleap_rtc/worker/mesh_coordinator.py

Lines changed: 63 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,67 @@ def __init__(
108108
# Non-admin WebSocket handler task (for post-demotion signaling)
109109
self._non_admin_handler_task: Optional[asyncio.Task] = None
110110

111+
def _build_registration_msg(self, is_admin: bool) -> dict:
    """Assemble the ``register`` payload describing this worker.

    The payload carries the worker's identity and room credentials plus a
    ``metadata`` section holding fixed tags and a ``properties`` mapping
    (GPU info, SLEAP / sleap-nn versions, CUDA version, hostname, worker
    name, status, job capabilities and file-manager mounts).

    Args:
        is_admin: Value placed in the message's ``is_admin`` field.

    Returns:
        The registration message as a plain dict.
    """
    from socket import gethostname

    # Version probes are best-effort: start from the fallback value and
    # overwrite it only when the lookup succeeds.
    sleap_version = "unknown"
    try:
        import sleap

        sleap_version = sleap.__version__
    except (ImportError, AttributeError):
        pass

    sleap_nn_version = "unknown"
    try:
        from sleap_rtc.worker.worker_class import _get_sleap_nn_version

        sleap_nn_version = _get_sleap_nn_version()
    except Exception:
        pass

    # A worker without a (truthy) file manager advertises no mounts.
    file_manager = getattr(self.worker, "file_manager", None)
    mounts = file_manager.get_mounts() if file_manager else []

    worker = self.worker
    message = {
        "type": "register",
        "peer_id": worker.peer_id,
        "room_id": worker.room_id,
        "token": worker.room_token,
        "api_key": worker.api_key,
        "role": "worker",
        "is_admin": is_admin,
    }

    properties = {
        "gpu_memory_mb": worker.gpu_memory_mb,
        "gpu_model": worker.gpu_model,
        "sleap_version": sleap_version,
        "sleap_nn_version": sleap_nn_version,
        "cuda_version": worker.cuda_version,
        "hostname": gethostname(),
        "worker_name": worker.name,
        "status": worker.status,
        # Capability fields are optional on the worker; fall back to defaults.
        "max_concurrent_jobs": getattr(worker, "max_concurrent_jobs", 1),
        "supported_models": getattr(worker, "supported_models", []),
        "supported_job_types": getattr(worker, "supported_job_types", []),
        "mounts": mounts,
    }

    message["metadata"] = {
        "tags": ["sleap-rtc", "training-worker", "inference-worker"],
        "properties": properties,
    }
    return message
171+
111172
async def initialize(self, websocket: "ClientConnection", dns: str):
112173
"""Initialize mesh coordinator with initial WebSocket connection.
113174
@@ -1696,29 +1757,7 @@ async def on_admin_promotion(self):
16961757

16971758
# Re-register with server as admin (include full metadata for discovery)
16981759
await self.websocket.send(
1699-
json.dumps(
1700-
{
1701-
"type": "register",
1702-
"peer_id": self.worker.peer_id,
1703-
"room_id": self.worker.room_id,
1704-
"token": self.worker.room_token,
1705-
"api_key": self.worker.api_key,
1706-
"role": "worker",
1707-
"is_admin": True, # Signal admin status
1708-
"metadata": {
1709-
"tags": [
1710-
"sleap-rtc",
1711-
"training-worker",
1712-
"inference-worker",
1713-
],
1714-
"properties": {
1715-
"gpu_memory_mb": self.worker.gpu_memory_mb,
1716-
"gpu_model": self.worker.gpu_model,
1717-
"status": self.worker.status,
1718-
},
1719-
},
1720-
}
1721-
)
1760+
json.dumps(self._build_registration_msg(is_admin=True))
17221761
)
17231762

17241763
logger.info("Admin WebSocket reconnected")
@@ -1779,29 +1818,7 @@ async def on_admin_demotion(self):
17791818

17801819
# 4. Re-register as non-admin worker
17811820
await self.websocket.send(
1782-
json.dumps(
1783-
{
1784-
"type": "register",
1785-
"peer_id": self.worker.peer_id,
1786-
"room_id": self.worker.room_id,
1787-
"token": self.worker.room_token,
1788-
"api_key": self.worker.api_key,
1789-
"role": "worker",
1790-
"is_admin": False, # Not admin anymore
1791-
"metadata": {
1792-
"tags": [
1793-
"sleap-rtc",
1794-
"training-worker",
1795-
"inference-worker",
1796-
],
1797-
"properties": {
1798-
"gpu_memory_mb": self.worker.gpu_memory_mb,
1799-
"gpu_model": self.worker.gpu_model,
1800-
"status": self.worker.status,
1801-
},
1802-
},
1803-
}
1804-
)
1821+
json.dumps(self._build_registration_msg(is_admin=False))
18051822
)
18061823
logger.info("Re-registered as non-admin worker")
18071824

0 commit comments

Comments
 (0)