Skip to content

Commit 688fe62

Browse files
committed
[*] Fix worker-manager communication via advertise_addr parameter
Signed-off-by: Valentino Maiorca <[email protected]>
1 parent 44cf7e1 commit 688fe62

File tree

2 files changed

+30
-10
lines changed

2 files changed

+30
-10
lines changed

src/beers/scripts/beers

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import configparser
33
import logging
44
import re
55
import sys
6+
from collections import Counter
67
from importlib.metadata import PackageNotFoundError
78
from importlib.metadata import version
89
from multiprocessing import Process
@@ -157,21 +158,31 @@ def run_event_listener():
157158
decode=True,
158159
filters={"scope": "swarm", "type": "node"},
159160
)
161+
160162
for event in events:
163+
nodes: Sequence[Node] = list(client.nodes.list())
164+
logger.warning([node.attrs for node in nodes])
165+
hostname2count: Mapping[str, int] = Counter([node.attrs["Description"]["Hostname"] for node in nodes])
166+
logger.warning(f"{hostname2count=}")
167+
161168
# {'Type': 'node', 'Action': 'update',
162169
# 'Actor': {'ID': 'pou5nqcitq0y0x1wmhrrwdn8q', 'Attributes': {'name': __main__: 124
163170
# 'cm-shannon', 'state.new': 'down', 'state.old': 'ready'}}, 'scope': 'swarm', 'time': 1650372820, 'timeNano':
164171
# 1650372820734862390}
165172
logger.debug(f"[Swarm Event]: {event}")
166173

167-
try:
168-
actor: dict = event["Actor"]
169-
attrs: dict = actor["Attributes"]
170-
if attrs.get("state.new") == "down":
171-
node: Node = client.nodes.get(node_id=actor["ID"])
172-
node.remove(force=True)
173-
except Exception as e:
174-
logger.error(f"Error with events: {e}")
174+
# TODO: remove nodes with same hostname when state.new == down or state.now == ready.
175+
# This can only happen when a `docker swarm leave` is issued on the worker side
176+
177+
# try:
178+
# actor: dict = event["Actor"]
179+
# attrs: dict = actor["Attributes"]
180+
# if attrs.get("state.new") == "down":
181+
# node: Node = client.nodes.get(node_id=actor["ID"])
182+
# if hostname2count.get(node.attrs["Description"]["Hostname"], 0) > 1:
183+
# node.remove(force=True)
184+
# except Exception as e:
185+
# logger.error(f"Error with events: {e}")
175186
events.close() # TODO: where/when?
176187

177188

@@ -183,6 +194,7 @@ def _init_manager(
183194
rest_host: str = typer.Option("0.0.0.0", prompt=True, envvar="MANAGER_REST_HOST"),
184195
owner_id: str = typer.Option(..., prompt=True, envvar="OWNER_ID"),
185196
db_path: str = typer.Option(..., prompt=True, envvar="BEERS_DB_PATH"),
197+
advertise_addr: str = typer.Option(default="tun0", prompt=True),
186198
hostname: str = typer.Option(None, prompt=False, envvar="HOSTNAME"),
187199
#
188200
traefik: bool = typer.Option(False, prompt=True, envvar="USE_TRAEFIK"),
@@ -207,7 +219,7 @@ def _init_manager(
207219
client.swarm.leave(force=True)
208220

209221
_: str = client.swarm.init(
210-
# advertise_addr="tun0",
222+
advertise_addr=advertise_addr,
211223
listen_addr=f"{ip}:{swarm_port}",
212224
force_new_cluster=swarm_reset,
213225
# default_addr_pool=["10.43.0.0/16"],
@@ -252,6 +264,7 @@ def _init_worker(
252264
manager_swarm_port: int = typer.Option(..., prompt=True),
253265
manager_rest_port: int = typer.Option(..., prompt=True),
254266
local_nfs_root: str = typer.Option(default=None, prompt=False),
267+
advertise_addr: str = typer.Option(default="tun0", prompt=True),
255268
token: str = typer.Option(..., prompt=True),
256269
protocol: str = typer.Argument("http"),
257270
):
@@ -270,7 +283,13 @@ def _init_worker(
270283
# TODO: we could check via client.info()
271284
pass
272285

273-
join_result: bool = client.swarm.join(remote_addrs=[manager_url], join_token=token)
286+
join_result: bool = client.swarm.join(
287+
remote_addrs=[manager_url],
288+
join_token=token,
289+
# data_path_addr="tun0",
290+
# listen_addr="",
291+
advertise_addr=advertise_addr,
292+
)
274293
if join_result:
275294
worker_model: WorkerModel = build_worker_specs(local_nfs_root=local_nfs_root)
276295
# TODO: let users confirm gathered specs?

src/beers/scripts/worker_setup.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ echo "Reloading Docker"
6060
# Reload the Docker daemon
6161
sudo systemctl daemon-reload
6262
sudo systemctl restart docker
63+
sudo systemctl enable --now docker
6364

6465
# NFS setup
6566
if [[ "$NFS_DIR" != 1 ]]; then

0 commit comments

Comments
 (0)