@@ -3,6 +3,7 @@ import configparser
33import logging
44import re
55import sys
6+ from collections import Counter
67from importlib .metadata import PackageNotFoundError
78from importlib .metadata import version
89from multiprocessing import Process
@@ -157,21 +158,31 @@ def run_event_listener():
157158 decode = True ,
158159 filters = {"scope" : "swarm" , "type" : "node" },
159160 )
161+
160162 for event in events :
163+ nodes : Sequence [Node ] = list (client .nodes .list ())
164+ logger .warning ([node .attrs for node in nodes ])
165+ hostname2count : Mapping [str , int ] = Counter ([node .attrs ["Description" ]["Hostname" ] for node in nodes ])
166+ logger .warning (f"{ hostname2count = } " )
167+
161168 # {'Type': 'node', 'Action': 'update',
162169 # 'Actor': {'ID': 'pou5nqcitq0y0x1wmhrrwdn8q', 'Attributes': {'name': __main__: 124
163170 # 'cm-shannon', 'state.new': 'down', 'state.old': 'ready'}}, 'scope': 'swarm', 'time': 1650372820, 'timeNano':
164171 # 1650372820734862390}
165172 logger .debug (f"[Swarm Event]: { event } " )
166173
167- try :
168- actor : dict = event ["Actor" ]
169- attrs : dict = actor ["Attributes" ]
170- if attrs .get ("state.new" ) == "down" :
171- node : Node = client .nodes .get (node_id = actor ["ID" ])
172- node .remove (force = True )
173- except Exception as e :
174- logger .error (f"Error with events: { e } " )
174+ # TODO: remove nodes with same hostname when state.new == down or state.now == ready.
175+ # This can only happen when a `docker swarm leave` is issued on the worker side
176+
177+ # try:
178+ # actor: dict = event["Actor"]
179+ # attrs: dict = actor["Attributes"]
180+ # if attrs.get("state.new") == "down":
181+ # node: Node = client.nodes.get(node_id=actor["ID"])
182+ # if hostname2count.get(node.attrs["Description"]["Hostname"], 0) > 1:
183+ # node.remove(force=True)
184+ # except Exception as e:
185+ # logger.error(f"Error with events: {e}")
175186 events .close () # TODO: where/when?
176187
177188
@@ -183,6 +194,7 @@ def _init_manager(
183194 rest_host : str = typer .Option ("0.0.0.0" , prompt = True , envvar = "MANAGER_REST_HOST" ),
184195 owner_id : str = typer .Option (..., prompt = True , envvar = "OWNER_ID" ),
185196 db_path : str = typer .Option (..., prompt = True , envvar = "BEERS_DB_PATH" ),
197+ advertise_addr : str = typer .Option (default = "tun0" , prompt = True ),
186198 hostname : str = typer .Option (None , prompt = False , envvar = "HOSTNAME" ),
187199 #
188200 traefik : bool = typer .Option (False , prompt = True , envvar = "USE_TRAEFIK" ),
@@ -207,7 +219,7 @@ def _init_manager(
207219 client .swarm .leave (force = True )
208220
209221 _ : str = client .swarm .init (
210- # advertise_addr="tun0" ,
222+ advertise_addr = advertise_addr ,
211223 listen_addr = f"{ ip } :{ swarm_port } " ,
212224 force_new_cluster = swarm_reset ,
213225 # default_addr_pool=["10.43.0.0/16"],
@@ -252,6 +264,7 @@ def _init_worker(
252264 manager_swarm_port : int = typer .Option (..., prompt = True ),
253265 manager_rest_port : int = typer .Option (..., prompt = True ),
254266 local_nfs_root : str = typer .Option (default = None , prompt = False ),
267+ advertise_addr : str = typer .Option (default = "tun0" , prompt = True ),
255268 token : str = typer .Option (..., prompt = True ),
256269 protocol : str = typer .Argument ("http" ),
257270):
@@ -270,7 +283,13 @@ def _init_worker(
270283 # TODO: we could check via client.info()
271284 pass
272285
273- join_result : bool = client .swarm .join (remote_addrs = [manager_url ], join_token = token )
286+ join_result : bool = client .swarm .join (
287+ remote_addrs = [manager_url ],
288+ join_token = token ,
289+ # data_path_addr="tun0",
290+ # listen_addr="",
291+ advertise_addr = advertise_addr ,
292+ )
274293 if join_result :
275294 worker_model : WorkerModel = build_worker_specs (local_nfs_root = local_nfs_root )
276295 # TODO: let users confirm gathered specs?
0 commit comments