Skip to content

Commit f8c60aa

Browse files
committed
Tree: implement per-gateway abort
This commit adds the functionality to abort a specific gateway channel from the initiator. Until now, this was not properly handled.

Changes:

* Implement TreeWorker._gateway_abort(), which can be used to abort/cancel all tasks being done by the TreeWorker via the specified gateway. In case of such an abort (likely due to some gateway failure), a special return code 76 (os.EX_PROTOCOL) is used for closing all running remote commands via this gateway. This return code is sometimes used to specify a "Remote protocol error" / "An error occurred in a remote communication protocol", which seems appropriate here.

* Implement a new Task._pchannel_close() method that is called on PropagationChannel.ev_close(), so deterministically every time a gateway channel is closing (self-initiated or not). This method performs the necessary cleanup actions, but most notably calls TreeWorker._gateway_abort(gateway) on each worker currently using the gateway channel.

Part of #229 and extended work on #566.
1 parent 7e5e3f6 commit f8c60aa

File tree

3 files changed

+35
-4
lines changed

3 files changed

+35
-4
lines changed

lib/ClusterShell/Propagation.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,3 +424,6 @@ def ev_close(self, worker, timedout):
424424
self.logger.debug("channel was not set up: redistributing...")
425425
for mw in set(self.task.gateways[gateway][1]):
426426
mw._relaunch(gateway)
427+
428+
# update Task that we are closing
429+
worker.task._pchannel_close(gateway, worker)

lib/ClusterShell/Task.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1365,8 +1365,10 @@ def _pchannel(self, gateway, metaworker):
13651365
def _pchannel_release(self, gateway, metaworker):
13661366
"""Release propagation channel associated to gateway.
13671367
1368-
Lookup by gateway, decref associated metaworker set and release
1369-
channel worker if needed.
1368+
Lookup by gateway, decref associated metaworker set and abort channel
1369+
worker if not used anymore.
1370+
1371+
Called by TreeWorker._check_fini()
13701372
"""
13711373
logger = logging.getLogger(__name__)
13721374
logger.debug("pchannel_release %s %s", gateway, metaworker)
@@ -1384,8 +1386,22 @@ def _pchannel_release(self, gateway, metaworker):
13841386
logger.debug("pchannel_release: destroying channel %s",
13851387
chanworker.eh)
13861388
chanworker.abort()
1387-
# delete gateway reference
1388-
del self.gateways[gwstr]
1389+
1390+
def _pchannel_close(self, gateway, chanworker):
    """Clean up after the propagation channel of *gateway* closes.

    Called by PropagationChannel.ev_close().

    Looks up the (channel worker, metaworkers) entry registered for
    *gateway* in self.gateways, asks every metaworker of that entry to
    abort what it is doing through the gateway, and finally removes the
    entry from self.gateways.

    *chanworker* is the channel worker that is closing; it is expected
    to be the very object registered for *gateway*.
    """
    log = logging.getLogger(__name__)
    log.debug("pchannel_closing: %s", gateway)

    registered_worker, users = self.gateways[gateway]
    assert registered_worker is chanworker, (registered_worker, chanworker)

    # Walk a snapshot of the metaworkers so the loop does not depend on
    # the registered collection staying unchanged during the aborts.
    for user in tuple(users):
        user._gateway_abort(gateway)

    del self.gateways[gateway]
13891405

13901406

13911407
def task_self(defaults=None):

lib/ClusterShell/Worker/Tree.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,18 @@ def set_write_eof(self):
564564

565565
self._set_write_eof_remote()
566566

567+
def _gateway_abort(self, gateway):
    """Close all remote targets handled through *gateway*.

    Used when a gateway fails: every node recorded in self.gwtargets
    for *gateway* is reported closed through _on_remote_node_close()
    with return code os.EX_PROTOCOL. When nothing is recorded for
    *gateway*, a warning is logged and no node is closed.
    """
    if gateway in self.gwtargets:
        targets = self.gwtargets[gateway]
        self.logger.debug("TreeWorker._gateway_abort %s found: targets=%s",
                          gateway, targets)
        # Loop over a separate NodeSet built from targets, not over
        # targets itself, which is mutable.
        for node in NodeSet.fromlist(targets):
            self._on_remote_node_close(node, os.EX_PROTOCOL, gateway)
    else:
        self.logger.warning("TreeWorker._gateway_abort %s not found",
                            gateway)
578+
567579
def abort(self):
568580
"""Abort processing any action by this worker."""
569581
# Not yet supported by TreeWorker

0 commit comments

Comments
 (0)