Skip to content

Commit 33e1b11

Browse files
mgr/cephadm: extend stray service detection with a general ignore hook
Extend the current stray service detection with a new method on the service classes, so that a service class can hook into stray detection when ceph services and cephadm services have differing names, or when it uses subsystems that call into ceph under different names (my use case). Signed-off-by: John Mulligan <[email protected]>
1 parent d84c7b3 commit 33e1b11

File tree

2 files changed

+39
-6
lines changed

2 files changed

+39
-6
lines changed

src/pybind/mgr/cephadm/serve.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,9 @@ def _check_for_strays(self) -> None:
482482
if self.mgr.warn_on_stray_hosts or self.mgr.warn_on_stray_daemons:
483483
ls = self.mgr.list_servers()
484484
self.log.debug(ls)
485-
managed = self.mgr.cache.get_daemon_names()
485+
managed_daemons = self.mgr.cache.get_daemons()
486+
stray_filter = self._build_stray_filter(managed_daemons)
487+
managed = [d.name() for d in managed_daemons]
486488
host_detail = [] # type: List[str]
487489
host_num_daemons = 0
488490
daemon_detail = [] # type: List[str]
@@ -496,11 +498,7 @@ def _check_for_strays(self) -> None:
496498
daemon_id = s.get('id')
497499
assert daemon_id
498500
name = self._service_reference_name(s.get('type'), daemon_id)
499-
if s.get('type') == 'tcmu-runner':
500-
# because we don't track tcmu-runner daemons in the host cache
501-
# and don't have a way to check if the daemon is part of iscsi service
502-
# we assume that all tcmu-runner daemons are managed by cephadm
503-
managed.append(name)
501+
managed.extend(stray_filter(s.get('type'), daemon_id, name))
504502
# Don't mark daemons we just created/removed in the last minute as stray.
505503
# It may take some time for the mgr to become aware the daemon
506504
# had been created/removed.
@@ -544,6 +542,31 @@ def _service_reference_name(self, service_type: str, daemon_id: str) -> str:
544542
)
545543
return name
546544

545+
def _build_stray_filter(
    self, managed: List[orchestrator.DaemonDescription]
) -> Callable[[str, str, str], List[str]]:
    """Build a callback that decides whether a possible stray is managed.

    The service objects to consult are resolved once, from the daemon
    types of the given daemon descriptions, so the returned callback can
    be invoked repeatedly without redoing that lookup.

    :param managed: daemon descriptions whose daemon types select which
        entries of ``self.mgr.cephadm_services`` get consulted
    :return: a function taking ``(service_type, daemon_id, name)`` that
        returns ``[name]`` when the daemon should be treated as managed
        and an empty list otherwise
    """
    service_types = {
        daemon_type_to_service(cast(str, dd.daemon_type))
        for dd in managed
    }
    # NOTE(review): plain indexing -- this presumes every derived service
    # type has an entry in cephadm_services; confirm against the mgr setup.
    services = [self.mgr.cephadm_services[st] for st in service_types]

    def _filter(service_type: str, daemon_id: str, name: str) -> List[str]:
        if service_type == 'tcmu-runner':
            # because we don't track tcmu-runner daemons in the host cache
            # and don't have a way to check if the daemon is part of iscsi
            # service we assume that all tcmu-runner daemons are managed by
            # cephadm
            return [name]
        # A single claiming service is enough: report the name once rather
        # than once per service whose hook returns true, and stop asking
        # the remaining services as soon as one of them claims it.
        if any(
            svc.ignore_possible_stray(service_type, daemon_id, name)
            for svc in services
        ):
            return [name]
        return []

    return _filter
569+
547570
def _check_for_moved_osds(self) -> None:
548571
self.log.debug('_check_for_moved_osds')
549572
all_osds: DefaultDict[int, List[orchestrator.DaemonDescription]] = defaultdict(list)

src/pybind/mgr/cephadm/services/cephadmservice.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,16 @@ def purge(self, service_name: str) -> None:
564564
"""Called to carry out any purge tasks following service removal"""
565565
logger.debug(f'Purge called for {self.TYPE} - no action taken')
566566

567+
def ignore_possible_stray(self, service_type: str, daemon_id: str, name: str) -> bool:
    """Report whether a possible stray "virtually" belongs to this service.

    Hook for service classes whose properly managed daemons spawn layered
    ceph services that show up under different names. Returning True asks
    the caller to ignore the possible stray instead of flagging it.

    This base implementation never claims anything and returns False.
    """
    return False
576+
567577

568578
class CephService(CephadmService):
569579
def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:

0 commit comments

Comments
 (0)