Skip to content

Commit b40036b

Browse files
authored
Merge pull request ceph#60800 from rkachach/fix_issue_deps_refactoring
mgr/cephadm: Refactoring and consolidating services dependencies calculation logic Reviewed-by: Adam King <[email protected]>
2 parents 31101e1 + 1280f01 commit b40036b

File tree

11 files changed

+225
-210
lines changed

11 files changed

+225
-210
lines changed

src/pybind/mgr/cephadm/inventory.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1338,11 +1338,16 @@ def get_related_service_daemons(self, service_spec: ServiceSpec) -> Optional[Lis
13381338

13391339
def get_daemons_by_type(self, service_type: str, host: str = '') -> List[orchestrator.DaemonDescription]:
    """Return the cached daemons whose daemon type belongs to ``service_type``.

    :param service_type: service type to look up; an assertion rejects
        'keepalived' and 'haproxy'.
    :param host: when non-empty, only the daemons stored under
        ``self.daemons[host]`` are considered; otherwise the result of
        ``self._get_daemons()`` is used.
    :return: the daemon descriptions whose ``daemon_type`` is among
        ``service_to_daemon_types(service_type)``.
    """
    assert service_type not in ['keepalived', 'haproxy']
    if host:
        candidates = self.daemons[host].values()
    else:
        candidates = self._get_daemons()
    matching = []
    for dd in candidates:
        if dd.daemon_type in service_to_daemon_types(service_type):
            matching.append(dd)
    return matching
13451343

1344+
def get_daemons_by_types(self, daemon_types: List[str]) -> List[str]:
    """Collect the names of all daemons of the given types.

    Names are returned grouped by type, following the order of
    ``daemon_types`` and, within one type, the order in which
    ``get_daemons_by_type`` reports the daemons.

    :param daemon_types: types to look up, each passed to
        ``get_daemons_by_type``.
    :return: the ``name()`` of every daemon found (empty if none).
    """
    return [
        dd.name()
        for wanted_type in daemon_types
        for dd in self.get_daemons_by_type(wanted_type)
    ]
1350+
13461351
def get_daemon_types(self, hostname: str) -> Set[str]:
    """Return the set of distinct daemon types stored for ``hostname``.

    Raises whatever ``self.daemons[hostname]`` raises when the host is
    not present.
    """
    host_daemons = self.daemons[hostname]
    found = set()
    for dd in host_daemons.values():
        found.add(dd.daemon_type)
    return cast(Set[str], found)

src/pybind/mgr/cephadm/module.py

Lines changed: 4 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
from ceph.deployment.service_spec import \
3737
ServiceSpec, PlacementSpec, \
3838
HostPlacementSpec, IngressSpec, \
39-
TunedProfileSpec, IscsiServiceSpec, \
39+
TunedProfileSpec, \
4040
MgmtGatewaySpec
4141
from ceph.utils import str_to_datetime, datetime_to_str, datetime_now
4242
from cephadm.serve import CephadmServe
@@ -2931,124 +2931,9 @@ def _calc_daemon_deps(self,
29312931
spec: Optional[ServiceSpec],
29322932
daemon_type: str,
29332933
daemon_id: str) -> List[str]:
2934-
2935-
def get_daemon_names(daemons: List[str]) -> List[str]:
2936-
daemon_names = []
2937-
for daemon_type in daemons:
2938-
for dd in self.cache.get_daemons_by_type(daemon_type):
2939-
daemon_names.append(dd.name())
2940-
return daemon_names
2941-
2942-
prom_cred_hash = None
2943-
alertmgr_cred_hash = None
2944-
security_enabled, mgmt_gw_enabled, _ = self._get_security_config()
2945-
if security_enabled:
2946-
alertmanager_user, alertmanager_password = self._get_alertmanager_credentials()
2947-
prometheus_user, prometheus_password = self._get_prometheus_credentials()
2948-
if prometheus_user and prometheus_password:
2949-
prom_cred_hash = f'{utils.md5_hash(prometheus_user + prometheus_password)}'
2950-
if alertmanager_user and alertmanager_password:
2951-
alertmgr_cred_hash = f'{utils.md5_hash(alertmanager_user + alertmanager_password)}'
2952-
2953-
deps = []
2954-
if daemon_type == 'haproxy':
2955-
# because cephadm creates new daemon instances whenever
2956-
# port or ip changes, identifying daemons by name is
2957-
# sufficient to detect changes.
2958-
if not spec:
2959-
return []
2960-
ingress_spec = cast(IngressSpec, spec)
2961-
assert ingress_spec.backend_service
2962-
daemons = self.cache.get_daemons_by_service(ingress_spec.backend_service)
2963-
deps = [d.name() for d in daemons]
2964-
elif daemon_type == 'keepalived':
2965-
# because cephadm creates new daemon instances whenever
2966-
# port or ip changes, identifying daemons by name is
2967-
# sufficient to detect changes.
2968-
if not spec:
2969-
return []
2970-
daemons = self.cache.get_daemons_by_service(spec.service_name())
2971-
deps = [d.name() for d in daemons if d.daemon_type == 'haproxy']
2972-
elif daemon_type == 'agent':
2973-
root_cert = ''
2974-
server_port = ''
2975-
try:
2976-
server_port = str(self.http_server.agent.server_port)
2977-
root_cert = self.cert_mgr.get_root_ca()
2978-
except Exception:
2979-
pass
2980-
deps = sorted([self.get_mgr_ip(), server_port, root_cert,
2981-
str(self.device_enhanced_scan)])
2982-
elif daemon_type == 'node-proxy':
2983-
root_cert = ''
2984-
server_port = ''
2985-
try:
2986-
server_port = str(self.http_server.agent.server_port)
2987-
root_cert = self.cert_mgr.get_root_ca()
2988-
except Exception:
2989-
pass
2990-
deps = sorted([self.get_mgr_ip(), server_port, root_cert])
2991-
elif daemon_type == 'iscsi':
2992-
if spec:
2993-
iscsi_spec = cast(IscsiServiceSpec, spec)
2994-
deps = [self.iscsi_service.get_trusted_ips(iscsi_spec)]
2995-
else:
2996-
deps = [self.get_mgr_ip()]
2997-
elif daemon_type == 'prometheus':
2998-
if not mgmt_gw_enabled:
2999-
# for prometheus we add the active mgr as an explicit dependency,
3000-
# this way we force a redeploy after a mgr failover
3001-
deps.append(self.get_active_mgr().name())
3002-
deps.append(str(self.get_module_option_ex('prometheus', 'server_port', 9283)))
3003-
deps.append(str(self.service_discovery_port))
3004-
# prometheus yaml configuration file (generated by prometheus.yml.j2) contains
3005-
# a scrape_configs section for each service type. This should be included only
3006-
# when at least one daemon of the corresponding service is running. Therefore,
3007-
# an explicit dependency is added for each service-type to force a reconfig
3008-
# whenever the number of daemons for those service-type changes from 0 to greater
3009-
# than zero and vice versa.
3010-
deps += [s for s in ['node-exporter', 'alertmanager']
3011-
if self.cache.get_daemons_by_service(s)]
3012-
if len(self.cache.get_daemons_by_type('ingress')) > 0:
3013-
deps.append('ingress')
3014-
# add dependency on ceph-exporter daemons
3015-
deps += [d.name() for d in self.cache.get_daemons_by_service('ceph-exporter')]
3016-
deps += [d.name() for d in self.cache.get_daemons_by_service('mgmt-gateway')]
3017-
deps += [d.name() for d in self.cache.get_daemons_by_service('oauth2-proxy')]
3018-
if prom_cred_hash is not None:
3019-
deps.append(prom_cred_hash)
3020-
if alertmgr_cred_hash is not None:
3021-
deps.append(alertmgr_cred_hash)
3022-
elif daemon_type == 'grafana':
3023-
deps += get_daemon_names(['prometheus', 'loki', 'mgmt-gateway', 'oauth2-proxy'])
3024-
if prom_cred_hash is not None:
3025-
deps.append(prom_cred_hash)
3026-
elif daemon_type == 'alertmanager':
3027-
deps += get_daemon_names(['alertmanager', 'snmp-gateway', 'mgmt-gateway', 'oauth2-proxy'])
3028-
if not mgmt_gw_enabled:
3029-
deps += get_daemon_names(['mgr'])
3030-
if alertmgr_cred_hash is not None:
3031-
deps.append(alertmgr_cred_hash)
3032-
elif daemon_type == 'promtail':
3033-
deps += get_daemon_names(['loki'])
3034-
elif daemon_type in ['ceph-exporter', 'node-exporter']:
3035-
deps += get_daemon_names(['mgmt-gateway'])
3036-
elif daemon_type == JaegerAgentService.TYPE:
3037-
for dd in self.cache.get_daemons_by_type(JaegerCollectorService.TYPE):
3038-
assert dd.hostname is not None
3039-
port = dd.ports[0] if dd.ports else JaegerCollectorService.DEFAULT_SERVICE_PORT
3040-
deps.append(build_url(host=dd.hostname, port=port).lstrip('/'))
3041-
deps = sorted(deps)
3042-
elif daemon_type == 'mgmt-gateway':
3043-
deps = MgmtGatewayService.get_dependencies(self)
3044-
else:
3045-
# this daemon type doesn't need deps mgmt
3046-
pass
3047-
3048-
if daemon_type in ['prometheus', 'node-exporter', 'alertmanager', 'grafana',
3049-
'ceph-exporter']:
3050-
deps.append(f'secure_monitoring_stack:{self.secure_monitoring_stack}')
3051-
2934+
svc_type = daemon_type_to_service(daemon_type)
2935+
svc_cls = self.cephadm_services.get(svc_type, None)
2936+
deps = svc_cls.get_dependencies(self, spec, daemon_type) if svc_cls else []
30522937
return sorted(deps)
30532938

30542939
@forall_hosts

src/pybind/mgr/cephadm/serve.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1129,8 +1129,8 @@ def _check_daemons(self) -> None:
11291129
dd.name()))
11301130
action = 'reconfig'
11311131
elif last_deps != deps:
1132-
self.log.debug(f'{dd.name()} deps {last_deps} -> {deps}')
1133-
self.log.info(f'Reconfiguring {dd.name()} (dependencies changed)...')
1132+
sym_diff = set(deps).symmetric_difference(last_deps)
1133+
self.log.info(f'Reconfiguring {dd.name()} deps {last_deps} -> {deps} (diff {sym_diff})')
11341134
action = 'reconfig'
11351135
# we need only redeploy if secure_monitoring_stack or mgmt-gateway value has changed:
11361136
# TODO(redo): check if we should just go always with redeploy (it's fast enough)

src/pybind/mgr/cephadm/services/cephadmservice.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,12 @@ class CephadmService(metaclass=ABCMeta):
265265
def TYPE(self) -> str:
266266
pass
267267

268+
@classmethod
def get_dependencies(cls, mgr: "CephadmOrchestrator",
                     spec: Optional[ServiceSpec] = None,
                     daemon_type: Optional[str] = None) -> List[str]:
    """Return the dependencies of a daemon of this service.

    This base implementation reports no dependencies and ignores all of
    its arguments; service classes that do have dependencies provide
    their own ``get_dependencies``.

    :return: a new, empty list.
    """
    no_deps: List[str] = []
    return no_deps
273+
268274
def __init__(self, mgr: "CephadmOrchestrator"):
269275
self.mgr: "CephadmOrchestrator" = mgr
270276

@@ -576,6 +582,7 @@ def ignore_possible_stray(
576582

577583

578584
class CephService(CephadmService):
585+
579586
def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
580587
# Ceph.daemons (mon, mgr, mds, osd, etc)
581588
cephadm_config = self.get_config_and_keyring(
@@ -1301,6 +1308,15 @@ class CephExporterService(CephService):
13011308
TYPE = 'ceph-exporter'
13021309
DEFAULT_SERVICE_PORT = 9926
13031310

1311+
@classmethod
def get_dependencies(cls, mgr: "CephadmOrchestrator",
                     spec: Optional[ServiceSpec] = None,
                     daemon_type: Optional[str] = None) -> List[str]:
    """Return the sorted dependencies of a ceph-exporter daemon.

    The result holds a ``secure_monitoring_stack:<value>`` marker built
    from ``mgr.secure_monitoring_stack`` plus the names of all
    'mgmt-gateway' daemons known to ``mgr.cache``. ``spec`` and
    ``daemon_type`` are not used.
    """
    stack_flag = f'secure_monitoring_stack:{mgr.secure_monitoring_stack}'
    gateway_names = mgr.cache.get_daemons_by_types(['mgmt-gateway'])
    return sorted([stack_flag, *gateway_names])
1319+
13041320
def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
13051321
assert self.TYPE == daemon_spec.daemon_type
13061322
spec = cast(CephExporterSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
@@ -1330,11 +1346,7 @@ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonD
13301346
daemon_spec.keyring = keyring
13311347
daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
13321348
daemon_spec.final_config = merge_dicts(daemon_spec.final_config, exporter_config)
1333-
1334-
deps = []
1335-
deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')]
1336-
deps += [f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}']
1337-
daemon_spec.deps = deps
1349+
daemon_spec.deps = self.get_dependencies(self.mgr)
13381350

13391351
return daemon_spec
13401352

@@ -1379,6 +1391,20 @@ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonD
13791391
class CephadmAgent(CephService):
13801392
TYPE = 'agent'
13811393

1394+
@classmethod
def get_dependencies(cls, mgr: "CephadmOrchestrator",
                     spec: Optional[ServiceSpec] = None,
                     daemon_type: Optional[str] = None) -> List[str]:
    """Return the sorted dependencies of a cephadm agent daemon.

    The list is made of four values: the mgr IP, the port of
    ``mgr.http_server.agent``, the root CA reported by ``mgr.cert_mgr``
    and the ``device_enhanced_scan`` module option. All but the root CA
    are converted with ``str``. ``spec`` and ``daemon_type`` are not used.
    """
    agent_endpoint = mgr.http_server.agent
    mgr_ip = str(mgr.get_mgr_ip())
    port = str(agent_endpoint.server_port)
    root_ca = mgr.cert_mgr.get_root_ca()
    enhanced_scan = str(mgr.get_module_option("device_enhanced_scan"))
    deps = [mgr_ip, port, root_ca, enhanced_scan]
    deps.sort()
    return deps
1407+
13821408
def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec:
13831409
assert self.TYPE == daemon_spec.daemon_type
13841410
daemon_id, host = daemon_spec.daemon_id, daemon_spec.host

src/pybind/mgr/cephadm/services/ingress.py

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,34 @@
22
import logging
33
import random
44
import string
5-
from typing import List, Dict, Any, Tuple, cast, Optional
5+
from typing import List, Dict, Any, Tuple, cast, Optional, TYPE_CHECKING
66

77
from ceph.deployment.service_spec import ServiceSpec, IngressSpec
88
from mgr_util import build_url
99
from cephadm import utils
1010
from orchestrator import OrchestratorError, DaemonDescription
1111
from cephadm.services.cephadmservice import CephadmDaemonDeploySpec, CephService
1212

13+
if TYPE_CHECKING:
14+
from ..module import CephadmOrchestrator
15+
1316
logger = logging.getLogger(__name__)
1417

1518

1619
class IngressService(CephService):
1720
TYPE = 'ingress'
1821
MAX_KEEPALIVED_PASS_LEN = 8
1922

23+
@classmethod
def get_dependencies(cls, mgr: "CephadmOrchestrator",
                     spec: Optional[ServiceSpec] = None,
                     daemon_type: Optional[str] = None) -> List[str]:
    """Return the dependencies of an ingress daemon.

    An ingress service is made of haproxy and keepalived daemons, each
    with its own dependency helper; ``daemon_type`` selects which one is
    used. Any other daemon type (including ``None``) has no dependencies.
    """
    if daemon_type == 'keepalived':
        return IngressService.get_keepalived_dependencies(mgr, spec)
    if daemon_type == 'haproxy':
        return IngressService.get_haproxy_dependencies(mgr, spec)
    return []
32+
2033
def primary_daemon_type(self, spec: Optional[ServiceSpec] = None) -> str:
2134
if spec:
2235
ispec = cast(IngressSpec, spec)
@@ -75,6 +88,18 @@ def haproxy_prepare_create(
7588

7689
return daemon_spec
7790

91+
@staticmethod
def get_haproxy_dependencies(mgr: "CephadmOrchestrator", spec: Optional[ServiceSpec]) -> List[str]:
    """Return the sorted names of the backend daemons of an ingress spec.

    :param mgr: orchestrator whose cache is queried for the daemons of
        the spec's ``backend_service``.
    :param spec: the ingress spec (treated as an ``IngressSpec``); when
        it is missing, an empty list is returned.
    :return: sorted daemon names of the backend service.
    """
    if not spec:
        return []
    # because cephadm creates new daemon instances whenever
    # port or ip changes, identifying daemons by name is
    # sufficient to detect changes.
    backend = cast(IngressSpec, spec).backend_service
    assert backend
    names = [d.name() for d in mgr.cache.get_daemons_by_service(backend)]
    names.sort()
    return names
102+
78103
def haproxy_generate_config(
79104
self,
80105
daemon_spec: CephadmDaemonDeploySpec,
@@ -86,7 +111,6 @@ def haproxy_generate_config(
86111
f'{spec.service_name()} backend service {spec.backend_service} does not exist')
87112
backend_spec = self.mgr.spec_store[spec.backend_service].spec
88113
daemons = self.mgr.cache.get_daemons_by_service(spec.backend_service)
89-
deps = [d.name() for d in daemons]
90114

91115
# generate password?
92116
pw_key = f'{spec.service_name()}/monitor_password'
@@ -201,7 +225,7 @@ def haproxy_generate_config(
201225
ssl_cert = '\n'.join(ssl_cert)
202226
config_files['files']['haproxy.pem'] = ssl_cert
203227

204-
return config_files, sorted(deps)
228+
return config_files, self.get_haproxy_dependencies(self.mgr, spec)
205229

206230
def keepalived_prepare_create(
207231
self,
@@ -220,6 +244,16 @@ def keepalived_prepare_create(
220244

221245
return daemon_spec
222246

247+
@staticmethod
def get_keepalived_dependencies(mgr: "CephadmOrchestrator", spec: Optional[ServiceSpec]) -> List[str]:
    """Return the sorted names of the haproxy daemons of a service.

    :param mgr: orchestrator whose cache is queried for the daemons of
        ``spec.service_name()``.
    :param spec: the service spec; when it is missing, an empty list is
        returned.
    :return: sorted names of the service's daemons whose type is
        'haproxy'.
    """
    if not spec:
        return []
    # because cephadm creates new daemon instances whenever
    # port or ip changes, identifying daemons by name is
    # sufficient to detect changes.
    service_daemons = mgr.cache.get_daemons_by_service(spec.service_name())
    haproxy_names = []
    for d in service_daemons:
        if d.daemon_type == 'haproxy':
            haproxy_names.append(d.name())
    return sorted(haproxy_names)
256+
223257
def keepalived_generate_config(
224258
self,
225259
daemon_spec: CephadmDaemonDeploySpec,
@@ -252,8 +286,6 @@ def keepalived_generate_config(
252286
raise OrchestratorError(
253287
f'Failed to generate keepalived.conf: No daemons deployed for {spec.service_name()}')
254288

255-
deps = sorted([d.name() for d in daemons if d.daemon_type == 'haproxy'])
256-
257289
host = daemon_spec.host
258290
hosts = sorted(list(set([host] + [str(d.hostname) for d in daemons])))
259291

@@ -394,4 +426,4 @@ def _get_valid_interface_and_ip(vip: str, host: str) -> Tuple[str, str]:
394426
}
395427
}
396428

397-
return config_file, deps
429+
return config_file, self.get_keepalived_dependencies(self.mgr, spec)

0 commit comments

Comments
 (0)