Skip to content

Commit 29656dc

Browse files
authored
Merge pull request ceph#61824 from ShwetaBhosale1/fix_issue_69942_prometheus_not_adding_host_url_in_mgr_when_first_enabled
mgr/cephadm: Deploying prometheus service for the first time, does not update PROMETHEUS_API_HOST url under mgr module Reviewed-by: Adam King <[email protected]>
2 parents 55d25da + f93da5a commit 29656dc

File tree

6 files changed

+48
-56
lines changed

6 files changed

+48
-56
lines changed

src/pybind/mgr/cephadm/module.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
)
4646
from ceph.deployment.drive_group import DeviceSelection
4747
from ceph.utils import str_to_datetime, datetime_to_str, datetime_now
48-
from cephadm.serve import CephadmServe
48+
from cephadm.serve import CephadmServe, REQUIRES_POST_ACTIONS
4949
from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
5050
from cephadm.http_server import CephadmHttpServer
5151
from cephadm.agent import CephadmAgentHelpers
@@ -665,7 +665,6 @@ def __init__(self, *args: Any, **kwargs: Any):
665665

666666
self.template = TemplateMgr(self)
667667

668-
self.requires_post_actions: Set[str] = set()
669668
self.need_connect_dashboard_rgw = False
670669

671670
self.config_checker = CephadmConfigChecks(self)
@@ -1001,6 +1000,7 @@ def _as_datetime(value: Optional[str]) -> Optional[datetime.datetime]:
10011000
'error': DaemonDescriptionStatus.error,
10021001
'unknown': DaemonDescriptionStatus.error,
10031002
}[d['state']]
1003+
10041004
sd = orchestrator.DaemonDescription(
10051005
daemon_type=daemon_type,
10061006
daemon_id='.'.join(d['name'].split('.')[1:]),
@@ -1031,6 +1031,15 @@ def _as_datetime(value: Optional[str]) -> Optional[datetime.datetime]:
10311031
extra_container_args=d.get('extra_container_args'),
10321032
extra_entrypoint_args=d.get('extra_entrypoint_args'),
10331033
)
1034+
1035+
if daemon_type in REQUIRES_POST_ACTIONS:
1036+
# If post action is required for daemon, then restore value of pending_daemon_config
1037+
try:
1038+
cached_dd = self.cache.get_daemon(sd.name(), host)
1039+
sd.update_pending_daemon_config(cached_dd.pending_daemon_config)
1040+
except orchestrator.OrchestratorError:
1041+
pass
1042+
10341043
dm[sd.name()] = sd
10351044
self.log.debug('Refreshed host %s daemons (%d)' % (host, len(dm)))
10361045
self.cache.update_host_daemons(host, dm)

src/pybind/mgr/cephadm/serve.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1187,8 +1187,11 @@ def _check_daemons(self) -> None:
11871187
for daemon_type, daemon_descs in daemons_post.items():
11881188
run_post = False
11891189
for d in daemon_descs:
1190-
if d.name() in self.mgr.requires_post_actions:
1191-
self.mgr.requires_post_actions.remove(d.name())
1190+
if d.pending_daemon_config:
1191+
assert d.hostname is not None
1192+
cache_dd = self.mgr.cache.get_daemon(d.name(), d.hostname)
1193+
cache_dd.update_pending_daemon_config(False)
1194+
self.mgr.cache.save_host(d.hostname)
11921195
run_post = True
11931196
if run_post:
11941197
service_registry.get_service(daemon_type_to_service(
@@ -1460,9 +1463,10 @@ async def _create_daemon(self,
14601463
# just created
14611464
sd = daemon_spec.to_daemon_description(
14621465
DaemonDescriptionStatus.starting, 'starting')
1463-
self.mgr.cache.add_daemon(daemon_spec.host, sd)
1466+
# If daemon requires post action, then mark pending_daemon_config as true
14641467
if daemon_spec.daemon_type in REQUIRES_POST_ACTIONS:
1465-
self.mgr.requires_post_actions.add(daemon_spec.name())
1468+
sd.update_pending_daemon_config(True)
1469+
self.mgr.cache.add_daemon(daemon_spec.host, sd)
14661470
self.mgr.cache.invalidate_host_daemons(daemon_spec.host)
14671471

14681472
if daemon_spec.daemon_type != 'agent':

src/pybind/mgr/cephadm/tests/test_cephadm.py

Lines changed: 20 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,7 @@ def remove_id_events(dd):
246246
'status_desc': 'starting',
247247
'is_active': False,
248248
'ports': [],
249+
'pending_daemon_config': False,
249250
}
250251
]
251252

@@ -898,66 +899,35 @@ def test_daemon_check_post(self, cephadm_module: CephadmOrchestrator):
898899
_mon_cmd.assert_any_call(
899900
{'prefix': 'dashboard set-grafana-api-url', 'value': 'https://host_fqdn:3000'},
900901
None)
902+
with with_host(cephadm_module, 'test'):
903+
with with_service(cephadm_module, ServiceSpec(service_type='prometheus'), CephadmOrchestrator.apply_prometheus, 'test'):
904+
with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd:
905+
CephadmServe(cephadm_module)._check_daemons()
906+
_mon_cmd.assert_any_call(
907+
{'prefix': 'dashboard set-prometheus-api-host', 'value': 'http://host_fqdn:9095'},
908+
None)
909+
with with_host(cephadm_module, 'test'):
910+
with with_service(cephadm_module, ServiceSpec(service_type='alertmanager'), CephadmOrchestrator.apply_alertmanager, 'test'):
911+
with mock.patch("cephadm.module.CephadmOrchestrator.mon_command") as _mon_cmd:
912+
CephadmServe(cephadm_module)._check_daemons()
913+
_mon_cmd.assert_any_call(
914+
{'prefix': 'dashboard set-alertmanager-api-host', 'value': 'http://host_fqdn:9093'},
915+
None)
901916

902917
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
903918
@mock.patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1.2.3.4')
904-
def test_iscsi_post_actions_with_missing_daemon_in_cache(self, cephadm_module: CephadmOrchestrator):
919+
def test_iscsi_post_actions(self, cephadm_module: CephadmOrchestrator):
905920
# https://tracker.ceph.com/issues/52866
906921
with with_host(cephadm_module, 'test1'):
907922
with with_host(cephadm_module, 'test2'):
908923
with with_service(cephadm_module, IscsiServiceSpec(service_id='foobar', pool='pool', placement=PlacementSpec(host_pattern='*')), CephadmOrchestrator.apply_iscsi, 'test'):
909924

910925
CephadmServe(cephadm_module)._apply_all_services()
911926
assert len(cephadm_module.cache.get_daemons_by_type('iscsi')) == 2
912-
913-
# get a daemons from postaction list (ARRGH sets!!)
914-
tempset = cephadm_module.requires_post_actions.copy()
915-
tempdaemon1 = tempset.pop()
916-
tempdaemon2 = tempset.pop()
917-
918-
# make sure post actions has 2 daemons in it
919-
assert len(cephadm_module.requires_post_actions) == 2
920-
921-
# replicate a host cache that is not in sync when check_daemons is called
922-
tempdd1 = cephadm_module.cache.get_daemon(tempdaemon1)
923-
tempdd2 = cephadm_module.cache.get_daemon(tempdaemon2)
924-
host = 'test1'
925-
if 'test1' not in tempdaemon1:
926-
host = 'test2'
927-
cephadm_module.cache.rm_daemon(host, tempdaemon1)
928-
929-
# Make sure, _check_daemons does a redeploy due to monmap change:
930-
cephadm_module.mock_store_set('_ceph_get', 'mon_map', {
931-
'modified': datetime_to_str(datetime_now()),
932-
'fsid': 'foobar',
933-
})
934-
cephadm_module.notify('mon_map', None)
935-
cephadm_module.mock_store_set('_ceph_get', 'mgr_map', {
936-
'modules': ['dashboard']
937-
})
938-
939-
with mock.patch("cephadm.module.IscsiService.config_dashboard") as _cfg_db:
940-
CephadmServe(cephadm_module)._check_daemons()
941-
_cfg_db.assert_called_once_with([tempdd2])
942-
943-
# post actions still has the other daemon in it and will run next _check_daemons
944-
assert len(cephadm_module.requires_post_actions) == 1
945-
946-
# post actions was missed for a daemon
947-
assert tempdaemon1 in cephadm_module.requires_post_actions
948-
949-
# put the daemon back in the cache
950-
cephadm_module.cache.add_daemon(host, tempdd1)
951-
952-
_cfg_db.reset_mock()
953-
# replicate serve loop running again
954-
CephadmServe(cephadm_module)._check_daemons()
955-
956-
# post actions should have been called again
957-
_cfg_db.asset_called()
958-
959-
# post actions is now empty
960-
assert len(cephadm_module.requires_post_actions) == 0
927+
for host in ['test1', 'test2']:
928+
d = cephadm_module.cache.daemons[host]
929+
for name, dd in d.items():
930+
assert dd.pending_daemon_config is True
961931

962932
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]'))
963933
def test_mon_add(self, cephadm_module):

src/pybind/mgr/cephadm/tests/test_spec.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,7 @@ def convert_to_old_style_json(j):
286286
del j['daemon_name']
287287
return j
288288

289+
dd_json.update({'pending_daemon_config': False})
289290
assert dd_json == convert_to_old_style_json(
290291
DaemonDescription.from_json(dd_json).to_json())
291292

src/pybind/mgr/orchestrator/_interface.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1234,6 +1234,7 @@ def __init__(self,
12341234
rank_generation: Optional[int] = None,
12351235
extra_container_args: Optional[GeneralArgList] = None,
12361236
extra_entrypoint_args: Optional[GeneralArgList] = None,
1237+
pending_daemon_config: bool = False
12371238
) -> None:
12381239

12391240
#: Host is at the same granularity as InventoryHost
@@ -1308,6 +1309,7 @@ def __init__(self,
13081309
if extra_entrypoint_args:
13091310
self.extra_entrypoint_args = ArgumentSpec.from_general_args(
13101311
extra_entrypoint_args)
1312+
self.pending_daemon_config = pending_daemon_config
13111313

13121314
def __setattr__(self, name: str, value: Any) -> None:
13131315
if value is not None and name in ('extra_container_args', 'extra_entrypoint_args'):
@@ -1429,6 +1431,9 @@ def service_name(self) -> str:
14291431
return f'{daemon_type_to_service(self.daemon_type)}.{self.service_id()}'
14301432
return daemon_type_to_service(self.daemon_type)
14311433

1434+
def update_pending_daemon_config(self, value: bool) -> None:
1435+
self.pending_daemon_config = value
1436+
14321437
def __repr__(self) -> str:
14331438
return "<DaemonDescription>({type}.{id})".format(type=self.daemon_type,
14341439
id=self.daemon_id)
@@ -1462,6 +1467,7 @@ def to_json(self) -> dict:
14621467
out['rank'] = self.rank
14631468
out['rank_generation'] = self.rank_generation
14641469
out['systemd_unit'] = self.systemd_unit
1470+
out['pending_daemon_config'] = self.pending_daemon_config
14651471

14661472
for k in ['last_refresh', 'created', 'started', 'last_deployed',
14671473
'last_configured']:
@@ -1499,6 +1505,7 @@ def to_dict(self) -> dict:
14991505
out['ports'] = self.ports
15001506
out['ip'] = self.ip
15011507
out['systemd_unit'] = self.systemd_unit
1508+
out['pending_daemon_config'] = self.pending_daemon_config
15021509

15031510
for k in ['last_refresh', 'created', 'started', 'last_deployed',
15041511
'last_configured']:

src/pybind/mgr/orchestrator/tests/test_orchestrator.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ def test_yaml():
9292
status: 1
9393
status_desc: starting
9494
is_active: false
95+
pending_daemon_config: false
9596
events:
9697
- 2020-06-10T10:08:22.933241Z daemon:crash.ubuntu [INFO] "Deployed crash.ubuntu on
9798
host 'ubuntu'"

0 commit comments

Comments
 (0)