Skip to content

Commit a8498dd

Browse files
committed
mgr/cephadm: fix custom alertmanager webhooks
Previously, we put the custom webhooks in the "default" receiver. This didn't actually work as alertmanager only sends alerts to the default receiver if the alert does not match the following routes. This meant if you wanted alerts sent to the dashboard and also a custom location, you weren't able to do so with the template cephadm provided. This also swaps to using "webhook_urls" instead of "default_webhook_urls" for what should be in the spec, but "default_webhook_urls" is kept working for backwards compatibility. Fixes: https://tracker.ceph.com/issues/68157 Signed-off-by: Adam King <[email protected]>
1 parent f733a87 commit a8498dd

File tree

5 files changed

+141
-9
lines changed

5 files changed

+141
-9
lines changed

doc/cephadm/services/monitoring.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -598,7 +598,7 @@ webhook urls like so:
598598
service_type: alertmanager
599599
spec:
600600
user_data:
601-
default_webhook_urls:
601+
webhook_urls:
602602
- "https://foo"
603603
- "https://bar"
604604

src/pybind/mgr/cephadm/services/monitoring.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ def get_dependencies(cls, mgr: "CephadmOrchestrator",
311311

312312
def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
313313
assert self.TYPE == daemon_spec.daemon_type
314-
default_webhook_urls: List[str] = []
314+
webhook_urls: List[str] = []
315315

316316
spec = cast(AlertManagerSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
317317
try:
@@ -321,7 +321,10 @@ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[st
321321
user_data = spec.user_data
322322
if 'default_webhook_urls' in user_data and isinstance(
323323
user_data['default_webhook_urls'], list):
324-
default_webhook_urls.extend(user_data['default_webhook_urls'])
324+
webhook_urls.extend(user_data['default_webhook_urls'])
325+
if 'webhook_urls' in user_data and isinstance(
326+
user_data['webhook_urls'], list):
327+
webhook_urls.extend(user_data['webhook_urls'])
325328

326329
security_enabled, mgmt_gw_enabled, oauth2_enabled = self.mgr._get_security_config()
327330
if mgmt_gw_enabled:
@@ -340,7 +343,7 @@ def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[st
340343
context = {
341344
'security_enabled': security_enabled,
342345
'dashboard_urls': dashboard_urls,
343-
'default_webhook_urls': default_webhook_urls,
346+
'webhook_urls': webhook_urls,
344347
'snmp_gateway_urls': snmp_gateway_urls,
345348
'secure': secure,
346349
}

src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,14 @@ route:
2323
group_interval: 10s
2424
repeat_interval: 1h
2525
receiver: 'ceph-dashboard'
26+
{% if webhook_urls %}
27+
continue: true
28+
- group_by: ['alertname']
29+
group_wait: 10s
30+
group_interval: 10s
31+
repeat_interval: 1h
32+
receiver: 'custom-receiver'
33+
{% endif %}
2634
{% if snmp_gateway_urls %}
2735
continue: true
2836
- receiver: 'snmp-gateway'
@@ -36,7 +44,9 @@ route:
3644
receivers:
3745
- name: 'default'
3846
webhook_configs:
39-
{% for url in default_webhook_urls %}
47+
- name: 'custom-receiver'
48+
webhook_configs:
49+
{% for url in webhook_urls %}
4050
- url: '{{ url }}'
4151
{% endfor %}
4252
- name: 'ceph-dashboard'

src/pybind/mgr/cephadm/tests/test_services.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,8 @@ def _get_config(self, url: str) -> str:
564564
receivers:
565565
- name: 'default'
566566
webhook_configs:
567+
- name: 'custom-receiver'
568+
webhook_configs:
567569
- name: 'ceph-dashboard'
568570
webhook_configs:
569571
- url: '{url}/api/prometheus_receiver'
@@ -711,6 +713,8 @@ def test_alertmanager_config_when_mgmt_gw_enabled(self, _get_fqdn, _run_cephadm,
711713
receivers:
712714
- name: 'default'
713715
webhook_configs:
716+
- name: 'custom-receiver'
717+
webhook_configs:
714718
- name: 'ceph-dashboard'
715719
webhook_configs:
716720
- url: 'https://host_fqdn:29443/internal/dashboard/api/prometheus_receiver'
@@ -810,6 +814,8 @@ def test_alertmanager_config_security_enabled(self, _get_fqdn, _run_cephadm, cep
810814
receivers:
811815
- name: 'default'
812816
webhook_configs:
817+
- name: 'custom-receiver'
818+
webhook_configs:
813819
- name: 'ceph-dashboard'
814820
webhook_configs:
815821
- url: 'http://{fqdn}:8080/api/prometheus_receiver'
@@ -864,6 +870,119 @@ def test_alertmanager_config_security_enabled(self, _get_fqdn, _run_cephadm, cep
864870
use_current_daemon_image=False,
865871
)
866872

873+
@pytest.mark.parametrize(
874+
"user_data",
875+
[
876+
({'webhook_urls': ['http://foo.com:9999', 'http://bar.com:1111']}),
877+
({'default_webhook_urls': ['http://bar.com:9999', 'http://foo.com:1111']}),
878+
({'default_webhook_urls': ['http://bar.com:9999', 'http://foo.com:1111'],
879+
'webhook_urls': ['http://foo.com:9999', 'http://bar.com:1111']}),
880+
],
881+
)
882+
@patch("cephadm.serve.CephadmServe._run_cephadm")
883+
@patch("socket.getfqdn")
884+
@patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1')
885+
@patch("cephadm.services.monitoring.password_hash", lambda password: 'alertmanager_password_hash')
886+
@patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: 'cephadm_root_cert')
887+
@patch('cephadm.cert_mgr.CertMgr.generate_cert', lambda instance, fqdn, ip: ('mycert', 'mykey'))
888+
def test_alertmanager_config_custom_webhook_urls(
889+
self,
890+
_get_fqdn,
891+
_run_cephadm,
892+
cephadm_module: CephadmOrchestrator,
893+
user_data: Dict[str, List[str]]
894+
):
895+
_run_cephadm.side_effect = async_side_effect(('{}', '', 0))
896+
cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user')
897+
cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password')
898+
fqdn = 'host1.test'
899+
_get_fqdn.return_value = fqdn
900+
901+
print(user_data)
902+
903+
urls = []
904+
if 'default_webhook_urls' in user_data:
905+
urls += user_data['default_webhook_urls']
906+
if 'webhook_urls' in user_data:
907+
urls += user_data['webhook_urls']
908+
tab_over = ' ' * 18 # since we'll be inserting this into an indented string
909+
webhook_configs_str = '\n'.join(f'{tab_over}- url: \'{u}\'' for u in urls)
910+
911+
with with_host(cephadm_module, 'test'):
912+
with with_service(cephadm_module, AlertManagerSpec(user_data=user_data)):
913+
914+
y = dedent(f"""
915+
# This file is generated by cephadm.
916+
# See https://prometheus.io/docs/alerting/configuration/ for documentation.
917+
918+
global:
919+
resolve_timeout: 5m
920+
http_config:
921+
tls_config:
922+
insecure_skip_verify: true
923+
924+
route:
925+
receiver: 'default'
926+
routes:
927+
- group_by: ['alertname']
928+
group_wait: 10s
929+
group_interval: 10s
930+
repeat_interval: 1h
931+
receiver: 'ceph-dashboard'
932+
continue: true
933+
- group_by: ['alertname']
934+
group_wait: 10s
935+
group_interval: 10s
936+
repeat_interval: 1h
937+
receiver: 'custom-receiver'
938+
939+
receivers:
940+
- name: 'default'
941+
webhook_configs:
942+
- name: 'custom-receiver'
943+
webhook_configs:
944+
{webhook_configs_str}
945+
- name: 'ceph-dashboard'
946+
webhook_configs:
947+
- url: 'http://{fqdn}:8080/api/prometheus_receiver'
948+
""").lstrip()
949+
950+
_run_cephadm.assert_called_with(
951+
'test',
952+
"alertmanager.test",
953+
['_orch', 'deploy'],
954+
[],
955+
stdin=json.dumps({
956+
"fsid": "fsid",
957+
"name": 'alertmanager.test',
958+
"image": '',
959+
"deploy_arguments": [],
960+
"params": {
961+
'tcp_ports': [9093, 9094],
962+
},
963+
"meta": {
964+
'service_name': 'alertmanager',
965+
'ports': [9093, 9094],
966+
'ip': None,
967+
'deployed_by': [],
968+
'rank': None,
969+
'rank_generation': None,
970+
'extra_container_args': None,
971+
'extra_entrypoint_args': None,
972+
},
973+
"config_blobs": {
974+
"files": {
975+
"alertmanager.yml": y,
976+
},
977+
'peers': [],
978+
"use_url_prefix": False,
979+
"ip_to_bind_to": "",
980+
}
981+
}),
982+
use_current_daemon_image=False,
983+
error_ok=True,
984+
)
985+
867986
@patch("cephadm.serve.CephadmServe._run_cephadm")
868987
@patch("socket.getfqdn")
869988
@patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1')

src/python-common/ceph/deployment/service_spec.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2479,14 +2479,14 @@ def __init__(self,
24792479
# service_type: alertmanager
24802480
# service_id: xyz
24812481
# user_data:
2482-
# default_webhook_urls:
2482+
# webhook_urls:
24832483
# - "https://foo"
24842484
# - "https://bar"
24852485
#
24862486
# Documentation:
2487-
# default_webhook_urls - A list of additional URL's that are
2488-
# added to the default receivers'
2489-
# <webhook_configs> configuration.
2487+
# webhook_urls - A list of additional URLs that are
2488+
# added to the 'custom-receiver' receiver's
2489+
# <webhook_configs> configuration.
24902490
self.user_data = user_data or {}
24912491
self.secure = secure
24922492
self.only_bind_port_on_networks = only_bind_port_on_networks

0 commit comments

Comments
 (0)