Skip to content

Commit 5622aec

Browse files
authored
Merge pull request ceph#55050 from rhcs-dashboard/nfs-monitoring
mgr/nfs: scrape nfs monitoring endpoint
Reviewed-by: Adam King <[email protected]>
Reviewed-by: Redouane Kachach <[email protected]>
2 parents 3060f4f + 5fb45e5 commit 5622aec

File tree

6 files changed

+76
-2
lines changed

6 files changed

+76
-2
lines changed

src/pybind/mgr/cephadm/service_discovery.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ class Server: # type: ignore
1212
from mgr_module import ServiceInfoT
1313
from mgr_util import build_url
1414
from typing import Dict, List, TYPE_CHECKING, cast, Collection, Callable, NamedTuple, Optional, IO
15+
from cephadm.services.nfs import NFSService
1516
from cephadm.services.monitoring import AlertmanagerService, NodeExporterService, PrometheusService
1617
import secrets
1718
from mgr_util import verify_tls_files
@@ -147,6 +148,7 @@ def index(self) -> str:
147148
<p><a href='prometheus/sd-config?service=haproxy'>HAProxy http sd-config</a></p>
148149
<p><a href='prometheus/sd-config?service=ceph-exporter'>Ceph exporter http sd-config</a></p>
149150
<p><a href='prometheus/sd-config?service=nvmeof'>NVMeoF http sd-config</a></p>
151+
<p><a href='prometheus/sd-config?service=nfs'>NFS http sd-config</a></p>
150152
<p><a href='prometheus/rules'>Prometheus rules</a></p>
151153
</body>
152154
</html>'''
@@ -167,6 +169,8 @@ def get_sd_config(self, service: str) -> List[Dict[str, Collection[str]]]:
167169
return self.ceph_exporter_sd_config()
168170
elif service == 'nvmeof':
169171
return self.nvmeof_sd_config()
172+
elif service == 'nfs':
173+
return self.nfs_sd_config()
170174
else:
171175
return []
172176

@@ -248,6 +252,19 @@ def nvmeof_sd_config(self) -> List[Dict[str, Collection[str]]]:
248252
})
249253
return srv_entries
250254

255+
def nfs_sd_config(self) -> List[Dict[str, Collection[str]]]:
    """Return <http_sd_config> compatible prometheus config for nfs service.

    One entry is produced for every daemon of type 'nfs' found in the
    mgr cache. Each entry carries a single target built from the daemon
    address and ``NFSService.DEFAULT_EXPORTER_PORT``, plus an
    ``instance`` label holding the daemon's hostname.
    """
    entries: List[Dict[str, Collection[str]]] = []
    for daemon in self.mgr.cache.get_daemons_by_type('nfs'):
        hostname = daemon.hostname
        assert hostname is not None
        # Use the daemon's own ip when it has one; only otherwise ask the
        # inventory for the address registered for the host.
        address = daemon.ip or self.mgr.inventory.get_addr(hostname)
        url = build_url(host=address, port=NFSService.DEFAULT_EXPORTER_PORT)
        # Leading slashes are stripped so the target is a bare host:port
        # (presumably build_url emits a scheme-less '//host:port' -- confirm).
        target = url.lstrip('/')
        entries.append({
            'targets': [target],
            'labels': {'instance': hostname}
        })
    return entries
267+
251268
@cherrypy.expose(alias='prometheus/rules')
252269
def get_prometheus_rules(self) -> str:
253270
"""Return currently configured prometheus rules as Yaml."""

src/pybind/mgr/cephadm/services/monitoring.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,7 @@ def generate_config(
470470
ceph_exporter_sd_url = f'{srv_end_point}service=ceph-exporter' # always included
471471
nvmeof_sd_url = f'{srv_end_point}service=nvmeof' # always included
472472
mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0
473+
nfs_sd_url = f'{srv_end_point}service=nfs' # always included
473474

474475
alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
475476
prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
@@ -490,7 +491,8 @@ def generate_config(
490491
'ceph_exporter_sd_url': ceph_exporter_sd_url,
491492
'nvmeof_sd_url': nvmeof_sd_url,
492493
'external_prometheus_targets': targets,
493-
'cluster_fsid': FSID
494+
'cluster_fsid': FSID,
495+
'nfs_sd_url': nfs_sd_url
494496
}
495497

496498
ip_to_bind_to = ''

src/pybind/mgr/cephadm/services/nfs.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
class NFSService(CephService):
2424
TYPE = 'nfs'
25+
DEFAULT_EXPORTER_PORT = 9587
2526

2627
def ranked(self, spec: ServiceSpec) -> bool:
2728
return True

src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,26 @@ scrape_configs:
161161
{% endif %}
162162
{% endif %}
163163

164+
{% if nfs_sd_url %}
165+
- job_name: 'nfs'
166+
{% if security_enabled %}
167+
honor_labels: true
168+
scheme: https
169+
tls_config:
170+
ca_file: root_cert.pem
171+
http_sd_configs:
172+
- url: {{ nfs_sd_url }}
173+
basic_auth:
174+
username: {{ service_discovery_username }}
175+
password: {{ service_discovery_password }}
176+
tls_config:
177+
ca_file: root_cert.pem
178+
{% else %}
179+
http_sd_configs:
180+
- url: {{ nfs_sd_url }}
181+
{% endif %}
182+
{% endif %}
183+
164184
{% if not security_enabled %}
165185
- job_name: 'federate'
166186
scrape_interval: 15s
@@ -175,4 +195,3 @@ scrape_configs:
175195
static_configs:
176196
- targets: {{ external_prometheus_targets }}
177197
{% endif %}
178-

src/pybind/mgr/cephadm/tests/test_service_discovery.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ def get_daemons_by_service(self, service_type):
2323
return [FakeDaemonDescription('1.2.3.4', [10008], 'node0'),
2424
FakeDaemonDescription('1.2.3.5', [10008], 'node1')]
2525

26+
if service_type == 'nfs':
27+
return [FakeDaemonDescription('1.2.3.4', [9587], 'node0'),
28+
FakeDaemonDescription('1.2.3.5', [9587], 'node1')]
29+
2630
return [FakeDaemonDescription('1.2.3.4', [9100], 'node0'),
2731
FakeDaemonDescription('1.2.3.5', [9200], 'node1')]
2832

@@ -188,6 +192,20 @@ def test_get_sd_config_nvmeof(self):
188192
# check content
189193
assert cfg[0]['targets'] == ['1.2.3.4:10008']
190194

195+
def test_get_sd_config_nfs(self):
    """The 'nfs' sd-config has well-formed entries and the expected first target."""
    sd_root = Root(FakeMgr(), 5000, '0.0.0.0')
    sd_config = sd_root.get_sd_config('nfs')

    # the response must be non-empty and every entry needs both keys
    assert sd_config
    for item in sd_config:
        assert 'labels' in item
        assert 'targets' in item

    # the first target is the first fake daemon's ip on the nfs exporter port
    first_entry = sd_config[0]
    assert first_entry['targets'] == ['1.2.3.4:9587']
208+
191209
def test_get_sd_config_invalid_service(self):
192210
mgr = FakeMgr()
193211
root = Root(mgr, 5000, '0.0.0.0')

src/pybind/mgr/cephadm/tests/test_services.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -824,6 +824,10 @@ def test_prometheus_config_security_disabled(self, _run_cephadm, cephadm_module:
824824
http_sd_configs:
825825
- url: http://[::1]:8765/sd/prometheus/sd-config?service=nvmeof
826826
827+
- job_name: 'nfs'
828+
http_sd_configs:
829+
- url: http://[::1]:8765/sd/prometheus/sd-config?service=nfs
830+
827831
- job_name: 'federate'
828832
scrape_interval: 15s
829833
honor_labels: true
@@ -1021,6 +1025,19 @@ def gen_cert(host, addr):
10211025
tls_config:
10221026
ca_file: root_cert.pem
10231027
1028+
- job_name: 'nfs'
1029+
honor_labels: true
1030+
scheme: https
1031+
tls_config:
1032+
ca_file: root_cert.pem
1033+
http_sd_configs:
1034+
- url: https://[::1]:8765/sd/prometheus/sd-config?service=nfs
1035+
basic_auth:
1036+
username: sd_user
1037+
password: sd_password
1038+
tls_config:
1039+
ca_file: root_cert.pem
1040+
10241041
""").lstrip()
10251042

10261043
_run_cephadm.assert_called_with(

0 commit comments

Comments
 (0)