Skip to content

Commit 1bf6b32

Browse files
authored
Merge pull request ceph#55710 from rhcs-dashboard/nvmeof-prometheus-endpoint
cephadm/nvmeof: scrape nvmeof prometheus endpoint

Reviewed-by: Adam King <[email protected]>
Reviewed-by: Paul Cuzner <[email protected]>
2 parents 58ba7e5 + 93ec628 commit 1bf6b32

File tree

9 files changed

+83
-3
lines changed

9 files changed

+83
-3
lines changed

src/cephadm/cephadmlib/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/ceph-grafana:9.4.12'
1313
DEFAULT_HAPROXY_IMAGE = 'quay.io/ceph/haproxy:2.3'
1414
DEFAULT_KEEPALIVED_IMAGE = 'quay.io/ceph/keepalived:2.2.4'
15-
DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:latest'
15+
DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:1.0.0'
1616
DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1'
1717
DEFAULT_ELASTICSEARCH_IMAGE = 'quay.io/omrizeneva/elasticsearch:6.8.23'
1818
DEFAULT_JAEGER_COLLECTOR_IMAGE = 'quay.io/jaegertracing/jaeger-collector:1.29'

src/pybind/mgr/cephadm/module.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def os_exit_noop(status: int) -> None:
117117
DEFAULT_IMAGE = 'quay.io/ceph/ceph'
118118
DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.43.0'
119119
DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.5.0'
120-
DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:latest'
120+
DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:1.0.0'
121121
DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.4.0'
122122
DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.4.0'
123123
DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.25.0'

src/pybind/mgr/cephadm/service_discovery.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ class Server: # type: ignore
1919
from cephadm.services.ingress import IngressSpec
2020
from cephadm.ssl_cert_utils import SSLCerts
2121
from cephadm.services.cephadmservice import CephExporterService
22+
from cephadm.services.nvmeof import NvmeofService
2223

2324
if TYPE_CHECKING:
2425
from cephadm.module import CephadmOrchestrator
@@ -145,6 +146,7 @@ def index(self) -> str:
145146
<p><a href='prometheus/sd-config?service=node-exporter'>Node exporter http sd-config</a></p>
146147
<p><a href='prometheus/sd-config?service=haproxy'>HAProxy http sd-config</a></p>
147148
<p><a href='prometheus/sd-config?service=ceph-exporter'>Ceph exporter http sd-config</a></p>
149+
<p><a href='prometheus/sd-config?service=nvmeof'>NVMeoF http sd-config</a></p>
148150
<p><a href='prometheus/rules'>Prometheus rules</a></p>
149151
</body>
150152
</html>'''
@@ -163,6 +165,8 @@ def get_sd_config(self, service: str) -> List[Dict[str, Collection[str]]]:
163165
return self.haproxy_sd_config()
164166
elif service == 'ceph-exporter':
165167
return self.ceph_exporter_sd_config()
168+
elif service == 'nvmeof':
169+
return self.nvmeof_sd_config()
166170
else:
167171
return []
168172

@@ -231,6 +235,19 @@ def ceph_exporter_sd_config(self) -> List[Dict[str, Collection[str]]]:
231235
})
232236
return srv_entries
233237

238+
def nvmeof_sd_config(self) -> List[Dict[str, Collection[str]]]:
    """Build the Prometheus <http_sd_config> target list for nvmeof daemons.

    One entry is produced per nvmeof daemon known to the mgr cache. The
    target address is the daemon's own IP when it has one, otherwise the
    address the inventory holds for the daemon's host. The port is always
    ``NvmeofService.PROMETHEUS_PORT``. Each entry is labelled with the
    daemon's hostname as ``instance``.
    """
    exporter_port = NvmeofService.PROMETHEUS_PORT
    sd_targets = []
    for daemon in self.mgr.cache.get_daemons_by_type('nvmeof'):
        assert daemon.hostname is not None
        # Fall back to the inventory address when the daemon has no IP set.
        host_addr = daemon.ip or self.mgr.inventory.get_addr(daemon.hostname)
        # build_url() output may start with '/', which is not valid in a
        # "host:port" target, so strip any leading slashes.
        target = build_url(host=host_addr, port=exporter_port).lstrip('/')
        sd_targets.append({
            'targets': [target],
            'labels': {'instance': daemon.hostname},
        })
    return sd_targets
250+
234251
@cherrypy.expose(alias='prometheus/rules')
235252
def get_prometheus_rules(self) -> str:
236253
"""Return currently configured prometheus rules as Yaml."""

src/pybind/mgr/cephadm/services/monitoring.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,7 @@ def generate_config(
402402
haproxy_sd_url = f'{srv_end_point}service=haproxy' if haproxy_cnt > 0 else None
403403
mgr_prometheus_sd_url = f'{srv_end_point}service=mgr-prometheus' # always included
404404
ceph_exporter_sd_url = f'{srv_end_point}service=ceph-exporter' # always included
405+
nvmeof_sd_url = f'{srv_end_point}service=nvmeof' # always included
405406

406407
alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
407408
prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
@@ -417,7 +418,8 @@ def generate_config(
417418
'node_exporter_sd_url': node_exporter_sd_url,
418419
'alertmanager_sd_url': alertmanager_sd_url,
419420
'haproxy_sd_url': haproxy_sd_url,
420-
'ceph_exporter_sd_url': ceph_exporter_sd_url
421+
'ceph_exporter_sd_url': ceph_exporter_sd_url,
422+
'nvmeof_sd_url': nvmeof_sd_url,
421423
}
422424

423425
web_context = {

src/pybind/mgr/cephadm/services/nvmeof.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
class NvmeofService(CephService):
1818
TYPE = 'nvmeof'
19+
PROMETHEUS_PORT = 10008
1920

2021
def config(self, spec: NvmeofServiceSpec) -> None: # type: ignore
2122
assert self.TYPE == spec.service_type

src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ state_update_interval_sec = 5
1010
min_controller_id = {{ spec.min_controller_id }}
1111
max_controller_id = {{ spec.max_controller_id }}
1212
enable_spdk_discovery_controller = {{ spec.enable_spdk_discovery_controller }}
13+
enable_prometheus_exporter = True
14+
prometheus_exporter_ssl = False
15+
prometheus_port = 10008
1316

1417
[ceph]
1518
pool = {{ spec.pool }}

src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,23 @@ scrape_configs:
107107
- url: {{ ceph_exporter_sd_url }}
108108
{% endif %}
109109
{% endif %}
110+
111+
{% if nvmeof_sd_url %}
112+
- job_name: 'nvmeof'
113+
{% if secure_monitoring_stack %}
114+
honor_labels: true
115+
scheme: https
116+
tls_config:
117+
ca_file: root_cert.pem
118+
http_sd_configs:
119+
- url: {{ nvmeof_sd_url }}
120+
basic_auth:
121+
username: {{ service_discovery_username }}
122+
password: {{ service_discovery_password }}
123+
tls_config:
124+
ca_file: root_cert.pem
125+
{% else %}
126+
http_sd_configs:
127+
- url: {{ nvmeof_sd_url }}
128+
{% endif %}
129+
{% endif %}

src/pybind/mgr/cephadm/tests/test_service_discovery.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ def get_daemons_by_service(self, service_type):
1919
if service_type == 'ceph-exporter':
2020
return [FakeDaemonDescription('1.2.3.4', [9926], 'node0'),
2121
FakeDaemonDescription('1.2.3.5', [9926], 'node1')]
22+
if service_type == 'nvmeof':
23+
return [FakeDaemonDescription('1.2.3.4', [10008], 'node0'),
24+
FakeDaemonDescription('1.2.3.5', [10008], 'node1')]
2225

2326
return [FakeDaemonDescription('1.2.3.4', [9100], 'node0'),
2427
FakeDaemonDescription('1.2.3.5', [9200], 'node1')]
@@ -171,6 +174,20 @@ def test_get_sd_config_ceph_exporter(self):
171174
# check content
172175
assert cfg[0]['targets'] == ['1.2.3.4:9926']
173176

177+
def test_get_sd_config_nvmeof(self):
    """The nvmeof sd-config is non-empty, well-formed, and targets port 10008."""
    sd_config = Root(FakeMgr(), 5000, '0.0.0.0').get_sd_config('nvmeof')

    # Structure: at least one entry, each carrying both required keys.
    assert sd_config
    for sd_entry in sd_config:
        assert all(key in sd_entry for key in ('labels', 'targets'))

    # Content: the first fake daemon is exposed on the nvmeof exporter port.
    first_entry = sd_config[0]
    assert first_entry['targets'] == ['1.2.3.4:10008']
190+
174191
def test_get_sd_config_invalid_service(self):
175192
mgr = FakeMgr()
176193
root = Root(mgr, 5000, '0.0.0.0')

src/pybind/mgr/cephadm/tests/test_services.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,9 @@ def test_nvmeof_config(self, _get_name, _run_cephadm, cephadm_module: CephadmOrc
393393
min_controller_id = 1
394394
max_controller_id = 65519
395395
enable_spdk_discovery_controller = False
396+
enable_prometheus_exporter = True
397+
prometheus_exporter_ssl = False
398+
prometheus_port = 10008
396399
397400
[ceph]
398401
pool = {pool}
@@ -716,6 +719,10 @@ def test_prometheus_config_security_disabled(self, _run_cephadm, cephadm_module:
716719
honor_labels: true
717720
http_sd_configs:
718721
- url: http://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter
722+
723+
- job_name: 'nvmeof'
724+
http_sd_configs:
725+
- url: http://[::1]:8765/sd/prometheus/sd-config?service=nvmeof
719726
""").lstrip()
720727

721728
_run_cephadm.assert_called_with(
@@ -872,6 +879,19 @@ def gen_cert(host, addr):
872879
password: sd_password
873880
tls_config:
874881
ca_file: root_cert.pem
882+
883+
- job_name: 'nvmeof'
884+
honor_labels: true
885+
scheme: https
886+
tls_config:
887+
ca_file: root_cert.pem
888+
http_sd_configs:
889+
- url: https://[::1]:8765/sd/prometheus/sd-config?service=nvmeof
890+
basic_auth:
891+
username: sd_user
892+
password: sd_password
893+
tls_config:
894+
ca_file: root_cert.pem
875895
""").lstrip()
876896

877897
_run_cephadm.assert_called_with(

0 commit comments

Comments
 (0)