Skip to content

Commit 93441ea

Browse files
authored
Merge pull request ceph#58815 from synarete/samba-metrics-exporter
cephadm: samba metrics exporter

Reviewed-by: Adam King <[email protected]>
Reviewed-by: Avan Thakkar <[email protected]>
Reviewed-by: John Mulligan <[email protected]>
2 parents 03f572d + 4f1d979 commit 93441ea

File tree

9 files changed: 157 additions and 15 deletions.

src/cephadm/cephadmlib/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
DEFAULT_JAEGER_AGENT_IMAGE = 'quay.io/jaegertracing/jaeger-agent:1.29'
2020
DEFAULT_JAEGER_QUERY_IMAGE = 'quay.io/jaegertracing/jaeger-query:1.29'
2121
DEFAULT_SMB_IMAGE = 'quay.io/samba.org/samba-server:devbuilds-centos-amd64'
22+
DEFAULT_SMBMETRICS_IMAGE = 'quay.io/samba.org/samba-metrics:latest'
2223
DEFAULT_NGINX_IMAGE = 'quay.io/ceph/nginx:1.26.1'
2324
DEFAULT_OAUTH2_PROXY_IMAGE = 'quay.io/oauth2-proxy/oauth2-proxy:v7.6.0'
2425
DEFAULT_REGISTRY = 'docker.io' # normalize unqualified digests to this

src/cephadm/cephadmlib/daemons/smb.py

Lines changed: 64 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,8 @@ class Config:
8181
smb_port: int
8282
ceph_config_entity: str
8383
vhostname: str
84+
metrics_image: str
85+
metrics_port: int
8486
# clustering related values
8587
rank: int
8688
rank_generation: int
@@ -103,6 +105,8 @@ def __init__(
103105
smb_port: int = 0,
104106
ceph_config_entity: str = 'client.admin',
105107
vhostname: str = '',
108+
metrics_image: str = '',
109+
metrics_port: int = 0,
106110
rank: int = -1,
107111
rank_generation: int = -1,
108112
cluster_meta_uri: str = '',
@@ -122,6 +126,8 @@ def __init__(
122126
self.smb_port = smb_port
123127
self.ceph_config_entity = ceph_config_entity
124128
self.vhostname = vhostname
129+
self.metrics_image = metrics_image
130+
self.metrics_port = metrics_port
125131
self.rank = rank
126132
self.rank_generation = rank_generation
127133
self.cluster_meta_uri = cluster_meta_uri
@@ -155,15 +161,34 @@ def _container_dns_args(cfg: Config) -> List[str]:
155161
return cargs
156162

157163

158-
class SambaContainerCommon:
159-
def __init__(
160-
self,
161-
cfg: Config,
162-
) -> None:
164+
class ContainerCommon:
    """Common base for the container descriptions in this module.

    Stores the shared ``Config`` and an optional image name, and supplies
    empty defaults for every hook except ``name``.
    """

    def __init__(self, cfg: Config, image: str = '') -> None:
        self.image = image
        self.cfg = cfg

    def name(self) -> str:
        """Return the container's name; this base version always raises."""
        raise NotImplementedError('container name')

    def container_image(self) -> str:
        """Return the image given at construction ('' when none was given)."""
        return self.image

    def container_args(self) -> List[str]:
        """Return extra container arguments; none by default."""
        return []

    def args(self) -> List[str]:
        """Return the arguments for the container's command; none by default."""
        return []

    def envs(self) -> Dict[str, str]:
        """Return environment variables as a mapping; empty by default."""
        return {}

    def envs_list(self) -> List[str]:
        """Return environment variables as a list; empty by default."""
        return []
186+
187+
188+
class SambaContainerCommon(ContainerCommon):
189+
def __init__(self, cfg: Config, image: str = '') -> None:
190+
self.cfg = cfg
191+
self.image = image
167192

168193
def envs(self) -> Dict[str, str]:
169194
environ = {
@@ -196,9 +221,6 @@ def args(self) -> List[str]:
196221
args.append(f'--debug-delay={self.cfg.debug_delay}')
197222
return args
198223

199-
def container_args(self) -> List[str]:
200-
return []
201-
202224

203225
class SambaNetworkedInitContainer(SambaContainerCommon):
204226
"""SambaContainerCommon subclass that enables additional networking
@@ -233,6 +255,9 @@ def container_args(self) -> List[str]:
233255
cargs = []
234256
if self.cfg.smb_port:
235257
cargs.append(f'--publish={self.cfg.smb_port}:{self.cfg.smb_port}')
258+
if self.cfg.metrics_port:
259+
metrics_port = self.cfg.metrics_port
260+
cargs.append(f'--publish={metrics_port}:{metrics_port}')
236261
cargs.extend(_container_dns_args(self.cfg))
237262
return cargs
238263

@@ -284,6 +309,17 @@ def args(self) -> List[str]:
284309
return super().args() + ['update-config', '--watch']
285310

286311

312+
class SMBMetricsContainer(ContainerCommon):
    """Container description for the samba metrics exporter."""

    def name(self) -> str:
        """Return the fixed container name, 'smbmetrics'."""
        return 'smbmetrics'

    def args(self) -> List[str]:
        """Return the exporter's command-line arguments.

        A ``--port`` option is produced only when the configured metrics
        port is greater than zero; otherwise the list is empty.
        """
        port = self.cfg.metrics_port
        if port > 0:
            return [f'--port={port}']
        return []
321+
322+
287323
class CTDBMigrateInitContainer(SambaContainerCommon):
288324
def name(self) -> str:
289325
return 'ctdbMigrate'
@@ -358,13 +394,13 @@ def args(self) -> List[str]:
358394
class ContainerLayout:
359395
init_containers: List[SambaContainerCommon]
360396
primary: SambaContainerCommon
361-
supplemental: List[SambaContainerCommon]
397+
supplemental: List[ContainerCommon]
362398

363399
def __init__(
364400
self,
365401
init_containers: List[SambaContainerCommon],
366402
primary: SambaContainerCommon,
367-
supplemental: List[SambaContainerCommon],
403+
supplemental: List[ContainerCommon],
368404
) -> None:
369405
self.init_containers = init_containers
370406
self.primary = primary
@@ -393,6 +429,7 @@ def __init__(self, ctx: CephadmContext, ident: DaemonIdentity):
393429
self._cached_layout: Optional[ContainerLayout] = None
394430
self._rank_info = context_getters.fetch_rank_info(ctx)
395431
self.smb_port = 445
432+
self.metrics_port = 9922
396433
self._network_mapper = _NetworkMapper(ctx)
397434
logger.debug('Created SMB ContainerDaemonForm instance')
398435

@@ -431,6 +468,8 @@ def validate(self) -> None:
431468
files = data_utils.dict_get(configs, 'files', {})
432469
ceph_config_entity = configs.get('config_auth_entity', '')
433470
vhostname = configs.get('virtual_hostname', '')
471+
metrics_image = configs.get('metrics_image', '')
472+
metrics_port = int(configs.get('metrics_port', '0'))
434473
cluster_meta_uri = configs.get('cluster_meta_uri', '')
435474
cluster_lock_uri = configs.get('cluster_lock_uri', '')
436475
cluster_public_addrs = configs.get('cluster_public_addrs', [])
@@ -470,6 +509,8 @@ def validate(self) -> None:
470509
smb_port=self.smb_port,
471510
ceph_config_entity=ceph_config_entity,
472511
vhostname=vhostname,
512+
metrics_image=metrics_image,
513+
metrics_port=metrics_port,
473514
cluster_meta_uri=cluster_meta_uri,
474515
cluster_lock_uri=cluster_lock_uri,
475516
cluster_public_addrs=_public_addrs,
@@ -517,7 +558,7 @@ def _layout(self) -> ContainerLayout:
517558
if self._cached_layout:
518559
return self._cached_layout
519560
init_ctrs: List[SambaContainerCommon] = []
520-
ctrs: List[SambaContainerCommon] = []
561+
ctrs: List[ContainerCommon] = []
521562

522563
init_ctrs.append(ConfigInitContainer(self._cfg))
523564
ctrs.append(ConfigWatchContainer(self._cfg))
@@ -526,6 +567,11 @@ def _layout(self) -> ContainerLayout:
526567
init_ctrs.append(MustJoinContainer(self._cfg))
527568
ctrs.append(WinbindContainer(self._cfg))
528569

570+
metrics_image = self._cfg.metrics_image.strip()
571+
metrics_port = self._cfg.metrics_port
572+
if metrics_image and metrics_port > 0:
573+
ctrs.append(SMBMetricsContainer(self._cfg, metrics_image))
574+
529575
if self._cfg.clustered:
530576
init_ctrs += [
531577
CTDBMigrateInitContainer(self._cfg),
@@ -564,7 +610,7 @@ def _to_init_container(
564610
)
565611

566612
def _to_sidecar_container(
567-
self, ctx: CephadmContext, smb_ctr: SambaContainerCommon
613+
self, ctx: CephadmContext, smb_ctr: ContainerCommon
568614
) -> SidecarContainer:
569615
volume_mounts: Dict[str, str] = {}
570616
container_args: List[str] = smb_ctr.container_args()
@@ -587,10 +633,11 @@ def _to_sidecar_container(
587633
identity = DaemonSubIdentity.from_parent(
588634
self.identity, smb_ctr.name()
589635
)
636+
img = smb_ctr.container_image() or ctx.image or self.default_image
590637
return SidecarContainer(
591638
ctx,
592639
entrypoint='',
593-
image=ctx.image or self.default_image,
640+
image=img,
594641
identity=identity,
595642
container_args=container_args,
596643
args=smb_ctr.args(),
@@ -673,6 +720,9 @@ def customize_container_endpoints(
673720
) -> None:
674721
if not any(ep.port == self.smb_port for ep in endpoints):
675722
endpoints.append(EndPoint('0.0.0.0', self.smb_port))
723+
if self.metrics_port > 0:
724+
if not any(ep.port == self.metrics_port for ep in endpoints):
725+
endpoints.append(EndPoint('0.0.0.0', self.metrics_port))
676726

677727
def prepare_data_dir(self, data_dir: str, uid: int, gid: int) -> None:
678728
self.validate()

src/pybind/mgr/cephadm/module.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ def os_exit_noop(status: int) -> None:
148148
DEFAULT_OAUTH2_PROXY = 'quay.io/oauth2-proxy/oauth2-proxy:v7.6.0'
149149
DEFAULT_JAEGER_QUERY_IMAGE = 'quay.io/jaegertracing/jaeger-query:1.29'
150150
DEFAULT_SAMBA_IMAGE = 'quay.io/samba.org/samba-server:devbuilds-centos-amd64'
151+
DEFAULT_SAMBA_METRICS_IMAGE = 'quay.io/samba.org/samba-metrics:latest'
151152
# ------------------------------------------------------------------------------
152153

153154

@@ -319,6 +320,11 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
319320
default=DEFAULT_SAMBA_IMAGE,
320321
desc='Samba/SMB container image',
321322
),
323+
Option(
324+
'container_image_samba_metrics',
325+
default=DEFAULT_SAMBA_METRICS_IMAGE,
326+
desc='Samba/SMB metrics exporter container image',
327+
),
322328
Option(
323329
'warn_on_stray_hosts',
324330
type='bool',
@@ -585,6 +591,7 @@ def __init__(self, *args: Any, **kwargs: Any):
585591
self.container_image_jaeger_collector = ''
586592
self.container_image_jaeger_query = ''
587593
self.container_image_samba = ''
594+
self.container_image_samba_metrics = ''
588595
self.warn_on_stray_hosts = True
589596
self.warn_on_stray_daemons = True
590597
self.warn_on_failed_host_check = True

src/pybind/mgr/cephadm/service_discovery.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ class Server: # type: ignore
1313
from mgr_util import build_url
1414
from typing import Dict, List, TYPE_CHECKING, cast, Collection, Callable, NamedTuple, Optional, IO
1515
from cephadm.services.nfs import NFSService
16+
from cephadm.services.smb import SMBService
1617
from cephadm.services.monitoring import AlertmanagerService, NodeExporterService, PrometheusService
1718
import secrets
1819
from mgr_util import verify_tls_files
@@ -149,6 +150,7 @@ def index(self) -> str:
149150
<p><a href='prometheus/sd-config?service=ceph-exporter'>Ceph exporter http sd-config</a></p>
150151
<p><a href='prometheus/sd-config?service=nvmeof'>NVMeoF http sd-config</a></p>
151152
<p><a href='prometheus/sd-config?service=nfs'>NFS http sd-config</a></p>
153+
<p><a href='prometheus/sd-config?service=smb'>SMB http sd-config</a></p>
152154
<p><a href='prometheus/rules'>Prometheus rules</a></p>
153155
</body>
154156
</html>'''
@@ -171,6 +173,8 @@ def get_sd_config(self, service: str) -> List[Dict[str, Collection[str]]]:
171173
return self.nvmeof_sd_config()
172174
elif service == 'nfs':
173175
return self.nfs_sd_config()
176+
elif service == 'smb':
177+
return self.smb_sd_config()
174178
else:
175179
return []
176180

@@ -265,6 +269,19 @@ def nfs_sd_config(self) -> List[Dict[str, Collection[str]]]:
265269
})
266270
return srv_entries
267271

272+
def smb_sd_config(self) -> List[Dict[str, Collection[str]]]:
    """Return <http_sd_config> compatible prometheus config for smb service.

    Each cached daemon of type 'smb' contributes one entry. Its target is
    the daemon's IP (or, when that is unset, the inventory address of its
    host) combined with the default SMB exporter port, and its 'instance'
    label is the daemon's hostname.
    """
    exporter_port = SMBService.DEFAULT_EXPORTER_PORT
    entries = []
    for daemon in self.mgr.cache.get_daemons_by_type('smb'):
        assert daemon.hostname is not None
        host_addr = daemon.ip or self.mgr.inventory.get_addr(daemon.hostname)
        # build_url output may start with '/'; strip it from the target
        target = build_url(host=host_addr, port=exporter_port).lstrip('/')
        entries.append({
            'targets': [target],
            'labels': {'instance': daemon.hostname},
        })
    return entries
284+
268285
@cherrypy.expose(alias='prometheus/rules')
269286
def get_prometheus_rules(self) -> str:
270287
"""Return currently configured prometheus rules as Yaml."""

src/pybind/mgr/cephadm/services/monitoring.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,7 @@ def generate_config(
503503
nvmeof_sd_url = f'{srv_end_point}service=nvmeof' # always included
504504
mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0
505505
nfs_sd_url = f'{srv_end_point}service=nfs' # always included
506+
smb_sd_url = f'{srv_end_point}service=smb' # always included
506507

507508
alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
508509
prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
@@ -524,7 +525,8 @@ def generate_config(
524525
'nvmeof_sd_url': nvmeof_sd_url,
525526
'external_prometheus_targets': targets,
526527
'cluster_fsid': FSID,
527-
'nfs_sd_url': nfs_sd_url
528+
'nfs_sd_url': nfs_sd_url,
529+
'smb_sd_url': smb_sd_url
528530
}
529531

530532
ip_to_bind_to = ''

src/pybind/mgr/cephadm/services/smb.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
class SMBService(CephService):
1818
TYPE = 'smb'
19+
DEFAULT_EXPORTER_PORT = 9922
1920
smb_pool = '.smb' # minor layering violation. try to clean up later.
2021

2122
def config(self, spec: ServiceSpec) -> None:
@@ -79,6 +80,11 @@ def generate_config(
7980
smb_spec, daemon_spec.daemon_id, ceph_users
8081
)
8182
)
83+
config_blobs['metrics_image'] = (
84+
self.mgr.container_image_samba_metrics
85+
)
86+
config_blobs['metrics_port'] = SMBService.DEFAULT_EXPORTER_PORT
87+
8288
logger.debug('smb generate_config: %r', config_blobs)
8389
self._configure_cluster_meta(smb_spec, daemon_spec)
8490
return config_blobs, []

src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,26 @@ scrape_configs:
181181
{% endif %}
182182
{% endif %}
183183

184+
{% if smb_sd_url %}
185+
- job_name: 'smb'
186+
{% if security_enabled %}
187+
honor_labels: true
188+
scheme: https
189+
tls_config:
190+
ca_file: root_cert.pem
191+
http_sd_configs:
192+
- url: {{ smb_sd_url }}
193+
basic_auth:
194+
username: {{ service_discovery_username }}
195+
password: {{ service_discovery_password }}
196+
tls_config:
197+
ca_file: root_cert.pem
198+
{% else %}
199+
http_sd_configs:
200+
- url: {{ smb_sd_url }}
201+
{% endif %}
202+
{% endif %}
203+
184204
{% if not security_enabled %}
185205
- job_name: 'federate'
186206
scrape_interval: 15s

src/pybind/mgr/cephadm/tests/test_service_discovery.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ def get_daemons_by_service(self, service_type):
2727
return [FakeDaemonDescription('1.2.3.4', [9587], 'node0'),
2828
FakeDaemonDescription('1.2.3.5', [9587], 'node1')]
2929

30+
if service_type == 'smb':
31+
return [FakeDaemonDescription('1.2.3.4', [9922], 'node0'),
32+
FakeDaemonDescription('1.2.3.5', [9922], 'node1')]
33+
3034
return [FakeDaemonDescription('1.2.3.4', [9100], 'node0'),
3135
FakeDaemonDescription('1.2.3.5', [9200], 'node1')]
3236

@@ -206,6 +210,20 @@ def test_get_sd_config_nfs(self):
206210
# check content
207211
assert cfg[0]['targets'] == ['1.2.3.4:9587']
208212

213+
def test_get_sd_config_smb(self):
    """Check the structure and first target of the 'smb' sd-config."""
    root = Root(FakeMgr(), 5000, '0.0.0.0')
    cfg = root.get_sd_config('smb')

    # the response must be non-empty and every entry must carry both keys
    assert cfg
    assert all('labels' in entry for entry in cfg)
    assert all('targets' in entry for entry in cfg)

    # the first entry must point at 1.2.3.4 on port 9922
    assert cfg[0]['targets'] == ['1.2.3.4:9922']
226+
209227
def test_get_sd_config_invalid_service(self):
210228
mgr = FakeMgr()
211229
root = Root(mgr, 5000, '0.0.0.0')

0 commit comments

Comments
 (0)