Skip to content

Commit 862a38e

Browse files
committed
mgr/cephadm: adding mTLS support
Signed-off-by: Redouane Kachach <[email protected]>
1 parent 0596664 commit 862a38e

File tree

23 files changed

+440
-278
lines changed

src/cephadm/cephadmlib/daemons/mgmt_gateway.py

Lines changed: 17 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,22 @@ def create_daemon_dirs(self, data_dir: str, uid: int, gid: int) -> None:
104104
raise OSError('data_dir is not a directory: %s' % (data_dir))
105105
logger.info('Writing mgmt-gateway config...')
106106
config_dir = os.path.join(data_dir, 'etc/')
107-
makedirs(config_dir, uid, gid, 0o755)
108-
recursive_chown(config_dir, uid, gid)
109-
populate_files(config_dir, self.files, uid, gid)
107+
ssl_dir = os.path.join(data_dir, 'etc/ssl')
108+
for ddir in [config_dir, ssl_dir]:
109+
makedirs(ddir, uid, gid, 0o755)
110+
recursive_chown(ddir, uid, gid)
111+
conf_files = {
112+
fname: content
113+
for fname, content in self.files.items()
114+
if fname.endswith('.conf')
115+
}
116+
cert_files = {
117+
fname: content
118+
for fname, content in self.files.items()
119+
if fname.endswith('.crt') or fname.endswith('.key')
120+
}
121+
populate_files(config_dir, conf_files, uid, gid)
122+
populate_files(ssl_dir, cert_files, uid, gid)
110123

111124
def _get_container_mounts(self, data_dir: str) -> Dict[str, str]:
112125
mounts: Dict[str, str] = {}
@@ -152,23 +165,6 @@ def customize_container_mounts(
152165
os.path.join(
153166
data_dir, 'etc/nginx_external_server.conf'
154167
): '/etc/nginx_external_server.conf:Z',
155-
os.path.join(
156-
data_dir, 'etc/nginx_internal.crt'
157-
): '/etc/nginx/ssl/nginx_internal.crt:Z',
158-
os.path.join(
159-
data_dir, 'etc/nginx_internal.key'
160-
): '/etc/nginx/ssl/nginx_internal.key:Z',
168+
os.path.join(data_dir, 'etc/ssl'): '/etc/nginx/ssl/',
161169
}
162170
)
163-
164-
if 'nginx.crt' in self.files:
165-
mounts.update(
166-
{
167-
os.path.join(
168-
data_dir, 'etc/nginx.crt'
169-
): '/etc/nginx/ssl/nginx.crt:Z',
170-
os.path.join(
171-
data_dir, 'etc/nginx.key'
172-
): '/etc/nginx/ssl/nginx.key:Z',
173-
}
174-
)

src/pybind/mgr/cephadm/cert_mgr.py

Lines changed: 16 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11

2-
from cephadm.ssl_cert_utils import SSLCerts
3-
from threading import Lock
2+
from cephadm.ssl_cert_utils import SSLCerts, SSLConfigException
43
from typing import TYPE_CHECKING, Tuple, Union, List
54

65
if TYPE_CHECKING:
@@ -13,31 +12,21 @@ class CertMgr:
1312
CEPHADM_ROOT_CA_KEY = 'cephadm_root_ca_key'
1413

1514
def __init__(self, mgr: "CephadmOrchestrator", ip: str) -> None:
16-
self.lock = Lock()
17-
self.initialized = False
18-
with self.lock:
19-
if self.initialized:
20-
return
21-
self.initialized = True
22-
self.mgr = mgr
23-
self.ssl_certs: SSLCerts = SSLCerts()
24-
old_cert = self.mgr.cert_key_store.get_cert(self.CEPHADM_ROOT_CA_CERT)
25-
old_key = self.mgr.cert_key_store.get_key(self.CEPHADM_ROOT_CA_KEY)
26-
if old_key and old_cert:
15+
self.ssl_certs: SSLCerts = SSLCerts()
16+
old_cert = mgr.cert_key_store.get_cert(self.CEPHADM_ROOT_CA_CERT)
17+
old_key = mgr.cert_key_store.get_key(self.CEPHADM_ROOT_CA_KEY)
18+
if old_key and old_cert:
19+
try:
2720
self.ssl_certs.load_root_credentials(old_cert, old_key)
28-
else:
29-
self.ssl_certs.generate_root_cert(ip)
30-
self.mgr.cert_key_store.save_cert(self.CEPHADM_ROOT_CA_CERT, self.ssl_certs.get_root_cert())
31-
self.mgr.cert_key_store.save_key(self.CEPHADM_ROOT_CA_KEY, self.ssl_certs.get_root_key())
21+
except SSLConfigException:
22+
raise Exception("Cannot load cephadm root CA certificates.")
23+
else:
24+
self.ssl_certs.generate_root_cert(ip)
25+
mgr.cert_key_store.save_cert(self.CEPHADM_ROOT_CA_CERT, self.ssl_certs.get_root_cert())
26+
mgr.cert_key_store.save_key(self.CEPHADM_ROOT_CA_KEY, self.ssl_certs.get_root_key())
3227

3328
def get_root_ca(self) -> str:
34-
with self.lock:
35-
if self.initialized:
36-
return self.ssl_certs.get_root_cert()
37-
raise Exception("Not initialized")
38-
39-
def generate_cert(self, host_fqdn: Union[str, List[str]], node_ip: str) -> Tuple[str, str]:
40-
with self.lock:
41-
if self.initialized:
42-
return self.ssl_certs.generate_cert(host_fqdn, node_ip)
43-
raise Exception("Not initialized")
29+
return self.ssl_certs.get_root_cert()
30+
31+
def generate_cert(self, host_fqdn: Union[str, List[str]], node_ip: Union[str, List[str]]) -> Tuple[str, str]:
32+
return self.ssl_certs.generate_cert(host_fqdn, node_ip)

src/pybind/mgr/cephadm/http_server.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ def __init__(self, mgr: "CephadmOrchestrator") -> None:
3131
self.service_discovery = ServiceDiscovery(mgr)
3232
self.cherrypy_shutdown_event = threading.Event()
3333
self._service_discovery_port = self.mgr.service_discovery_port
34-
self.secure_monitoring_stack = self.mgr.secure_monitoring_stack
34+
security_enabled, mgmt_gw_enabled = self.mgr._get_security_config()
35+
self.security_enabled = security_enabled
3536
super().__init__(target=self.run)
3637

3738
def configure_cherrypy(self) -> None:
@@ -45,12 +46,13 @@ def configure(self) -> None:
4546
self.agent.configure()
4647
self.service_discovery.configure(self.mgr.service_discovery_port,
4748
self.mgr.get_mgr_ip(),
48-
self.secure_monitoring_stack)
49+
self.security_enabled)
4950

5051
def config_update(self) -> None:
5152
self.service_discovery_port = self.mgr.service_discovery_port
52-
if self.secure_monitoring_stack != self.mgr.secure_monitoring_stack:
53-
self.secure_monitoring_stack = self.mgr.secure_monitoring_stack
53+
security_enabled, mgmt_gw_enabled = self.mgr._get_security_config()
54+
if self.security_enabled != security_enabled:
55+
self.security_enabled = security_enabled
5456
self.restart()
5557

5658
@property

src/pybind/mgr/cephadm/inventory.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1942,16 +1942,15 @@ def _init_known_cert_key_dicts(self) -> None:
19421942
'nvmeof_server_cert': {}, # service-name -> cert
19431943
'nvmeof_client_cert': {}, # service-name -> cert
19441944
'nvmeof_root_ca_cert': {}, # service-name -> cert
1945-
'agent_endpoint_root_cert': Cert(), # cert
1946-
'mgmt_gw_root_cert': Cert(), # cert
1947-
'service_discovery_root_cert': Cert(), # cert
1945+
'mgmt_gw_cert': Cert(), # cert
19481946
'cephadm_root_ca_cert': Cert(), # cert
19491947
'grafana_cert': {}, # host -> cert
19501948
}
19511949
# Similar to certs but for priv keys. Entries in known_certs
19521950
# that don't have a key here are probably certs in PEM format
19531951
# so there is no need to store a separate key
19541952
self.known_keys = {
1953+
'mgmt_gw_key': PrivKey(), # key
19551954
'cephadm_root_ca_key': PrivKey(), # key
19561955
'grafana_key': {}, # host -> key
19571956
'iscsi_ssl_key': {}, # service-name -> key

src/pybind/mgr/cephadm/module.py

Lines changed: 41 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import logging
77
import re
88
import shlex
9+
import socket
910
from collections import defaultdict
1011
from configparser import ConfigParser
1112
from contextlib import contextmanager
@@ -771,6 +772,23 @@ def _get_cephadm_service(self, service_type: str) -> CephadmService:
771772
assert service_type in ServiceSpec.KNOWN_SERVICE_TYPES
772773
return self.cephadm_services[service_type]
773774

775+
def get_fqdn(self, hostname: str) -> str:
776+
"""Get a host's FQDN with its hostname.
777+
778+
If the FQDN can't be resolved, the address from the inventory will
779+
be returned instead.
780+
"""
781+
# TODO(redo): get fqdn from the inventory
782+
addr = self.inventory.get_addr(hostname)
783+
return socket.getfqdn(addr)
784+
785+
def _get_security_config(self) -> Tuple[bool, bool]:
786+
# TODO(redo): enable when oauth2-proxy code is active
787+
# oauth2_proxy_enabled = len(self.mgr.cache.get_daemons_by_service('oauth2-proxy')) > 0
788+
mgmt_gw_enabled = len(self.cache.get_daemons_by_service('mgmt-gateway')) > 0
789+
security_enabled = self.secure_monitoring_stack or mgmt_gw_enabled
790+
return security_enabled, mgmt_gw_enabled
791+
774792
def _get_cephadm_binary_path(self) -> str:
775793
import hashlib
776794
m = hashlib.sha256()
@@ -2611,9 +2629,6 @@ def remove_service(self, service_name: str, force: bool = False) -> str:
26112629
raise OrchestratorError(
26122630
f'If {service_name} is removed then the following OSDs will remain, --force to proceed anyway\n{msg}')
26132631

2614-
if service_name == 'mgmt-gateway':
2615-
self.set_module_option('secure_monitoring_stack', False)
2616-
26172632
found = self.spec_store.rm(service_name)
26182633
if found and service_name.startswith('osd.'):
26192634
self.spec_store.finally_rm(service_name)
@@ -2943,21 +2958,26 @@ def get_daemon_names(daemons: List[str]) -> List[str]:
29432958
# add dependency on ceph-exporter daemons
29442959
deps += [d.name() for d in self.cache.get_daemons_by_service('ceph-exporter')]
29452960
deps += [d.name() for d in self.cache.get_daemons_by_service('mgmt-gateway')]
2946-
if self.secure_monitoring_stack:
2961+
security_enabled, _ = self._get_security_config()
2962+
if security_enabled:
29472963
if prometheus_user and prometheus_password:
29482964
deps.append(f'{hash(prometheus_user + prometheus_password)}')
29492965
if alertmanager_user and alertmanager_password:
29502966
deps.append(f'{hash(alertmanager_user + alertmanager_password)}')
29512967
elif daemon_type == 'grafana':
29522968
deps += get_daemon_names(['prometheus', 'loki', 'mgmt-gateway'])
2953-
if self.secure_monitoring_stack and prometheus_user and prometheus_password:
2969+
security_enabled, _ = self._get_security_config()
2970+
if security_enabled and prometheus_user and prometheus_password:
29542971
deps.append(f'{hash(prometheus_user + prometheus_password)}')
29552972
elif daemon_type == 'alertmanager':
29562973
deps += get_daemon_names(['mgr', 'alertmanager', 'snmp-gateway', 'mgmt-gateway'])
2957-
if self.secure_monitoring_stack and alertmanager_user and alertmanager_password:
2974+
security_enabled, _ = self._get_security_config()
2975+
if security_enabled and alertmanager_user and alertmanager_password:
29582976
deps.append(f'{hash(alertmanager_user + alertmanager_password)}')
29592977
elif daemon_type == 'promtail':
29602978
deps += get_daemon_names(['loki'])
2979+
elif daemon_type in ['ceph-exporter', 'node-exporter']:
2980+
deps += get_daemon_names(['mgmt-gateway'])
29612981
elif daemon_type == JaegerAgentService.TYPE:
29622982
for dd in self.cache.get_daemons_by_type(JaegerCollectorService.TYPE):
29632983
assert dd.hostname is not None
@@ -2972,7 +2992,7 @@ def get_daemon_names(daemons: List[str]) -> List[str]:
29722992
# this daemon type doesn't need deps mgmt
29732993
pass
29742994

2975-
if daemon_type in ['prometheus', 'node-exporter', 'alertmanager', 'grafana', 'mgmt-gateway']:
2995+
if daemon_type in ['prometheus', 'node-exporter', 'alertmanager', 'grafana']:
29762996
deps.append(f'secure_monitoring_stack:{self.secure_monitoring_stack}')
29772997

29782998
return sorted(deps)
@@ -3088,10 +3108,17 @@ def _get_prometheus_credentials(self) -> Tuple[str, str]:
30883108

30893109
@handle_orch_error
30903110
def generate_certificates(self, module_name: str) -> Optional[Dict[str, str]]:
3111+
import socket
30913112
supported_moduels = ['dashboard', 'prometheus']
30923113
if module_name not in supported_moduels:
30933114
raise OrchestratorError(f'Unsupported modlue {module_name}. Supported moduels are: {supported_moduels}')
3094-
cert, key = self.cert_mgr.generate_cert(self.get_hostname(), self.get_mgr_ip())
3115+
3116+
host_fqdns = [socket.getfqdn(self.get_hostname())]
3117+
node_ip = self.get_mgr_ip()
3118+
if module_name == 'dashboard':
3119+
host_fqdns.append('dashboard_servers')
3120+
3121+
cert, key = self.cert_mgr.generate_cert(host_fqdns, node_ip)
30953122
return {'cert': cert, 'key': key}
30963123

30973124
@handle_orch_error
@@ -3148,13 +3175,19 @@ def set_alertmanager_access_info(self, user: str, password: str) -> str:
31483175

31493176
@handle_orch_error
31503177
def get_prometheus_access_info(self) -> Dict[str, str]:
3178+
security_enabled, _ = self._get_security_config()
3179+
if not security_enabled:
3180+
return {}
31513181
user, password = self._get_prometheus_credentials()
31523182
return {'user': user,
31533183
'password': password,
31543184
'certificate': self.cert_mgr.get_root_ca()}
31553185

31563186
@handle_orch_error
31573187
def get_alertmanager_access_info(self) -> Dict[str, str]:
3188+
security_enabled, _ = self._get_security_config()
3189+
if not security_enabled:
3190+
return {}
31583191
user, password = self._get_alertmanager_credentials()
31593192
return {'user': user,
31603193
'password': password,
@@ -3403,9 +3436,6 @@ def _apply_service_spec(self, spec: ServiceSpec) -> str:
34033436
host_count = len(self.inventory.keys())
34043437
max_count = self.max_count_per_host
34053438

3406-
if spec.service_type == 'mgmt-gateway':
3407-
self.set_module_option('secure_monitoring_stack', True)
3408-
34093439
if spec.placement.count is not None:
34103440
if spec.service_type in ['mon', 'mgr']:
34113441
if spec.placement.count > max(5, host_count):

src/pybind/mgr/cephadm/services/cephadmservice.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ def get_dashboard_endpoints(svc: 'CephadmService') -> Tuple[List[str], Optional[
9090
if not port:
9191
continue
9292
assert dd.hostname is not None
93-
addr = svc._inventory_get_fqdn(dd.hostname)
93+
addr = svc.mgr.get_fqdn(dd.hostname)
9494
dashboard_endpoints.append(f'{addr}:{port}')
9595

9696
return dashboard_endpoints, protocol
@@ -124,7 +124,7 @@ def get_dashboard_urls(svc: 'CephadmService') -> List[str]:
124124
if dd.daemon_id == svc.mgr.get_mgr_id():
125125
continue
126126
assert dd.hostname is not None
127-
addr = svc._inventory_get_fqdn(dd.hostname)
127+
addr = svc.mgr.get_fqdn(dd.hostname)
128128
dashboard_urls.append(build_url(scheme=proto, host=addr, port=port).rstrip('/'))
129129

130130
return dashboard_urls
@@ -384,15 +384,6 @@ def get_keyring_with_caps(self, entity: AuthEntity, caps: List[str]) -> str:
384384
raise OrchestratorError(f"Unable to fetch keyring for {entity}: {err}")
385385
return simplified_keyring(entity, keyring)
386386

387-
def _inventory_get_fqdn(self, hostname: str) -> str:
388-
"""Get a host's FQDN with its hostname.
389-
390-
If the FQDN can't be resolved, the address from the inventory will
391-
be returned instead.
392-
"""
393-
addr = self.mgr.inventory.get_addr(hostname)
394-
return socket.getfqdn(addr)
395-
396387
def _set_value_on_dashboard(self,
397388
service_name: str,
398389
get_mon_cmd: str,
@@ -1282,11 +1273,29 @@ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonD
12821273
if spec.stats_period:
12831274
exporter_config.update({'stats-period': f'{spec.stats_period}'})
12841275

1276+
security_enabled, mgmt_gw_enabled = self.mgr._get_security_config()
1277+
if security_enabled:
1278+
exporter_config.update({'https_enabled': True})
1279+
crt, key = self.get_certificates(daemon_spec)
1280+
exporter_config['files'] = {
1281+
'ceph-exporter.crt': crt,
1282+
'ceph-exporter.key': key
1283+
}
12851284
daemon_spec.keyring = keyring
12861285
daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
12871286
daemon_spec.final_config = merge_dicts(daemon_spec.final_config, exporter_config)
1287+
1288+
deps = []
1289+
deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')]
1290+
deps += [f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}']
1291+
daemon_spec.deps = deps
1292+
12881293
return daemon_spec
12891294

1295+
def get_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]:
1296+
node_ip = self.mgr.inventory.get_addr(daemon_spec.host)
1297+
host_fqdn = self.mgr.get_fqdn(daemon_spec.host)
1298+
return self.mgr.cert_mgr.generate_cert(host_fqdn, node_ip)
12901299

12911300
class CephfsMirrorService(CephService):
12921301
TYPE = 'cephfs-mirror'

0 commit comments

Comments
 (0)