Skip to content

Commit 25a4f2a

Browse files
committed
mgr/cephadm: introducing cert_mgr new class to centralize certs mgmt
cert_mgr will be solely responsible for managing all certificates generated and maintained by cephadm. In addition, cephadm now provides a new command to generate certificates for external modules. Signed-off-by: Redouane Kachach <[email protected]>
1 parent 81a2583 commit 25a4f2a

19 files changed

+174
-302
lines changed

src/pybind/mgr/cephadm/agent.py

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ class Server: # type: ignore
1010
import logging
1111
import socket
1212
import ssl
13-
import tempfile
1413
import threading
1514
import time
1615

@@ -20,11 +19,12 @@ class Server: # type: ignore
2019
from ceph.deployment.inventory import Devices
2120
from ceph.deployment.service_spec import ServiceSpec, PlacementSpec
2221
from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
23-
from cephadm.ssl_cert_utils import SSLCerts
2422
from mgr_util import test_port_allocation, PortAlreadyInUse
23+
from mgr_util import verify_tls_files
24+
import tempfile
2525

2626
from urllib.error import HTTPError, URLError
27-
from typing import Any, Dict, List, Set, TYPE_CHECKING, Optional, MutableMapping
27+
from typing import Any, Dict, List, Set, TYPE_CHECKING, Optional, MutableMapping, IO
2828

2929
if TYPE_CHECKING:
3030
from cephadm.module import CephadmOrchestrator
@@ -46,9 +46,10 @@ class AgentEndpoint:
4646

4747
def __init__(self, mgr: "CephadmOrchestrator") -> None:
4848
self.mgr = mgr
49-
self.ssl_certs = SSLCerts()
5049
self.server_port = 7150
5150
self.server_addr = self.mgr.get_mgr_ip()
51+
self.key_file: IO[bytes]
52+
self.cert_file: IO[bytes]
5253

5354
def configure_routes(self) -> None:
5455
conf = {'/': {'tools.trailing_slash.on': False}}
@@ -57,19 +58,19 @@ def configure_routes(self) -> None:
5758
cherrypy.tree.mount(self.node_proxy_endpoint, '/node-proxy', config=conf)
5859

5960
def configure_tls(self, server: Server) -> None:
60-
old_cert = self.mgr.cert_key_store.get_cert('agent_endpoint_root_cert')
61-
old_key = self.mgr.cert_key_store.get_key('agent_endpoint_key')
61+
addr = self.mgr.get_mgr_ip()
62+
host = self.mgr.get_hostname()
63+
cert, key = self.mgr.cert_mgr.generate_cert(host, addr)
64+
self.cert_file = tempfile.NamedTemporaryFile()
65+
self.cert_file.write(cert.encode('utf-8'))
66+
self.cert_file.flush() # cert_tmp must not be gc'ed
6267

63-
if old_cert and old_key:
64-
self.ssl_certs.load_root_credentials(old_cert, old_key)
65-
else:
66-
self.ssl_certs.generate_root_cert(self.mgr.get_mgr_ip())
67-
self.mgr.cert_key_store.save_cert('agent_endpoint_root_cert', self.ssl_certs.get_root_cert())
68-
self.mgr.cert_key_store.save_key('agent_endpoint_key', self.ssl_certs.get_root_key())
68+
self.key_file = tempfile.NamedTemporaryFile()
69+
self.key_file.write(key.encode('utf-8'))
70+
self.key_file.flush() # pkey_tmp must not be gc'ed
6971

70-
host = self.mgr.get_hostname()
71-
addr = self.mgr.get_mgr_ip()
72-
server.ssl_certificate, server.ssl_private_key = self.ssl_certs.generate_cert_files(host, addr)
72+
verify_tls_files(self.cert_file.name, self.key_file.name)
73+
server.ssl_certificate, server.ssl_private_key = self.cert_file.name, self.key_file.name
7374

7475
def find_free_port(self) -> None:
7576
max_port = self.server_port + 150
@@ -94,7 +95,7 @@ def configure(self) -> None:
9495
class NodeProxyEndpoint:
9596
def __init__(self, mgr: "CephadmOrchestrator"):
9697
self.mgr = mgr
97-
self.ssl_root_crt = self.mgr.http_server.agent.ssl_certs.get_root_cert()
98+
self.ssl_root_crt = self.mgr.cert_mgr.get_root_ca()
9899
self.ssl_ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
99100
self.ssl_ctx.check_hostname = False
100101
self.ssl_ctx.verify_mode = ssl.CERT_NONE
@@ -301,7 +302,7 @@ def led(self, **kw: Any) -> Dict[str, Any]:
301302
endpoint: List[Any] = ['led', led_type]
302303
device: str = id_drive if id_drive else ''
303304

304-
ssl_root_crt = self.mgr.http_server.agent.ssl_certs.get_root_cert()
305+
ssl_root_crt = self.mgr.cert_mgr.get_root_ca()
305306
ssl_ctx = ssl.create_default_context()
306307
ssl_ctx.check_hostname = True
307308
ssl_ctx.verify_mode = ssl.CERT_REQUIRED
@@ -774,14 +775,13 @@ def run(self) -> None:
774775
self.mgr.agent_cache.sending_agent_message[self.host] = True
775776
try:
776777
assert self.agent
777-
root_cert = self.agent.ssl_certs.get_root_cert()
778+
root_cert = self.mgr.cert_mgr.get_root_ca()
778779
root_cert_tmp = tempfile.NamedTemporaryFile()
779780
root_cert_tmp.write(root_cert.encode('utf-8'))
780781
root_cert_tmp.flush()
781782
root_cert_fname = root_cert_tmp.name
782783

783-
cert, key = self.agent.ssl_certs.generate_cert(
784-
self.mgr.get_hostname(), self.mgr.get_mgr_ip())
784+
cert, key = self.mgr.cert_mgr.generate_cert(self.mgr.get_hostname(), self.mgr.get_mgr_ip())
785785

786786
cert_tmp = tempfile.NamedTemporaryFile()
787787
cert_tmp.write(cert.encode('utf-8'))
@@ -950,7 +950,7 @@ def _check_agent(self, host: str) -> bool:
950950
down = False
951951
try:
952952
assert self.agent
953-
assert self.agent.ssl_certs.get_root_cert()
953+
assert self.mgr.cert_mgr.get_root_ca()
954954
except Exception:
955955
self.mgr.log.debug(
956956
f'Delaying checking agent on {host} until cephadm endpoint finished creating root cert')
@@ -974,7 +974,7 @@ def _check_agent(self, host: str) -> bool:
974974
# so it's necessary to check this one specifically
975975
root_cert_match = False
976976
try:
977-
root_cert = self.agent.ssl_certs.get_root_cert()
977+
root_cert = self.mgr.cert_mgr.get_root_ca()
978978
if last_deps and root_cert in last_deps:
979979
root_cert_match = True
980980
except Exception:

src/pybind/mgr/cephadm/cert_mgr.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
2+
from cephadm.ssl_cert_utils import SSLCerts
3+
from threading import Lock
4+
from typing import TYPE_CHECKING, Tuple, Union, List
5+
6+
if TYPE_CHECKING:
7+
from cephadm.module import CephadmOrchestrator
8+
9+
10+
class CertMgr:
    """Central owner of the cephadm root CA and of certificates issued from it.

    On construction the root CA certificate and key are read from the mgr's
    cert/key store. When both are present they are loaded into the wrapped
    ``SSLCerts`` object; otherwise a new root CA is generated for ``ip`` and
    the resulting certificate and key are saved back to the store.
    """

    # Names under which the root CA cert/key are kept in the cert/key store.
    CEPHADM_ROOT_CA_CERT = 'cephadm_root_ca_cert'
    CEPHADM_ROOT_CA_KEY = 'cephadm_root_ca_key'

    def __init__(self, mgr: "CephadmOrchestrator", ip: str) -> None:
        """Load the stored root CA, or create and persist a new one.

        :param mgr: orchestrator module; its ``cert_key_store`` is used to
            read and save the root CA certificate and key.
        :param ip: address passed to ``SSLCerts.generate_root_cert`` when no
            stored root CA is found.
        """
        self.lock = Lock()
        # Stays False until the root CA below has been loaded or generated,
        # so a failure part-way through never leaves the flag set.
        self.initialized = False
        self.mgr = mgr
        self.ssl_certs: SSLCerts = SSLCerts()
        with self.lock:
            old_cert = self.mgr.cert_key_store.get_cert(self.CEPHADM_ROOT_CA_CERT)
            old_key = self.mgr.cert_key_store.get_key(self.CEPHADM_ROOT_CA_KEY)
            if old_key and old_cert:
                self.ssl_certs.load_root_credentials(old_cert, old_key)
            else:
                self.ssl_certs.generate_root_cert(ip)
                self.mgr.cert_key_store.save_cert(
                    self.CEPHADM_ROOT_CA_CERT, self.ssl_certs.get_root_cert())
                self.mgr.cert_key_store.save_key(
                    self.CEPHADM_ROOT_CA_KEY, self.ssl_certs.get_root_key())
            self.initialized = True

    def get_root_ca(self) -> str:
        """Return the root CA certificate held by the wrapped ``SSLCerts``.

        :raises RuntimeError: if the manager has not finished initializing.
        """
        with self.lock:
            if not self.initialized:
                raise RuntimeError("Not initialized")
            return self.ssl_certs.get_root_cert()

    def generate_cert(self, host_fqdn: Union[str, List[str]], node_ip: str) -> Tuple[str, str]:
        """Generate a certificate via ``SSLCerts.generate_cert``.

        :param host_fqdn: host name, or list of host names, for the certificate.
        :param node_ip: IP address for the certificate.
        :returns: the 2-tuple produced by ``SSLCerts.generate_cert``; callers
            unpack it as ``(cert, key)``.
        :raises RuntimeError: if the manager has not finished initializing.
        """
        with self.lock:
            if not self.initialized:
                raise RuntimeError("Not initialized")
            return self.ssl_certs.generate_cert(host_fqdn, node_ip)

src/pybind/mgr/cephadm/inventory.py

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1912,16 +1912,10 @@ class CertKeyStore():
19121912

19131913
host_cert = [
19141914
'grafana_cert',
1915-
'alertmanager_cert',
1916-
'prometheus_cert',
1917-
'node_exporter_cert',
19181915
]
19191916

19201917
host_key = [
19211918
'grafana_key',
1922-
'alertmanager_key',
1923-
'prometheus_key',
1924-
'node_exporter_key',
19251919
]
19261920

19271921
service_name_key = [
@@ -1951,22 +1945,15 @@ def _init_known_cert_key_dicts(self) -> None:
19511945
'agent_endpoint_root_cert': Cert(), # cert
19521946
'mgmt_gw_root_cert': Cert(), # cert
19531947
'service_discovery_root_cert': Cert(), # cert
1948+
'cephadm_root_ca_cert': Cert(), # cert
19541949
'grafana_cert': {}, # host -> cert
1955-
'alertmanager_cert': {}, # host -> cert
1956-
'prometheus_cert': {}, # host -> cert
1957-
'node_exporter_cert': {}, # host -> cert
19581950
}
19591951
# Similar to certs but for priv keys. Entries in known_certs
19601952
# that don't have a key here are probably certs in PEM format
19611953
# so there is no need to store a separate key
19621954
self.known_keys = {
1963-
'agent_endpoint_key': PrivKey(), # key
1964-
'service_discovery_key': PrivKey(), # key
1965-
'mgmt_gw_root_key': PrivKey(), # cert
1955+
'cephadm_root_ca_key': PrivKey(), # cert
19661956
'grafana_key': {}, # host -> key
1967-
'alertmanager_key': {}, # host -> key
1968-
'prometheus_key': {}, # host -> key
1969-
'node_exporter_key': {}, # host -> key
19701957
'iscsi_ssl_key': {}, # service-name -> key
19711958
'ingress_ssl_key': {}, # service-name -> key
19721959
'nvmeof_server_key': {}, # service-name -> key

src/pybind/mgr/cephadm/migrations.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -421,32 +421,6 @@ def migrate_6_7(self) -> bool:
421421
logger.info(f'Migrating certs/keys for {spec.service_name()} spec to cert store')
422422
self.mgr.spec_store._save_certs_and_keys(spec)
423423

424-
# Migrate service discovery and agent endpoint certs
425-
# These constants were taken from where these certs were
426-
# originally generated and should be the location they
427-
# were store at prior to the cert store
428-
KV_STORE_AGENT_ROOT_CERT = 'cephadm_agent/root/cert'
429-
KV_STORE_AGENT_ROOT_KEY = 'cephadm_agent/root/key'
430-
KV_STORE_SD_ROOT_CERT = 'service_discovery/root/cert'
431-
KV_STORE_SD_ROOT_KEY = 'service_discovery/root/key'
432-
433-
agent_endpoint_cert = self.mgr.get_store(KV_STORE_AGENT_ROOT_CERT)
434-
if agent_endpoint_cert:
435-
logger.info('Migrating agent root cert to cert store')
436-
self.mgr.cert_key_store.save_cert('agent_endpoint_root_cert', agent_endpoint_cert)
437-
agent_endpoint_key = self.mgr.get_store(KV_STORE_AGENT_ROOT_KEY)
438-
if agent_endpoint_key:
439-
logger.info('Migrating agent root key to cert store')
440-
self.mgr.cert_key_store.save_key('agent_endpoint_key', agent_endpoint_key)
441-
service_discovery_cert = self.mgr.get_store(KV_STORE_SD_ROOT_CERT)
442-
if service_discovery_cert:
443-
logger.info('Migrating service discovery cert to cert store')
444-
self.mgr.cert_key_store.save_cert('service_discovery_root_cert', service_discovery_cert)
445-
service_discovery_key = self.mgr.get_store(KV_STORE_SD_ROOT_KEY)
446-
if service_discovery_key:
447-
logger.info('Migrating service discovery key to cert store')
448-
self.mgr.cert_key_store.save_key('service_discovery_key', service_discovery_key)
449-
450424
# grafana certs are stored based on the host they are placed on
451425
for grafana_daemon in self.mgr.cache.get_daemons_by_type('grafana'):
452426
logger.info(f'Checking for cert/key for {grafana_daemon.name()}')

src/pybind/mgr/cephadm/module.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from threading import Event
1616

1717
from ceph.deployment.service_spec import PrometheusSpec
18+
from cephadm.cert_mgr import CertMgr
1819

1920
import string
2021
from typing import List, Dict, Optional, Callable, Tuple, TypeVar, \
@@ -538,11 +539,11 @@ def __init__(self, *args: Any, **kwargs: Any):
538539
super(CephadmOrchestrator, self).__init__(*args, **kwargs)
539540
self._cluster_fsid: str = self.get('mon_map')['fsid']
540541
self.last_monmap: Optional[datetime.datetime] = None
542+
self.cert_mgr = CertMgr(self, self.get_mgr_ip())
541543

542544
# for serve()
543545
self.run = True
544546
self.event = Event()
545-
546547
self.ssh = ssh.SSHManager(self)
547548

548549
if self.get_store('pause'):
@@ -2609,6 +2610,9 @@ def remove_service(self, service_name: str, force: bool = False) -> str:
26092610
raise OrchestratorError(
26102611
f'If {service_name} is removed then the following OSDs will remain, --force to proceed anyway\n{msg}')
26112612

2613+
if service_name == 'mgmt-gateway':
2614+
self.set_module_option('secure_monitoring_stack', False)
2615+
26122616
found = self.spec_store.rm(service_name)
26132617
if found and service_name.startswith('osd.'):
26142618
self.spec_store.finally_rm(service_name)
@@ -2899,7 +2903,7 @@ def get_daemon_names(daemons: List[str]) -> List[str]:
28992903
server_port = ''
29002904
try:
29012905
server_port = str(self.http_server.agent.server_port)
2902-
root_cert = self.http_server.agent.ssl_certs.get_root_cert()
2906+
root_cert = self.cert_mgr.get_root_ca()
29032907
except Exception:
29042908
pass
29052909
deps = sorted([self.get_mgr_ip(), server_port, root_cert,
@@ -2909,7 +2913,7 @@ def get_daemon_names(daemons: List[str]) -> List[str]:
29092913
server_port = ''
29102914
try:
29112915
server_port = str(self.http_server.agent.server_port)
2912-
root_cert = self.http_server.agent.ssl_certs.get_root_cert()
2916+
root_cert = self.cert_mgr.get_root_ca()
29132917
except Exception:
29142918
pass
29152919
deps = sorted([self.get_mgr_ip(), server_port, root_cert])
@@ -3138,14 +3142,14 @@ def get_prometheus_access_info(self) -> Dict[str, str]:
31383142
user, password = self._get_prometheus_credentials()
31393143
return {'user': user,
31403144
'password': password,
3141-
'certificate': self.http_server.service_discovery.ssl_certs.get_root_cert()}
3145+
'certificate': self.cert_mgr.get_root_ca()}
31423146

31433147
@handle_orch_error
31443148
def get_alertmanager_access_info(self) -> Dict[str, str]:
31453149
user, password = self._get_alertmanager_credentials()
31463150
return {'user': user,
31473151
'password': password,
3148-
'certificate': self.http_server.service_discovery.ssl_certs.get_root_cert()}
3152+
'certificate': self.cert_mgr.get_root_ca()}
31493153

31503154
@handle_orch_error
31513155
def cert_store_cert_ls(self) -> Dict[str, Any]:
@@ -3397,6 +3401,9 @@ def _apply_service_spec(self, spec: ServiceSpec) -> str:
33973401
host_count = len(self.inventory.keys())
33983402
max_count = self.max_count_per_host
33993403

3404+
if spec.service_type == 'mgmt-gateway':
3405+
self.set_module_option('secure_monitoring_stack', True)
3406+
34003407
if spec.placement.count is not None:
34013408
if spec.service_type in ['mon', 'mgr']:
34023409
if spec.placement.count > max(5, host_count):

src/pybind/mgr/cephadm/serve.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -702,7 +702,7 @@ def _apply_service(self, spec: ServiceSpec) -> bool:
702702
if service_type == 'agent':
703703
try:
704704
assert self.mgr.http_server.agent
705-
assert self.mgr.http_server.agent.ssl_certs.get_root_cert()
705+
assert self.mgr.cert_mgr.get_root_ca()
706706
except Exception:
707707
self.log.info(
708708
'Delaying applying agent spec until cephadm endpoint root cert created')

src/pybind/mgr/cephadm/service_discovery.py

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,17 @@ class Server: # type: ignore
77
pass
88

99
import logging
10-
import socket
1110

1211
import orchestrator # noqa
1312
from mgr_module import ServiceInfoT
1413
from mgr_util import build_url
15-
from typing import Dict, List, TYPE_CHECKING, cast, Collection, Callable, NamedTuple, Optional
14+
from typing import Dict, List, TYPE_CHECKING, cast, Collection, Callable, NamedTuple, Optional, IO
1615
from cephadm.services.monitoring import AlertmanagerService, NodeExporterService, PrometheusService
1716
import secrets
17+
from mgr_util import verify_tls_files
18+
import tempfile
1819

1920
from cephadm.services.ingress import IngressSpec
20-
from cephadm.ssl_cert_utils import SSLCerts
2121
from cephadm.services.cephadmservice import CephExporterService
2222
from cephadm.services.nvmeof import NvmeofService
2323

@@ -47,9 +47,10 @@ class ServiceDiscovery:
4747

4848
def __init__(self, mgr: "CephadmOrchestrator") -> None:
4949
self.mgr = mgr
50-
self.ssl_certs = SSLCerts()
5150
self.username: Optional[str] = None
5251
self.password: Optional[str] = None
52+
self.key_file: IO[bytes]
53+
self.cert_file: IO[bytes]
5354

5455
def validate_password(self, realm: str, username: str, password: str) -> bool:
5556
return (password == self.password and username == self.username)
@@ -86,18 +87,20 @@ def enable_auth(self) -> None:
8687
self.mgr.set_store('service_discovery/root/username', self.username)
8788

8889
def configure_tls(self, server: Server) -> None:
89-
old_cert = self.mgr.cert_key_store.get_cert('service_discovery_root_cert')
90-
old_key = self.mgr.cert_key_store.get_key('service_discovery_key')
91-
if old_key and old_cert:
92-
self.ssl_certs.load_root_credentials(old_cert, old_key)
93-
else:
94-
self.ssl_certs.generate_root_cert(self.mgr.get_mgr_ip())
95-
self.mgr.cert_key_store.save_cert('service_discovery_root_cert', self.ssl_certs.get_root_cert())
96-
self.mgr.cert_key_store.save_key('service_discovery_key', self.ssl_certs.get_root_key())
9790
addr = self.mgr.get_mgr_ip()
98-
host_fqdn = socket.getfqdn(addr)
99-
server.ssl_certificate, server.ssl_private_key = self.ssl_certs.generate_cert_files(
100-
host_fqdn, addr)
91+
host = self.mgr.get_hostname()
92+
cert, key = self.mgr.cert_mgr.generate_cert(host, addr)
93+
self.cert_file = tempfile.NamedTemporaryFile()
94+
self.cert_file.write(cert.encode('utf-8'))
95+
self.cert_file.flush() # cert_tmp must not be gc'ed
96+
97+
self.key_file = tempfile.NamedTemporaryFile()
98+
self.key_file.write(key.encode('utf-8'))
99+
self.key_file.flush() # pkey_tmp must not be gc'ed
100+
101+
verify_tls_files(self.cert_file.name, self.key_file.name)
102+
103+
server.ssl_certificate, server.ssl_private_key = self.cert_file.name, self.key_file.name
101104

102105
def configure(self, port: int, addr: str, enable_security: bool) -> None:
103106
# we create a new server to enforce TLS/SSL config refresh

0 commit comments

Comments
 (0)