Skip to content

Commit b9f81a6

Browse files
committed
mgr/cephadm: add tombstones to persist certs info after mgr failover
Runtime-added TLS objects names were lost across mgr restarts/failovers since they existed only in memory. We now write a tombstone to the KV store whenever a new certificate is registered (empty map for service/host scope; minimal JSON for global), so the object name is restored during load(). Fixes: https://tracker.ceph.com/issues/73625 Signed-off-by: Redouane Kachach <[email protected]>
1 parent d06d06f commit b9f81a6

File tree

2 files changed

+132
-2
lines changed

2 files changed

+132
-2
lines changed

src/pybind/mgr/cephadm/tests/test_certmgr.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1162,3 +1162,107 @@ def test_validate_tlsobject_name(self):
11621162
self.store._validate_tlsobject_name("per_host1")
11631163
with self.assertRaises(TLSObjectException):
11641164
self.store._validate_tlsobject_name("per_service1")
1165+
1166+
def test_register_does_not_double_write_on_second_call(self):
1167+
"""
1168+
Calling register_object_name() twice for the same name should not create a new KV value
1169+
nor change the existing one (basic idempotency).
1170+
"""
1171+
name = "per_service_twice"
1172+
key = f"{self.store.store_prefix}{name}"
1173+
1174+
# First register → writes {}
1175+
self.store.register_object_name(name, TLSObjectScope.SERVICE)
1176+
first_val = self.mgr.store.get(key)
1177+
assert first_val is not None
1178+
assert json.loads(first_val) == {}
1179+
1180+
# Second register → value should remain exactly the same
1181+
self.store.register_object_name(name, TLSObjectScope.SERVICE)
1182+
second_val = self.mgr.store.get(key)
1183+
assert second_val == first_val
1184+
1185+
def test_register_writes_tombstone_for_all_scopes(self):
1186+
"""
1187+
register_object_name() must persist a tombstone for any scope:
1188+
- SERVICE/HOST → "{}" (empty per-target map)
1189+
- GLOBAL → minimal JSON for an empty TLS object
1190+
"""
1191+
# Fresh names (not present in objects_by_name yet)
1192+
svc_name = "per_service_new"
1193+
host_name = "per_host_new"
1194+
glob_name = "global_cert_new"
1195+
1196+
svc_key = f"{self.store.store_prefix}{svc_name}"
1197+
host_key = f"{self.store.store_prefix}{host_name}"
1198+
glob_key = f"{self.store.store_prefix}{glob_name}"
1199+
1200+
# Sanity: no keys yet
1201+
assert svc_key not in self.mgr.store
1202+
assert host_key not in self.mgr.store
1203+
assert glob_key not in self.mgr.store
1204+
1205+
# Act
1206+
self.store.register_object_name(svc_name, TLSObjectScope.SERVICE)
1207+
self.store.register_object_name(host_name, TLSObjectScope.HOST)
1208+
self.store.register_object_name(glob_name, TLSObjectScope.GLOBAL)
1209+
1210+
# Assert KV tombstones
1211+
assert svc_key in self.mgr.store
1212+
assert host_key in self.mgr.store
1213+
assert glob_key in self.mgr.store
1214+
1215+
# SERVICE/HOST → empty dict
1216+
assert json.loads(self.mgr.store[svc_key]) == {}
1217+
assert json.loads(self.mgr.store[host_key]) == {}
1218+
1219+
# GLOBAL → minimal JSON for empty MockTLSObject
1220+
expected_global_min = MockTLSObject.to_json(MockTLSObject())
1221+
assert json.loads(self.mgr.store[glob_key]) == expected_global_min
1222+
1223+
# Also assert scope lists updated & in-memory skeletons created
1224+
assert svc_name in self.store.service_scoped_objects
1225+
assert host_name in self.store.host_scoped_objects
1226+
assert glob_name in self.store.global_scoped_objects
1227+
1228+
assert isinstance(self.store.objects_by_name[svc_name], dict) and self.store.objects_by_name[svc_name] == {}
1229+
assert isinstance(self.store.objects_by_name[host_name], dict) and self.store.objects_by_name[host_name] == {}
1230+
# For globals, register() seeds an entry; it will be assigned the empty object later when loaded/saved.
1231+
# We only need to ensure the name exists in the registry.
1232+
assert glob_name in self.store.objects_by_name
1233+
1234+
def test_register_is_idempotent_and_does_not_overwrite_existing_kv(self):
1235+
"""
1236+
If a store key already has content, register_object_name() must not clobber it.
1237+
"""
1238+
# Pre-seed non-empty SERVICE and HOST entries, and a GLOBAL entry
1239+
svc_name = "per_service_existing"
1240+
host_name = "per_host_existing"
1241+
glob_name = "global_existing"
1242+
1243+
svc_key = f"{self.store.store_prefix}{svc_name}."
1244+
host_key = f"{self.store.store_prefix}{host_name}."
1245+
glob_key = f"{self.store.store_prefix}{glob_name}."
1246+
1247+
pre_svc_val = {"svcA": MockTLSObject.to_json(MockTLSObject("svc-seed", True, False))}
1248+
pre_host_val = {"hostA": MockTLSObject.to_json(MockTLSObject("host-seed", True, False))}
1249+
pre_glob_val = MockTLSObject.to_json(MockTLSObject("glob-seed", True, False))
1250+
1251+
self.mgr.set_store(svc_key, json.dumps(pre_svc_val))
1252+
self.mgr.set_store(host_key, json.dumps(pre_host_val))
1253+
self.mgr.set_store(glob_key, json.dumps(pre_glob_val))
1254+
1255+
# Act: register (should be a no-op on KV content)
1256+
self.store.register_object_name(svc_name, TLSObjectScope.SERVICE)
1257+
self.store.register_object_name(host_name, TLSObjectScope.HOST)
1258+
self.store.register_object_name(glob_name, TLSObjectScope.GLOBAL)
1259+
1260+
# Assert KV unchanged
1261+
assert json.loads(self.mgr.store[svc_key]) == pre_svc_val
1262+
assert json.loads(self.mgr.store[host_key]) == pre_host_val
1263+
assert json.loads(self.mgr.store[glob_key]) == pre_glob_val
1264+
1265+
# And registry entries exist
1266+
assert svc_name in self.store.objects_by_name
1267+
assert host_name in self.store.objects_by_name
1268+
assert glob_name in self.store.objects_by_name

src/pybind/mgr/cephadm/tlsobject_store.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,25 @@ def _kv_key(self, obj_name: str) -> str:
4949
def _set_store(self, obj_name: str, payload: Any) -> None:
5050
self.mgr.set_store(self._kv_key(obj_name), json.dumps(payload))
5151

52+
def ensure_tombstone(self, obj_name: str, scope: TLSObjectScope) -> None:
53+
"""
54+
Idempotently ensure a tombstone (empty) KV entry for obj_name so the
55+
name is rediscovered after manager restarts (see load method).
56+
57+
- SERVICE/HOST → `{}` (empty per-target map)
58+
- GLOBAL → minimal JSON for an empty TLS object
59+
"""
60+
61+
# Do nothing if the TLS object name already exists in the store
62+
if self.mgr.get_store_prefix(self._kv_key(obj_name)):
63+
return
64+
65+
if scope in (TLSObjectScope.SERVICE, TLSObjectScope.HOST):
66+
self._set_store(obj_name, {})
67+
elif scope == TLSObjectScope.GLOBAL:
68+
empty = self.tlsobject_class() # falsy empty instance
69+
self._set_store(obj_name, self.tlsobject_class.to_json(empty))
70+
5271
def register_object_name(self, obj_name: str, scope: TLSObjectScope) -> None:
5372
"""
5473
Register a new TLS object name under the specified scope if it does not already exist.
@@ -59,8 +78,15 @@ def register_object_name(self, obj_name: str, scope: TLSObjectScope) -> None:
5978
ValueError: If an invalid scope is provided.
6079
"""
6180
if obj_name not in self.objects_by_name:
62-
# Initialize an empty dictionary to track TLS objects for this obj_name
63-
self.objects_by_name[obj_name] = {}
81+
# Initialize an empty dictionary/TLSobj to track TLS objects for this obj_name
82+
if scope in (TLSObjectScope.SERVICE, TLSObjectScope.HOST):
83+
self.objects_by_name[obj_name] = {}
84+
elif scope == TLSObjectScope.GLOBAL:
85+
self.objects_by_name[obj_name] = self.tlsobject_class()
86+
else:
87+
raise ValueError(f"Invalid TLSObjectScope '{scope}' for obj_name '{obj_name}'")
88+
# Initialize an empty tombstone for the TLS object(s) in the store
89+
self.ensure_tombstone(obj_name, scope)
6490

6591
# Add to the appropriate scope list
6692
if scope == TLSObjectScope.SERVICE and obj_name not in self.service_scoped_objects:

0 commit comments

Comments
 (0)