Skip to content

Commit 77c93e6

Browse files
authored
Merge pull request ceph#66041 from rkachach/fix_issue_73625
mgr/cephadm: add tombstones to persist certs info after mgr failover Reviewed-by: Adam King <[email protected]>
2 parents 28307aa + b9f81a6 commit 77c93e6

File tree

3 files changed

+205
-37
lines changed

3 files changed

+205
-37
lines changed

src/pybind/mgr/cephadm/tests/test_certmgr.py

Lines changed: 110 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -773,13 +773,12 @@ def _fake_prefix_store(prefix):
773773
assert 'iscsi_ssl_key' in key_store
774774
assert isinstance(key_store['iscsi_ssl_key'], dict) and len(key_store['iscsi_ssl_key']) == 0
775775

776-
# Global-scoped key with bad JSON should also not be instantiated (no PrivKey object)
777-
# Depending on how you seed known names, it might be absent OR present but falsy.
778-
# Accept either: absent OR not a PrivKey instance.
779-
if 'mgmt_gateway_ssl_key' in key_store:
780-
assert not isinstance(key_store['mgmt_gateway_ssl_key'], PrivKey)
781-
else:
782-
assert 'mgmt_gateway_ssl_key' not in key_store
776+
# Global-scoped key with bad JSON should NOT be hydrated
777+
assert 'mgmt_gateway_ssl_key' in key_store
778+
obj = key_store.get('mgmt_gateway_ssl_key')
779+
# It should be a PrivKey (because globals are pre-seeded) but remain empty/falsy
780+
assert isinstance(obj, PrivKey)
781+
assert not bool(obj) # still the tombstone, not parsed content
783782

784783
messages = [r.getMessage() for r in caplog.records
785784
if r.levelno >= logging.WARNING and r.name == "cephadm.tlsobject_store"]
@@ -1163,3 +1162,107 @@ def test_validate_tlsobject_name(self):
11631162
self.store._validate_tlsobject_name("per_host1")
11641163
with self.assertRaises(TLSObjectException):
11651164
self.store._validate_tlsobject_name("per_service1")
1165+
1166+
def test_register_does_not_double_write_on_second_call(self):
1167+
"""
1168+
Calling register_object_name() twice for the same name should not create a new KV value
1169+
nor change the existing one (basic idempotency).
1170+
"""
1171+
name = "per_service_twice"
1172+
key = f"{self.store.store_prefix}{name}"
1173+
1174+
# First register → writes {}
1175+
self.store.register_object_name(name, TLSObjectScope.SERVICE)
1176+
first_val = self.mgr.store.get(key)
1177+
assert first_val is not None
1178+
assert json.loads(first_val) == {}
1179+
1180+
# Second register → value should remain exactly the same
1181+
self.store.register_object_name(name, TLSObjectScope.SERVICE)
1182+
second_val = self.mgr.store.get(key)
1183+
assert second_val == first_val
1184+
1185+
def test_register_writes_tombstone_for_all_scopes(self):
1186+
"""
1187+
register_object_name() must persist a tombstone for any scope:
1188+
- SERVICE/HOST → "{}" (empty per-target map)
1189+
- GLOBAL → minimal JSON for an empty TLS object
1190+
"""
1191+
# Fresh names (not present in objects_by_name yet)
1192+
svc_name = "per_service_new"
1193+
host_name = "per_host_new"
1194+
glob_name = "global_cert_new"
1195+
1196+
svc_key = f"{self.store.store_prefix}{svc_name}"
1197+
host_key = f"{self.store.store_prefix}{host_name}"
1198+
glob_key = f"{self.store.store_prefix}{glob_name}"
1199+
1200+
# Sanity: no keys yet
1201+
assert svc_key not in self.mgr.store
1202+
assert host_key not in self.mgr.store
1203+
assert glob_key not in self.mgr.store
1204+
1205+
# Act
1206+
self.store.register_object_name(svc_name, TLSObjectScope.SERVICE)
1207+
self.store.register_object_name(host_name, TLSObjectScope.HOST)
1208+
self.store.register_object_name(glob_name, TLSObjectScope.GLOBAL)
1209+
1210+
# Assert KV tombstones
1211+
assert svc_key in self.mgr.store
1212+
assert host_key in self.mgr.store
1213+
assert glob_key in self.mgr.store
1214+
1215+
# SERVICE/HOST → empty dict
1216+
assert json.loads(self.mgr.store[svc_key]) == {}
1217+
assert json.loads(self.mgr.store[host_key]) == {}
1218+
1219+
# GLOBAL → minimal JSON for empty MockTLSObject
1220+
expected_global_min = MockTLSObject.to_json(MockTLSObject())
1221+
assert json.loads(self.mgr.store[glob_key]) == expected_global_min
1222+
1223+
# Also assert scope lists updated & in-memory skeletons created
1224+
assert svc_name in self.store.service_scoped_objects
1225+
assert host_name in self.store.host_scoped_objects
1226+
assert glob_name in self.store.global_scoped_objects
1227+
1228+
assert isinstance(self.store.objects_by_name[svc_name], dict) and self.store.objects_by_name[svc_name] == {}
1229+
assert isinstance(self.store.objects_by_name[host_name], dict) and self.store.objects_by_name[host_name] == {}
1230+
# For globals, register() seeds an entry; it will be assigned the empty object later when loaded/saved.
1231+
# We only need to ensure the name exists in the registry.
1232+
assert glob_name in self.store.objects_by_name
1233+
1234+
def test_register_is_idempotent_and_does_not_overwrite_existing_kv(self):
1235+
"""
1236+
If a store key already has content, register_object_name() must not clobber it.
1237+
"""
1238+
# Pre-seed non-empty SERVICE and HOST entries, and a GLOBAL entry
1239+
svc_name = "per_service_existing"
1240+
host_name = "per_host_existing"
1241+
glob_name = "global_existing"
1242+
1243+
svc_key = f"{self.store.store_prefix}{svc_name}."
1244+
host_key = f"{self.store.store_prefix}{host_name}."
1245+
glob_key = f"{self.store.store_prefix}{glob_name}."
1246+
1247+
pre_svc_val = {"svcA": MockTLSObject.to_json(MockTLSObject("svc-seed", True, False))}
1248+
pre_host_val = {"hostA": MockTLSObject.to_json(MockTLSObject("host-seed", True, False))}
1249+
pre_glob_val = MockTLSObject.to_json(MockTLSObject("glob-seed", True, False))
1250+
1251+
self.mgr.set_store(svc_key, json.dumps(pre_svc_val))
1252+
self.mgr.set_store(host_key, json.dumps(pre_host_val))
1253+
self.mgr.set_store(glob_key, json.dumps(pre_glob_val))
1254+
1255+
# Act: register (should be a no-op on KV content)
1256+
self.store.register_object_name(svc_name, TLSObjectScope.SERVICE)
1257+
self.store.register_object_name(host_name, TLSObjectScope.HOST)
1258+
self.store.register_object_name(glob_name, TLSObjectScope.GLOBAL)
1259+
1260+
# Assert KV unchanged
1261+
assert json.loads(self.mgr.store[svc_key]) == pre_svc_val
1262+
assert json.loads(self.mgr.store[host_key]) == pre_host_val
1263+
assert json.loads(self.mgr.store[glob_key]) == pre_glob_val
1264+
1265+
# And registry entries exist
1266+
assert svc_name in self.store.objects_by_name
1267+
assert host_name in self.store.objects_by_name
1268+
assert glob_name in self.store.objects_by_name

src/pybind/mgr/cephadm/tlsobject_store.py

Lines changed: 94 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from typing import (Any,
22
Dict,
3-
Union,
43
List,
54
Tuple,
65
Optional,
@@ -33,12 +32,41 @@ def __init__(self, mgr: 'CephadmOrchestrator',
3332
self.mgr: CephadmOrchestrator = mgr
3433
self.cephadm_signed_object_checker = cephadm_signed_obj_checker
3534
self.tlsobject_class = tlsobject_class
36-
all_known_objects_names = [item for sublist in known_objects_names.values() for item in sublist]
37-
self.objects_by_name: Dict[str, Any] = {key: {} for key in all_known_objects_names}
3835
self.service_scoped_objects = known_objects_names[TLSObjectScope.SERVICE]
3936
self.host_scoped_objects = known_objects_names[TLSObjectScope.HOST]
4037
self.global_scoped_objects = known_objects_names[TLSObjectScope.GLOBAL]
4138
self.store_prefix = f'{TLSOBJECT_STORE_PREFIX}{tlsobject_class.STORAGE_PREFIX}.'
39+
# initialize objects by name for the different scopes
40+
self.objects_by_name: Dict[str, Any] = {}
41+
for n in self.service_scoped_objects + self.host_scoped_objects:
42+
self.objects_by_name[n] = {}
43+
for n in self.global_scoped_objects:
44+
self.objects_by_name[n] = self.tlsobject_class()
45+
46+
def _kv_key(self, obj_name: str) -> str:
47+
return self.store_prefix + obj_name
48+
49+
def _set_store(self, obj_name: str, payload: Any) -> None:
50+
self.mgr.set_store(self._kv_key(obj_name), json.dumps(payload))
51+
52+
def ensure_tombstone(self, obj_name: str, scope: TLSObjectScope) -> None:
53+
"""
54+
Idempotently ensure a tombstone (empty) KV entry for obj_name so the
55+
name is rediscovered after manager restarts (see load method).
56+
57+
- SERVICE/HOST → `{}` (empty per-target map)
58+
- GLOBAL → minimal JSON for an empty TLS object
59+
"""
60+
61+
# Do nothing if the TLS object name already exists in the store
62+
if self.mgr.get_store_prefix(self._kv_key(obj_name)):
63+
return
64+
65+
if scope in (TLSObjectScope.SERVICE, TLSObjectScope.HOST):
66+
self._set_store(obj_name, {})
67+
elif scope == TLSObjectScope.GLOBAL:
68+
empty = self.tlsobject_class() # falsy empty instance
69+
self._set_store(obj_name, self.tlsobject_class.to_json(empty))
4270

4371
def register_object_name(self, obj_name: str, scope: TLSObjectScope) -> None:
4472
"""
@@ -50,8 +78,15 @@ def register_object_name(self, obj_name: str, scope: TLSObjectScope) -> None:
5078
ValueError: If an invalid scope is provided.
5179
"""
5280
if obj_name not in self.objects_by_name:
53-
# Initialize an empty dictionary to track TLS objects for this obj_name
54-
self.objects_by_name[obj_name] = {}
81+
# Initialize an empty dictionary/TLSobj to track TLS objects for this obj_name
82+
if scope in (TLSObjectScope.SERVICE, TLSObjectScope.HOST):
83+
self.objects_by_name[obj_name] = {}
84+
elif scope == TLSObjectScope.GLOBAL:
85+
self.objects_by_name[obj_name] = self.tlsobject_class()
86+
else:
87+
raise ValueError(f"Invalid TLSObjectScope '{scope}' for obj_name '{obj_name}'")
88+
# Initialize an empty tombstone for the TLS object(s) in the store
89+
self.ensure_tombstone(obj_name, scope)
5590

5691
# Add to the appropriate scope list
5792
if scope == TLSObjectScope.SERVICE and obj_name not in self.service_scoped_objects:
@@ -105,18 +140,16 @@ def save_tlsobject(self, obj_name: str,
105140
self._validate_tlsobject_name(obj_name, service_name, host)
106141
tlsobject = self.tlsobject_class(tlsobject, user_made, editable)
107142
scope, target = self.get_tlsobject_scope_and_target(obj_name, service_name, host)
108-
j: Union[str, Dict[Any, Any], None] = None
109143
if scope in (TLSObjectScope.SERVICE, TLSObjectScope.HOST):
110144
self.objects_by_name[obj_name][target] = tlsobject
111-
j = {
145+
serialized_targets = {
112146
key: self.tlsobject_class.to_json(self.objects_by_name[obj_name][key])
113147
for key in self.objects_by_name[obj_name]
114148
}
115-
self.mgr.set_store(self.store_prefix + obj_name, json.dumps(j))
149+
self._set_store(obj_name, serialized_targets)
116150
elif scope == TLSObjectScope.GLOBAL:
117151
self.objects_by_name[obj_name] = tlsobject
118-
j = self.tlsobject_class.to_json(tlsobject)
119-
self.mgr.set_store(self.store_prefix + obj_name, json.dumps(j))
152+
self._set_store(obj_name, self.tlsobject_class.to_json(tlsobject))
120153
else:
121154
logger.error(f'Trying to save TLS object name {obj_name} with a not-supported/unknown TLSObjectScope scope {scope.value}')
122155

@@ -155,28 +188,27 @@ def rm_tlsobject(self, obj_name: str, service_name: Optional[str] = None, host:
155188
"""
156189
self._validate_tlsobject_name(obj_name, service_name, host)
157190
scope, target = self.get_tlsobject_scope_and_target(obj_name, service_name, host)
158-
j: Union[str, Dict[Any, Any], None] = None
159191
if scope in (TLSObjectScope.SERVICE, TLSObjectScope.HOST):
160192
if obj_name in self.objects_by_name and target in self.objects_by_name[obj_name]:
161193
del self.objects_by_name[obj_name][target]
162-
j = {
194+
serialized_targets = {
163195
key: self.tlsobject_class.to_json(self.objects_by_name[obj_name][key])
164196
for key in self.objects_by_name[obj_name]
165197
}
166-
self.mgr.set_store(self.store_prefix + obj_name, json.dumps(j))
198+
self._set_store(obj_name, serialized_targets)
167199
return True
168200
elif scope == TLSObjectScope.GLOBAL:
169201
self.objects_by_name[obj_name] = self.tlsobject_class()
170-
j = self.tlsobject_class.to_json(self.objects_by_name[obj_name])
171-
self.mgr.set_store(self.store_prefix + obj_name, json.dumps(j))
202+
serialized_obj = self.tlsobject_class.to_json(self.objects_by_name[obj_name])
203+
self._set_store(obj_name, serialized_obj)
172204
return True
173205
else:
174206
raise TLSObjectException(f'Attempted to remove {self.tlsobject_class.__name__.lower()} for unknown obj_name {obj_name}')
175207
return False
176208

177209
def _validate_tlsobject_name(self, obj_name: str, service_name: Optional[str] = None, host: Optional[str] = None) -> None:
178210
cred_type = self.tlsobject_class.__name__.lower()
179-
if obj_name not in self.objects_by_name.keys():
211+
if obj_name not in self.objects_by_name:
180212
raise TLSObjectException(f'Attempted to access {cred_type} for unknown TLS object name {obj_name}')
181213
if obj_name in self.host_scoped_objects and not host:
182214
raise TLSObjectException(f'Need host to access {cred_type} for TLS object {obj_name}')
@@ -205,39 +237,72 @@ def list_tlsobjects(self) -> List[Tuple[str, TLSObjectProtocol, Optional[str]]]:
205237
return tlsobjects
206238

207239
def load(self) -> None:
240+
241+
def _kv_preview(key: str, raw: str, n: int = 20) -> Tuple[str, int, str]:
242+
# Safe, short, escaped preview for logs
243+
return key, (len(raw) if raw else 0), (raw[:n] if raw else "")
244+
208245
for k, v in self.mgr.get_store_prefix(self.store_prefix).items():
209246
obj_name = k[len(self.store_prefix):]
210247
is_cephadm_signed_object = self.cephadm_signed_object_checker(obj_name)
211248
if not is_cephadm_signed_object and obj_name not in self.objects_by_name:
212-
logger.warning(f"TLSObjectStore: Discarding unknown obj_name '{obj_name}'")
249+
logger.warning("TLSObjectStore: Discarding unknown obj_name %r", obj_name)
213250
continue
214251

215252
try:
216253
tls_object_targets = json.loads(v)
254+
if not isinstance(tls_object_targets, dict):
255+
key_preview, vlen, vstart = _kv_preview(k, v)
256+
logger.error(
257+
"TLSObjectStore: Invalid data structure for object %r. "
258+
"Expected dict but got %s. key=%r, len=%d, startswith=%r",
259+
obj_name, type(tls_object_targets).__name__,
260+
key_preview, vlen, vstart,
261+
)
262+
continue
217263
except json.JSONDecodeError as e:
264+
key_preview, vlen, vstart = _kv_preview(k, v)
218265
logger.warning(
219-
f"TLSObjectStore: Cannot parse JSON for '{obj_name}': "
220-
f"key={k}, len={len(v) if v else 0}, startswith={v[:20]!r}, error={e}"
266+
"TLSObjectStore: Cannot parse JSON for %r: key=%r, len=%d, startswith=%r, error=%r",
267+
obj_name, key_preview, vlen, vstart, e,
221268
)
222269
continue
223270
except Exception as e:
271+
key_preview, vlen, vstart = _kv_preview(k, v)
224272
logger.error(
225-
f"TLSObjectStore: Unexpected error happened while trying to parse JSON for '{obj_name}': "
226-
f"key={k}, len={len(v) if v else 0}, startswith={v[:20]!r}, error={e}"
273+
"TLSObjectStore: Unexpected error while parsing %r: key=%r, len=%d, startswith=%r, error=%r",
274+
obj_name, key_preview, vlen, vstart, e,
275+
exc_info=True, # include traceback for unexpected errors
227276
)
228277
continue
229278

230279
if is_cephadm_signed_object or (obj_name in self.service_scoped_objects) or (obj_name in self.host_scoped_objects):
231280
if is_cephadm_signed_object and obj_name not in self.host_scoped_objects:
232281
self.host_scoped_objects.append(obj_name)
233282
self.objects_by_name[obj_name] = {}
234-
for target in tls_object_targets:
235-
tlsobject = self.tlsobject_class.from_json(tls_object_targets[target])
236-
if tlsobject:
237-
self.objects_by_name[obj_name][target] = tlsobject
283+
for target, payload in tls_object_targets.items():
284+
try:
285+
tlsobject = self.tlsobject_class.from_json(payload)
286+
if tlsobject: # skip tombstones
287+
self.objects_by_name[obj_name][target] = tlsobject
288+
else:
289+
logger.debug("TLSObjectStore: Skipping tombstone for %r (target %r)", obj_name, target)
290+
except Exception as e:
291+
key_preview, vlen, vstart = _kv_preview(k, str(payload))
292+
logger.warning(
293+
"TLSObjectStore: Failed to decode scoped TLS object %r (target %r): %r. "
294+
"key=%r, len=%d, startswith=%r",
295+
obj_name, target, e, key_preview, vlen, vstart
296+
)
238297
elif obj_name in self.global_scoped_objects:
239-
tlsobject = self.tlsobject_class.from_json(tls_object_targets)
240-
if tlsobject:
241-
self.objects_by_name[obj_name] = tlsobject
298+
try:
299+
self.objects_by_name[obj_name] = self.tlsobject_class.from_json(tls_object_targets)
300+
except Exception as e:
301+
key_preview, vlen, vstart = _kv_preview(k, v)
302+
logger.warning(
303+
"TLSObjectStore: Failed to decode global TLS object %r: %r. "
304+
"key=%r, len=%d, startswith=%r",
305+
obj_name, e, key_preview, vlen, vstart
306+
)
242307
else:
243-
logger.error(f"TLSObjectStore: Found a known TLS object name {obj_name} with unknown scope!")
308+
logger.error("TLSObjectStore: Found a known TLS object name %r with unknown scope!", obj_name)

src/pybind/mgr/cephadm/tlsobject_types.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,6 @@ def from_json(cls, data: Dict[str, str]) -> 'PrivKey':
156156
user_made = parse_bool(data.get('user_made', False))
157157
editable = parse_bool(data.get('editable', False))
158158
except ValueError as e:
159-
raise TLSObjectException(f'Expected field in Cert object to be bool but got another type: {e}')
159+
raise TLSObjectException(f'Expected field in PrivKey object to be bool but got another type: {e}')
160160

161161
return cls(key=key, user_made=user_made, editable=editable)

0 commit comments

Comments
 (0)