
Commit 85d2633

mgr/cephadm: validate no duplicate groups and group vs. service id for nvmeof
This came as a recommendation from the nvmeof team, because 1) users shouldn't have multiple nvmeof services with the same group, and 2) some users may take an existing nvmeof service spec and modify the service id to point to another group but forget to update the actual group parameter. That makes it look at a glance like the two services are using different groups when they are in fact not.

Usually I'd write a spec migration for this, but if we were to find users with two specs with duplicate groups, or specs where the service id doesn't match the group name, I don't think there's a clear path of action to remedy the situation. Even just forcing the service id of an nvmeof service to be pool_name.group_name could cause a lot of confusion during an upgrade, so I'm hesitant to do it.

Signed-off-by: Adam King <[email protected]>
1 parent 27c9e65 commit 85d2633
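
For a quick illustration of the two spec patterns this validation now rejects on apply, here is a sketch using the same NvmeofServiceSpec constructor the new tests exercise (the pool and group names here are made up):

    from ceph.deployment.service_spec import NvmeofServiceSpec

    # Rejected by the id check: the service id ends in 'bar', so at a glance
    # the spec looks like it targets group 'bar', but the group parameter
    # still says 'foo'. _apply_service_spec() raises OrchestratorError.
    mismatched = NvmeofServiceSpec(
        service_id='mypool.bar',
        group='foo',
        pool='mypool',
    )

    # Rejected by the duplicate check (assuming a service with group 'foo'
    # was already applied): the pool and service id differ, but the group
    # is already claimed by the existing nvmeof service.
    duplicate = NvmeofServiceSpec(
        service_id='otherpool.foo',
        group='foo',
        pool='otherpool',
    )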

2 files changed (+93, -15 lines)

src/pybind/mgr/cephadm/module.py

Lines changed: 23 additions & 9 deletions
@@ -33,12 +33,15 @@
 from ceph.cephadm.images import DefaultImages
 from ceph.deployment import inventory
 from ceph.deployment.drive_group import DriveGroupSpec
-from ceph.deployment.service_spec import \
-    ServiceSpec, PlacementSpec, \
-    HostPlacementSpec, IngressSpec, \
-    TunedProfileSpec, \
-    MgmtGatewaySpec, \
-    NvmeofServiceSpec
+from ceph.deployment.service_spec import (
+    ServiceSpec,
+    PlacementSpec,
+    HostPlacementSpec,
+    IngressSpec,
+    TunedProfileSpec,
+    MgmtGatewaySpec,
+    NvmeofServiceSpec,
+)
 from ceph.utils import str_to_datetime, datetime_to_str, datetime_now
 from cephadm.serve import CephadmServe
 from cephadm.services.cephadmservice import CephadmDaemonDeploySpec
@@ -3397,13 +3400,24 @@ def _apply_service_spec(self, spec: ServiceSpec) -> str:
             raise OrchestratorError("The 'oauth2-proxy' service depends on the 'mgmt-gateway' service, but it is not configured.")
 
         if spec.service_type == 'nvmeof':
-            spec = cast(NvmeofServiceSpec, spec)
-            assert spec.pool is not None, "Pool cannot be None for nvmeof services"
+            nvmeof_spec = cast(NvmeofServiceSpec, spec)
+            assert nvmeof_spec.pool is not None, "Pool cannot be None for nvmeof services"
+            assert nvmeof_spec.service_id is not None  # for mypy
             try:
-                self._check_pool_exists(spec.pool, spec.service_name())
+                self._check_pool_exists(nvmeof_spec.pool, nvmeof_spec.service_name())
             except OrchestratorError as e:
                 self.log.debug(f"{e}")
                 raise
+            nvmeof_spec = cast(NvmeofServiceSpec, spec)
+            assert nvmeof_spec.service_id is not None  # for mypy
+            if nvmeof_spec.group and not nvmeof_spec.service_id.endswith(nvmeof_spec.group):
+                raise OrchestratorError("The 'nvmeof' service id/name must end with '.<nvmeof-group-name>'. Found "
+                                        f"group name '{nvmeof_spec.group}' and service id '{nvmeof_spec.service_id}'")
+            for sspec in [s.spec for s in self.spec_store.get_by_service_type('nvmeof')]:
+                nspec = cast(NvmeofServiceSpec, sspec)
+                if nvmeof_spec.group == nspec.group:
+                    raise OrchestratorError(f"Cannot create nvmeof service with group {nvmeof_spec.group}. That group is already "
+                                            f"being used by the service {nspec.service_name()}")
 
         if spec.placement.count is not None:
             if spec.service_type in ['mon', 'mgr']:
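
Note that the id check above is a plain suffix match (service_id.endswith(group)) rather than a split on '.', which is what lets group names that themselves contain dots pass validation (covered by the tests below). A standalone sketch of the same predicate, assuming it mirrors the diff:

    def service_id_matches_group(service_id: str, group: str) -> bool:
        # Mirrors the check above: an empty group skips validation; otherwise
        # the service id must end with the group name. The suffix match does
        # not itself insist on a literal '.' before the group.
        return not group or service_id.endswith(group)

    assert service_id_matches_group('pool3.right.group', 'right.group')
    assert not service_id_matches_group('pool4.wrong.group', 'right.group')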

src/pybind/mgr/cephadm/tests/test_services.py

Lines changed: 70 additions & 6 deletions
@@ -339,11 +339,11 @@ def test_nvmeof_dashboard_config(self, mock_resolve_ip):
     @patch("cephadm.module.CephadmOrchestrator.get_unique_name")
     def test_nvmeof_config(self, _get_name, _run_cephadm, cephadm_module: CephadmOrchestrator):
 
-        nvmeof_daemon_id = 'testpool.test.qwert'
         pool = 'testpool'
+        group = 'mygroup'
+        nvmeof_daemon_id = f'{pool}.{group}.test.qwert'
         tgt_cmd_extra_args = '--cpumask=0xFF --msg-mempool-size=524288'
         default_port = 5500
-        group = 'mygroup'
         _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
         _get_name.return_value = nvmeof_daemon_id
 
@@ -426,7 +426,7 @@ def test_nvmeof_config(self, _get_name, _run_cephadm, cephadm_module: CephadmOrchestrator):
     timeout = 1.0\n"""
 
         with with_host(cephadm_module, 'test'):
-            with with_service(cephadm_module, NvmeofServiceSpec(service_id=pool,
+            with with_service(cephadm_module, NvmeofServiceSpec(service_id=f'{pool}.{group}',
                                                                 tgt_cmd_extra_args=tgt_cmd_extra_args,
                                                                 group=group,
                                                                 pool=pool)):
@@ -437,14 +437,14 @@ def test_nvmeof_config(self, _get_name, _run_cephadm, cephadm_module: CephadmOrchestrator):
                 [],
                 stdin=json.dumps({
                     "fsid": "fsid",
-                    "name": "nvmeof.testpool.test.qwert",
+                    "name": "nvmeof.testpool.mygroup.test.qwert",
                     "image": "",
                     "deploy_arguments": [],
                     "params": {
                         "tcp_ports": [5500, 4420, 8009, 10008]
                     },
                     "meta": {
-                        "service_name": "nvmeof.testpool",
+                        "service_name": "nvmeof.testpool.mygroup",
                         "ports": [5500, 4420, 8009, 10008],
                         "ip": None,
                         "deployed_by": [],
@@ -455,7 +455,7 @@ def test_nvmeof_config(self, _get_name, _run_cephadm, cephadm_module: CephadmOrchestrator):
                     },
                     "config_blobs": {
                         "config": "",
-                        "keyring": "[client.nvmeof.testpool.test.qwert]\nkey = None\n",
+                        "keyring": "[client.nvmeof.testpool.mygroup.test.qwert]\nkey = None\n",
                         "files": {
                             "ceph-nvmeof.conf": nvmeof_gateway_conf
                         }
@@ -465,6 +465,70 @@ def test_nvmeof_config(self, _get_name, _run_cephadm, cephadm_module: CephadmOrchestrator):
             use_current_daemon_image=False,
         )
 
+    @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+    def test_validate_no_group_duplicate_on_apply(self, cephadm_module: CephadmOrchestrator):
+        nvmeof_spec_group1 = NvmeofServiceSpec(
+            service_id='testpool.testgroup',
+            group='testgroup',
+            pool='testpool'
+        )
+        nvmeof_spec_also_group1 = NvmeofServiceSpec(
+            service_id='testpool2.testgroup',
+            group='testgroup',
+            pool='testpool2'
+        )
+        with with_host(cephadm_module, 'test'):
+            out = cephadm_module._apply_service_spec(nvmeof_spec_group1)
+            assert out == 'Scheduled nvmeof.testpool.testgroup update...'
+            nvmeof_specs = cephadm_module.spec_store.get_by_service_type('nvmeof')
+            assert len(nvmeof_specs) == 1
+            assert nvmeof_specs[0].spec.service_name() == 'nvmeof.testpool.testgroup'
+            with pytest.raises(
+                OrchestratorError,
+                match='Cannot create nvmeof service with group testgroup. That group is already '
+                      'being used by the service nvmeof.testpool.testgroup'
+            ):
+                cephadm_module._apply_service_spec(nvmeof_spec_also_group1)
+            assert len(cephadm_module.spec_store.get_by_service_type('nvmeof')) == 1
+
+    @patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
+    def test_validate_service_id_matches_group_on_apply(self, cephadm_module: CephadmOrchestrator):
+        matching_nvmeof_spec_group_service_id = NvmeofServiceSpec(
+            service_id='pool1.right_group',
+            group='right_group',
+            pool='pool1'
+        )
+        mismatch_nvmeof_spec_group_service_id = NvmeofServiceSpec(
+            service_id='pool2.wrong_group',
+            group='right_group',
+            pool='pool2'
+        )
+        matching_nvmeof_spec_group_service_id_with_dot = NvmeofServiceSpec(
+            service_id='pool3.right.group',
+            group='right.group',
+            pool='pool3'
+        )
+        mismatch_nvmeof_spec_group_service_id_with_dot = NvmeofServiceSpec(
+            service_id='pool4.wrong.group',
+            group='right.group',
+            pool='pool4'
+        )
+        with with_host(cephadm_module, 'test'):
+            cephadm_module._apply_service_spec(matching_nvmeof_spec_group_service_id)
+            with pytest.raises(
+                OrchestratorError,
+                match='The \'nvmeof\' service id/name must end with \'.<nvmeof-group-name>\'. Found '
+                      'group name \'right_group\' and service id \'pool2.wrong_group\''
+            ):
+                cephadm_module._apply_service_spec(mismatch_nvmeof_spec_group_service_id)
+            cephadm_module._apply_service_spec(matching_nvmeof_spec_group_service_id_with_dot)
+            with pytest.raises(
+                OrchestratorError,
+                match='The \'nvmeof\' service id/name must end with \'.<nvmeof-group-name>\'. Found '
+                      'group name \'right.group\' and service id \'pool4.wrong.group\''
+            ):
+                cephadm_module._apply_service_spec(mismatch_nvmeof_spec_group_service_id_with_dot)
+
 
 class TestMonitoring:
     def _get_config(self, url: str) -> str:
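
A small aside on these tests: pytest applies the match argument of pytest.raises with re.search, so the unescaped dots in the patterns above act as regex wildcards that happen to also match the literal dots in the error messages. A minimal demonstration:

    import re

    # '.' matches any character, so both of these succeed; escaping the dots
    # (r'nvmeof\.testpool\.testgroup') would make the first pattern strict.
    assert re.search('nvmeof.testpool.testgroup', 'nvmeof.testpool.testgroup')
    assert re.search('nvmeof.testpool.testgroup', 'nvmeofXtestpoolXtestgroup')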
