Skip to content

Commit d2c4e63

Browse files
committed
cephadm: Provide user friendly error message if osd device path is invalid
Signed-off-by: Kushal Deb <[email protected]>
1 parent fa05bc4 commit d2c4e63

File tree

5 files changed

+61
-7
lines changed

5 files changed

+61
-7
lines changed

qa/tasks/cephadm.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1117,6 +1117,7 @@ def ceph_osds(ctx, config):
11171117

11181118
cur = 0
11191119
raw = config.get('raw-osds', False)
1120+
use_skip_validation = True
11201121
for osd_id in sorted(id_to_remote.keys()):
11211122
if raw:
11221123
raise ConfigError(
@@ -1151,7 +1152,16 @@ def ceph_osds(ctx, config):
11511152
osd_method = config.get('osd_method')
11521153
if osd_method:
11531154
add_osd_args.append(osd_method)
1154-
_shell(ctx, cluster_name, remote, add_osd_args)
1155+
if use_skip_validation:
1156+
try:
1157+
_shell(ctx, cluster_name, remote, add_osd_args + ['--skip-validation'])
1158+
except Exception as e:
1159+
log.warning(f"--skip-validation falied with error {e}. Retrying without it")
1160+
use_skip_validation = False
1161+
_shell(ctx, cluster_name, remote, add_osd_args)
1162+
else:
1163+
_shell(ctx, cluster_name, remote, add_osd_args)
1164+
11551165
ctx.daemons.register_daemon(
11561166
remote, 'osd', id_,
11571167
cluster=cluster_name,

src/pybind/mgr/cephadm/module.py

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2979,8 +2979,45 @@ def create_osd_default_spec(self, drive_group: DriveGroupSpec) -> None:
29792979
self.spec_store.save(osd_default_spec)
29802980
self.apply([osd_default_spec])
29812981

2982+
def validate_device(self, host_name: str, drive_group: DriveGroupSpec) -> str:
    """
    Validates whether the specified devices exist and are available for OSD creation.

    The check runs against the cached device list held in ``self.cache.devices``
    for ``host_name``; the host itself is not queried here.

    Args:
        host_name: Host whose cached device list is consulted.
        drive_group: Spec whose ``data_devices.paths`` entries are validated.

    Returns:
        str: An error message if validation fails; an empty string if validation passes.
    """
    try:
        data_devices = drive_group.data_devices
        if not data_devices or not data_devices.paths:
            return "Error: No data devices specified."

        if self.cache.is_host_unreachable(host_name):
            return f"Host {host_name} is not reachable (it may be offline or in maintenance mode)."

        host_cache = self.cache.devices.get(host_name, [])
        if not host_cache:
            return (f"Error: No devices found for host {host_name}. "
                    "You can check known devices with 'ceph orch device ls'. "
                    "If no devices appear, wait for an automatic refresh.")

        # Index the cached devices once instead of re-scanning the list for
        # every requested path. setdefault() keeps the first entry seen for a
        # path, so a duplicated path still resolves to its first occurrence.
        devices_by_path: dict = {}
        available_paths: set = set()
        for dev in host_cache:
            devices_by_path.setdefault(dev.path, dev)
            if dev.available:
                available_paths.add(dev.path)
        self.log.debug(f"Host {host_name} has {len(available_paths)} available devices.")

        for device in data_devices.paths:
            matching_device = devices_by_path.get(device.path)
            # Compare against None explicitly: "not found" must not depend on
            # the truthiness of the device object itself.
            if matching_device is None:
                return f"Error: Device {device.path} is not found on host {host_name}"
            if not matching_device.available:
                return (f"Error: Device {device.path} is present but unavailable for OSD creation. "
                        f"Reason: {', '.join(matching_device.rejected_reasons) if matching_device.rejected_reasons else 'Unknown'}")

        return ""
    except AttributeError as e:
        # Malformed spec/cache objects are reported to the caller as a message
        # rather than raised, in line with the str-returning contract above.
        return f"Error- Attribute issue: {e}"
3018+
29823019
@handle_orch_error
2983-
def create_osds(self, drive_group: DriveGroupSpec) -> str:
3020+
def create_osds(self, drive_group: DriveGroupSpec, skip_validation: bool = False) -> str:
29843021
hosts: List[HostSpec] = self.inventory.all_specs()
29853022
filtered_hosts: List[str] = drive_group.placement.filter_matching_hostspecs(hosts)
29863023
if not filtered_hosts:
@@ -2994,6 +3031,12 @@ def create_osds(self, drive_group: DriveGroupSpec) -> str:
29943031
self.create_osd_default_spec(drive_group)
29953032
else:
29963033
self.log.info("osd.default already exists.")
3034+
host_name = filtered_hosts[0]
3035+
if not skip_validation:
3036+
self.log.warning("Skipping the validation of device paths for osd daemon add command. Please make sure that the osd path is valid")
3037+
err_msg = self.validate_device(host_name, drive_group)
3038+
if err_msg:
3039+
return err_msg
29973040

29983041
return self.osd_service.create_from_spec(drive_group)
29993042

src/pybind/mgr/cephadm/tests/test_cephadm.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1208,7 +1208,7 @@ def test_create_osds(self, cephadm_module):
12081208
data_devices=DeviceSelection(paths=['']))
12091209
c = cephadm_module.create_osds(dg)
12101210
out = wait(cephadm_module, c)
1211-
assert out == "Created no osd(s) on host test; already created?"
1211+
assert "Error: No devices found for host test." in out
12121212
bad_dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='invalid_host'),
12131213
data_devices=DeviceSelection(paths=['']))
12141214
c = cephadm_module.create_osds(bad_dg)
@@ -1222,7 +1222,7 @@ def test_create_noncollocated_osd(self, cephadm_module):
12221222
data_devices=DeviceSelection(paths=['']))
12231223
c = cephadm_module.create_osds(dg)
12241224
out = wait(cephadm_module, c)
1225-
assert out == "Created no osd(s) on host test; already created?"
1225+
assert "Error: No devices found for host test." in out
12261226

12271227
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
12281228
@mock.patch('cephadm.services.osd.OSDService._run_ceph_volume_command')

src/pybind/mgr/orchestrator/_interface.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -737,7 +737,7 @@ def daemon_action(self, action: str, daemon_name: str, image: Optional[str] = No
737737
# assert action in ["start", "stop", "reload, "restart", "redeploy"]
738738
raise NotImplementedError()
739739

740-
def create_osds(self, drive_group: DriveGroupSpec) -> OrchResult[str]:
740+
def create_osds(self, drive_group: DriveGroupSpec, skip_validation: bool = False) -> OrchResult[str]:
741741
"""
742742
Create one or more OSDs within a single Drive Group.
743743

src/pybind/mgr/orchestrator/module.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1515,7 +1515,8 @@ def _apply_osd(self,
15151515
@_cli_write_command('orch daemon add osd')
15161516
def _daemon_add_osd(self,
15171517
svc_arg: Optional[str] = None,
1518-
method: Optional[OSDMethod] = None) -> HandleCommandResult:
1518+
method: Optional[OSDMethod] = None,
1519+
skip_validation: bool = False) -> HandleCommandResult:
15191520
"""Create OSD daemon(s) on specified host and device(s) (e.g., ceph orch daemon add osd myhost:/dev/sdb)"""
15201521
# Create one or more OSDs"""
15211522

@@ -1570,7 +1571,7 @@ def _daemon_add_osd(self,
15701571
msg = f"Invalid 'host:device' spec: '{svc_arg}': {e}" + usage
15711572
return HandleCommandResult(-errno.EINVAL, stderr=msg)
15721573

1573-
completion = self.create_osds(drive_group)
1574+
completion = self.create_osds(drive_group, skip_validation)
15741575
raise_if_exception(completion)
15751576
return HandleCommandResult(stdout=completion.result_str())
15761577

0 commit comments

Comments
 (0)