Skip to content

Commit 473eada

Browse files
authored
Merge pull request ceph#54401 from adk3798/osd-rm-stop-reweight
mgr/cephadm: fix reweighting of OSD when OSD removal is stopped

Reviewed-by: Michael Fritch <[email protected]>
2 parents 7e49853 + 99fc4a8 commit 473eada

File tree

3 files changed

+35
-11
lines changed

3 files changed

+35
-11
lines changed

src/pybind/mgr/cephadm/module.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3379,8 +3379,7 @@ def stop_remove_osds(self, osd_ids: List[str]) -> str:
33793379
"""
33803380
for osd_id in osd_ids:
33813381
try:
3382-
self.to_remove_osds.rm(OSD(osd_id=int(osd_id),
3383-
remove_util=self.to_remove_osds.rm_util))
3382+
self.to_remove_osds.rm_by_osd_id(int(osd_id))
33843383
except (NotFoundError, KeyError, ValueError):
33853384
return f'Unable to find OSD in the queue: {osd_id}'
33863385

src/pybind/mgr/cephadm/services/osd.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -664,6 +664,7 @@ def start(self) -> None:
664664
return None
665665
self.started = True
666666
self.stopped = False
667+
self.original_weight = self.rm_util.get_weight(self)
667668

668669
def start_draining(self) -> bool:
669670
if self.stopped:
@@ -672,7 +673,6 @@ def start_draining(self) -> bool:
672673
if self.replace:
673674
self.rm_util.set_osd_flag([self], 'out')
674675
else:
675-
self.original_weight = self.rm_util.get_weight(self)
676676
self.rm_util.reweight_osd(self, 0.0)
677677
self.drain_started_at = datetime.utcnow()
678678
self.draining = True
@@ -761,6 +761,7 @@ def to_json(self) -> dict:
761761
out['force'] = self.force
762762
out['zap'] = self.zap
763763
out['hostname'] = self.hostname # type: ignore
764+
out['original_weight'] = self.original_weight
764765

765766
for k in ['drain_started_at', 'drain_stopped_at', 'drain_done_at', 'process_started_at']:
766767
if getattr(self, k):
@@ -953,6 +954,16 @@ def enqueue(self, osd: "OSD") -> None:
953954
self.osds.add(osd)
954955
osd.start()
955956

957+
def rm_by_osd_id(self, osd_id: int) -> None:
958+
osd: Optional["OSD"] = None
959+
for o in self.osds:
960+
if o.osd_id == osd_id:
961+
osd = o
962+
if not osd:
963+
logger.debug(f"Could not find osd with id {osd_id} in queue.")
964+
raise KeyError(f'No osd with id {osd_id} in removal queue')
965+
self.rm(osd)
966+
956967
def rm(self, osd: "OSD") -> None:
957968
if not osd.exists:
958969
raise NotFoundError()

src/pybind/mgr/cephadm/tests/test_cephadm.py

Lines changed: 22 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1287,7 +1287,11 @@ def test_raw_driveselection_to_ceph_volume(self, cephadm_module, devices, previe
12871287
))
12881288
@mock.patch("cephadm.services.osd.OSD.exists", True)
12891289
@mock.patch("cephadm.services.osd.RemoveUtil.get_pg_count", lambda _, __: 0)
1290-
def test_remove_osds(self, cephadm_module):
1290+
@mock.patch("cephadm.services.osd.RemoveUtil.get_weight")
1291+
@mock.patch("cephadm.services.osd.RemoveUtil.reweight_osd")
1292+
def test_remove_osds(self, _reweight_osd, _get_weight, cephadm_module):
1293+
osd_initial_weight = 2.1
1294+
_get_weight.return_value = osd_initial_weight
12911295
with with_host(cephadm_module, 'test'):
12921296
CephadmServe(cephadm_module)._refresh_host_daemons('test')
12931297
c = cephadm_module.list_daemons()
@@ -1297,13 +1301,23 @@ def test_remove_osds(self, cephadm_module):
12971301
out = wait(cephadm_module, c)
12981302
assert out == ["Removed osd.0 from host 'test'"]
12991303

1300-
cephadm_module.to_remove_osds.enqueue(OSD(osd_id=0,
1301-
replace=False,
1302-
force=False,
1303-
hostname='test',
1304-
process_started_at=datetime_now(),
1305-
remove_util=cephadm_module.to_remove_osds.rm_util
1306-
))
1304+
osd_0 = OSD(osd_id=0,
1305+
replace=False,
1306+
force=False,
1307+
hostname='test',
1308+
process_started_at=datetime_now(),
1309+
remove_util=cephadm_module.to_remove_osds.rm_util
1310+
)
1311+
1312+
cephadm_module.to_remove_osds.enqueue(osd_0)
1313+
_get_weight.assert_called()
1314+
1315+
# test that OSD is properly reweighted on removal
1316+
cephadm_module.stop_remove_osds([0])
1317+
_reweight_osd.assert_called_with(mock.ANY, osd_initial_weight)
1318+
1319+
# add OSD back to queue and test normal removal queue processing
1320+
cephadm_module.to_remove_osds.enqueue(osd_0)
13071321
cephadm_module.to_remove_osds.process_removal_queue()
13081322
assert cephadm_module.to_remove_osds == OSDRemovalQueue(cephadm_module)
13091323

0 commit comments

Comments
 (0)