Skip to content

Commit bda005b

Browse files
Merge pull request ceph#65131 from kamoltat/wip-ksirivad-fix-72647
mgr/progress: compare up set instead of acting set
2 parents 091d516 + d13d739 commit bda005b

File tree

1 file changed

+33 -38 lines changed

1 file changed

+33
-38
lines changed

src/pybind/mgr/progress/module.py

Lines changed: 33 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -210,11 +210,14 @@ def global_event_update_progress(self, log):
210210
# possible that some pgs might not have any movement
211211
# even before the start of the event.
212212
if pg['reported_epoch'] < self._start_epoch:
213-
log.debug("Skipping pg {0} since reported_epoch {1} < start_epoch {2}"
214-
.format(pg['pgid'], pg['reported_epoch'], self._start_epoch))
215213
skipped_pgs += 1
216214
continue
217215

216+
# Log skipped PGs
217+
if skipped_pgs > 0:
218+
log.debug("Skipped {0} PGs with reported_epoch < start_epoch {1}"
219+
.format(skipped_pgs, self._start_epoch))
220+
218221
if self._active_clean_num != new_active_clean_num:
219222
# Have this case to know when need to update
220223
# the progress
@@ -492,57 +495,39 @@ def config_notify(self):
492495
self.get_module_option(opt['name']))
493496
self.log.debug(' %s = %s', opt['name'], getattr(self, opt['name']))
494497

495-
def _osd_in_out(self, old_map, old_dump, new_map, osd_id, marked):
496-
# type: (OSDMap, Dict, OSDMap, str, str) -> None
498+
def _osd_in_out(self, old_map: OSDMap, old_dump: Dict,
499+
new_map: OSDMap, osd_id: str, marked: str) -> None:
497500
# A function that will create or complete an event when an
498501
# OSD is marked in or out according to the affected PGs
499502
affected_pgs = []
500503
for pool in old_dump['pools']:
501504
pool_id = pool['pool'] # type: str
502505
for ps in range(0, pool['pg_num']):
503506

504-
# Was this OSD affected by the OSD coming in/out?
505-
# Compare old and new osds using
506-
# data from the json dump
507+
# Was this pg affected by the OSD coming in/out?
508+
# Compare old and new OSDs using
509+
# data from the old/new acting and up sets.
507510
old_up_acting = old_map.pg_to_up_acting_osds(pool['pool'], ps)
508-
old_osds = set(old_up_acting['acting'])
511+
old_acting_osds = old_up_acting['acting']
512+
old_up_set_osds = old_up_acting['up']
509513
new_up_acting = new_map.pg_to_up_acting_osds(pool['pool'], ps)
510-
new_osds = set(new_up_acting['acting'])
514+
new_acting_osds = new_up_acting['acting']
515+
new_up_set_osds = new_up_acting['up']
511516

512-
# Check the osd_id being in the acting set for both old
513-
# and new maps to cover both out and in cases
514-
was_on_out_or_in_osd = osd_id in old_osds or osd_id in new_osds
515-
if not was_on_out_or_in_osd:
517+
# Check if this PG involves the OSD that was marked in/out
518+
osd_affected = osd_id in old_up_set_osds or osd_id in new_up_set_osds
519+
if not osd_affected:
516520
continue
517-
518-
self.log.debug("pool_id, ps = {0}, {1}".format(
519-
pool_id, ps
520-
))
521-
522-
self.log.debug(
523-
"old_up_acting: {0}".format(json.dumps(old_up_acting, indent=4, sort_keys=True)))
524-
525-
# Has this OSD been assigned a new location?
526-
# (it might not be if there is no suitable place to move
527-
# after an OSD is marked in/out)
528521

529-
is_relocated = old_osds != new_osds
530-
531-
self.log.debug(
532-
"new_up_acting: {0}".format(json.dumps(new_up_acting,
533-
indent=4,
534-
sort_keys=True)))
522+
up_set_changed = old_up_set_osds != new_up_set_osds
535523

536-
if was_on_out_or_in_osd and is_relocated:
524+
if osd_affected and up_set_changed:
525+
self.log.debug("PG %s.%x: acting %s->%s up %s->%s (relocated=%s)",
526+
pool_id, ps, old_acting_osds, new_acting_osds, old_up_set_osds,
527+
new_up_set_osds, up_set_changed)
537528
# This PG is now in motion, track its progress
538529
affected_pgs.append(PgId(pool_id, ps))
539530

540-
# In the case that we ignored some PGs, log the reason why (we may
541-
# not end up creating a progress event)
542-
543-
self.log.warning("{0} PGs affected by osd.{1} being marked {2}".format(
544-
len(affected_pgs), osd_id, marked))
545-
546531
# In the case of the osd coming back in, we might need to cancel
547532
# previous recovery event for that osd
548533
if marked == "in":
@@ -556,8 +541,17 @@ def _osd_in_out(self, old_map, old_dump, new_map, osd_id, marked):
556541
self._complete(ev)
557542
except KeyError:
558543
self.log.warning("_osd_in_out: ev {0} does not exist".format(ev_id))
544+
545+
# In the case that we ignored some PGs, log the reason why (we may
546+
# not end up creating a progress event)
547+
548+
if (len(affected_pgs) == 0):
549+
self.log.warning("No PGs affected by osd.{0} being marked {1}, no recovery event created".format(
550+
osd_id, marked))
551+
else:
552+
self.log.warning("{0} PGs affected by osd.{1} being marked {2}".format(
553+
len(affected_pgs), osd_id, marked))
559554

560-
if len(affected_pgs) > 0:
561555
r_ev = PgRecoveryEvent(
562556
"Rebalancing after osd.{0} marked {1}".format(osd_id, marked),
563557
refs=[("osd", osd_id)],
@@ -580,6 +574,7 @@ def _osdmap_changed(self, old_osdmap, new_osdmap):
580574
osd_id = osd['osd']
581575
new_weight = osd['in']
582576
if osd_id in old_osds:
577+
self.log.debug("Processing osd.{0}: {1} -> {2}".format(osd_id, old_osds[osd_id]['weight'], osd['weight']))
583578
old_weight = old_osds[osd_id]['in']
584579

585580
if new_weight == 0.0 and old_weight > new_weight:

0 commit comments

Comments
 (0)