Skip to content

Commit 3cb404b

Browse files
authored
Merge pull request ceph#54999 from Matan-B/wip-matanb-mon-osd-epochs
mon/OSDMonitor: fix get_min_last_epoch_clean() Reviewed-by: Samuel Just <[email protected]>
2 parents e7ecafc + 685047b commit 3cb404b

File tree

2 files changed

+33
-14
lines changed

2 files changed

+33
-14
lines changed

src/mon/OSDMonitor.cc

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,7 @@ void LastEpochClean::report(unsigned pg_num, const pg_t& pg,
395395
return lec.report(pg_num, pg.ps(), last_epoch_clean);
396396
}
397397

398-
epoch_t LastEpochClean::get_lower_bound(const OSDMap& latest) const
398+
epoch_t LastEpochClean::get_lower_bound_by_pool(const OSDMap& latest) const
399399
{
400400
auto floor = latest.get_epoch();
401401
for (auto& pool : latest.get_pools()) {
@@ -906,12 +906,7 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap)
906906
if (state & CEPH_OSD_UP) {
907907
// could be marked up *or* down, but we're too lazy to check which
908908
last_osd_report.erase(osd);
909-
}
910-
}
911-
for (auto [osd, weight] : inc.new_weight) {
912-
if (weight == CEPH_OSD_OUT) {
913-
// manually marked out, so drop it
914-
osd_epochs.erase(osd);
909+
osd_epochs.erase(osd);
915910
}
916911
}
917912
}
@@ -2285,13 +2280,21 @@ version_t OSDMonitor::get_trim_to() const
22852280
return 0;
22862281
}
22872282

2283+
/* There are two constraints on trimming:
2284+
* 1. we must not trim past the last_epoch_clean for any pg
2285+
* 2. we must not trim past the last reported epoch for any up
2286+
* osds.
2287+
*
2288+
* LastEpochClean::get_lower_bound_by_pool gives a value <= constraint 1.
2289+
* For constraint 2, we take the min over osd_epochs, which is populated with
2290+
* MOSDBeacon::version, see OSDMonitor::prepare_beacon
2291+
*/
22882292
epoch_t OSDMonitor::get_min_last_epoch_clean() const
22892293
{
2290-
auto floor = last_epoch_clean.get_lower_bound(osdmap);
2291-
// also scan osd epochs
2292-
// don't trim past the oldest reported osd epoch
2294+
auto floor = last_epoch_clean.get_lower_bound_by_pool(osdmap);
22932295
for (auto [osd, epoch] : osd_epochs) {
22942296
if (epoch < floor) {
2297+
ceph_assert(osdmap.is_up(osd));
22952298
floor = epoch;
22962299
}
22972300
}
@@ -4399,8 +4402,8 @@ bool OSDMonitor::prepare_beacon(MonOpRequestRef op)
43994402

44004403
last_osd_report[from].first = ceph_clock_now();
44014404
last_osd_report[from].second = beacon->osd_beacon_report_interval;
4405+
ceph_assert(osdmap.is_up(from));
44024406
osd_epochs[from] = beacon->version;
4403-
44044407
for (const auto& pg : beacon->pgs) {
44054408
if (auto* pool = osdmap.get_pg_pool(pg.pool()); pool != nullptr) {
44064409
unsigned pg_num = pool->get_pg_num();

src/mon/OSDMonitor.h

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,13 @@ class LastEpochClean {
114114
public:
115115
void report(unsigned pg_num, const pg_t& pg, epoch_t last_epoch_clean);
116116
void remove_pool(uint64_t pool);
117-
epoch_t get_lower_bound(const OSDMap& latest) const;
117+
/**
118+
* get_lower_bound_by_pool
119+
*
120+
* Returns epoch e such that e <= pg.last_epoch_clean for all pgs in cluster.
121+
* May return 0 if any pool does not have comprehensive values for all pgs.
122+
*/
123+
epoch_t get_lower_bound_by_pool(const OSDMap& latest) const;
118124

119125
void dump(Formatter *f) const;
120126
};
@@ -639,8 +645,18 @@ class OSDMonitor : public PaxosService,
639645

640646
// when we last received PG stats from each osd and the osd's osd_beacon_report_interval
641647
std::map<int, std::pair<utime_t, int>> last_osd_report;
642-
// TODO: use last_osd_report to store the osd report epochs, once we don't
643-
// need to upgrade from pre-luminous releases.
648+
/**
649+
* osd_epochs
650+
*
651+
* Records the MOSDBeacon::version (the osd epoch at which the OSD sent the
652+
* beacon) of the most recent beacon received from each currently up OSD.
653+
* Used in OSDMonitor::get_min_last_epoch_clean().
654+
* Down osds are trimmed upon commit of each map
655+
* (OSDMonitor::update_from_paxos).
656+
*
657+
* TODO: use last_osd_report to store the osd report epochs, once we don't
658+
* need to upgrade from pre-luminous releases.
659+
*/
644660
std::map<int,epoch_t> osd_epochs;
645661
LastEpochClean last_epoch_clean;
646662
bool preprocess_beacon(MonOpRequestRef op);

0 commit comments

Comments
 (0)