Skip to content

Commit afd7739

Browse files
Merge pull request ceph#63398 from NitzanMordhai/wip-nitzan-calc-pool-availability-valgrind-invalid-read
src/mon/MgrStatMonitor: fix invalid iterator increment in calc_pool_availability()
2 parents 47c922c + 7369a4d commit afd7739

File tree

1 file changed

+69
-71
lines changed

1 file changed

+69
-71
lines changed

src/mon/MgrStatMonitor.cc

Lines changed: 69 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -113,88 +113,86 @@ void MgrStatMonitor::calc_pool_availability()
113113
return;
114114
}
115115

116-
// if reset_availability_last_uptime_downtime_val is not utime_t(1, 2),
117-
// update last_uptime and last_downtime for all pools to the
118-
// recorded values
119-
if (reset_availability_last_uptime_downtime_val.has_value()) {
120-
for (const auto& i : pool_availability) {
121-
const auto& poolid = i.first;
122-
pool_availability[poolid].last_downtime = reset_availability_last_uptime_downtime_val.value();
123-
pool_availability[poolid].last_uptime = reset_availability_last_uptime_downtime_val.value();
116+
// Add new pools from digest
117+
for ([[maybe_unused]] const auto& [poolid, _] : digest.pool_pg_unavailable_map) {
118+
if (!pool_availability.contains(poolid)) {
119+
pool_availability.emplace(poolid, PoolAvailability{});
120+
dout(20) << fmt::format("{}: Adding pool: {}",
121+
__func__, poolid) << dendl;
124122
}
125-
dout(20) << __func__ << " reset last_uptime and last_downtime to "
126-
<< reset_availability_last_uptime_downtime_val << dendl;
127-
reset_availability_last_uptime_downtime_val.reset();
128123
}
129124

130-
auto pool_avail_end = pool_availability.end();
131-
for (const auto& i : digest.pool_pg_unavailable_map) {
132-
const auto& poolid = i.first;
133-
if (pool_availability.find(poolid) == pool_avail_end){
134-
// New Pool so we add.
135-
pool_availability.insert({poolid, PoolAvailability()});
136-
dout(20) << __func__ << "Adding pool: " << poolid << dendl;
125+
// Remove unavailable pools
126+
// A pool is removed if it meets either of the following criteria:
127+
// 1. It is not present in the digest's pool_pg_unavailable_map.
128+
// 2. It is not present in the osdmap (checked via mon.osdmon()->osdmap).
129+
std::erase_if(pool_availability, [&](const auto& kv) {
130+
const auto& poolid = kv.first;
131+
132+
if (!digest.pool_pg_unavailable_map.contains(poolid)) {
133+
dout(20) << fmt::format("{}: Deleting pool (not in digest): {}",
134+
__func__, poolid) << dendl;
135+
return true;
137136
}
138-
}
139-
utime_t now(ceph_clock_now());
140-
auto pool_unavail_end = digest.pool_pg_unavailable_map.end();
141-
for (const auto& i : pool_availability) {
142-
const auto& poolid = i.first;
143-
if (digest.pool_pg_unavailable_map.find(poolid) ==
144-
pool_unavail_end) {
145-
// delete none exist pool
146-
pool_availability.erase(poolid);
147-
dout(20) << __func__ << "Deleting pool: " << poolid << dendl;
148-
continue;
149-
}
150-
if (mon.osdmon()->osdmap.have_pg_pool(poolid)){
151-
// Currently, couldn't find an elegant way to get pool name
152-
pool_availability[poolid].pool_name = mon.osdmon()->osdmap.get_pool_name(poolid);
153-
} else {
154-
pool_availability.erase(poolid);
155-
dout(20) << __func__ << "pool: "
156-
<< poolid << " no longer exists in osdmap! Deleting pool: "
157-
<< poolid << dendl;
158-
continue;
137+
if (!mon.osdmon()->osdmap.have_pg_pool(poolid)) {
138+
dout(20) << fmt::format("{}: Deleting pool (not in osdmap): {}",
139+
__func__, poolid) << dendl;
140+
return true;
159141
}
160-
if (pool_availability[poolid].is_avail) {
161-
if (!digest.pool_pg_unavailable_map[poolid].empty()) {
162-
// avail to unavail
163-
dout(20) << __func__
164-
<< ": Pool " << poolid << " status: Available to Unavailable" << dendl;
165-
pool_availability[poolid].is_avail = false;
166-
pool_availability[poolid].num_failures += 1;
167-
pool_availability[poolid].last_downtime = now;
168-
pool_availability[poolid].uptime +=
169-
now - pool_availability[poolid].last_uptime;
142+
return false;
143+
});
144+
145+
// Update pool availability
146+
utime_t now(ceph_clock_now());
147+
for (auto& [poolid, avail] : pool_availability) {
148+
avail.pool_name = mon.osdmon()->osdmap.get_pool_name(poolid);
149+
150+
auto it = digest.pool_pg_unavailable_map.find(poolid);
151+
const bool currently_avail = (it != digest.pool_pg_unavailable_map.end()) &&
152+
it->second.empty();
153+
154+
if (avail.is_avail) {
155+
if (!currently_avail) { // Available → Unavailable
156+
dout(20) << fmt::format("{}: Pool {} status: Available to Unavailable",
157+
__func__, poolid) << dendl;
158+
avail.is_avail = false;
159+
++avail.num_failures;
160+
avail.last_downtime = now;
161+
avail.uptime += now - avail.last_uptime;
170162
} else {
171-
// avail to avail
172-
dout(20) << __func__
173-
<< ": Pool " << poolid << " status: Available to Available" << dendl;
174-
pool_availability[poolid].uptime +=
175-
now - pool_availability[poolid].last_uptime;
176-
pool_availability[poolid].last_uptime = now;
163+
// Available to Available
164+
dout(20) << fmt::format("{}: Pool {} status: Available to Available",
165+
__func__, poolid) << dendl;
166+
avail.uptime += now - avail.last_uptime;
167+
avail.last_uptime = now;
177168
}
178-
} else {
179-
if (!digest.pool_pg_unavailable_map[poolid].empty()) {
180-
// unavail to unavail
181-
dout(20) << __func__
182-
<< ": Pool " << poolid << " status: Unavailable to Unavailable" << dendl;
183-
pool_availability[poolid].downtime +=
184-
now - pool_availability[poolid].last_downtime;
185-
pool_availability[poolid].last_downtime = now;
186-
} else {
187-
// unavail to avail
188-
dout(20) << __func__
189-
<< ": Pool " << poolid << " status: Unavailable to Available" << dendl;
190-
pool_availability[poolid].is_avail = true;
191-
pool_availability[poolid].last_uptime = now;
192-
pool_availability[poolid].uptime +=
193-
now - pool_availability[poolid].last_downtime;
169+
} else { // Unavailable
170+
if (currently_avail) { // Unavailable to Available
171+
dout(20) << fmt::format("{}: Pool {} status: Unavailable to Available",
172+
__func__, poolid) << dendl;
173+
avail.is_avail = true;
174+
avail.last_uptime = now;
175+
avail.uptime += now - avail.last_downtime;
176+
} else { // Unavailable to Unavailable
177+
dout(20) << fmt::format("{}: Pool {} status: Unavailable to Unavailable",
178+
__func__, poolid) << dendl;
179+
avail.downtime += now - avail.last_downtime;
180+
avail.last_downtime = now;
194181
}
195182
}
183+
184+
// if reset_availability_last_uptime_downtime_val is not utime_t(1, 2),
185+
// update last_uptime and last_downtime for all pools to the
186+
// recorded values
187+
if (reset_availability_last_uptime_downtime_val.has_value()) {
188+
dout(20) << fmt::format("{}: Pool {} reset last_uptime and last_downtime to {}",
189+
__func__, poolid, reset_availability_last_uptime_downtime_val.value()) << dendl;
190+
avail.last_downtime = reset_availability_last_uptime_downtime_val.value();
191+
avail.last_uptime = reset_availability_last_uptime_downtime_val.value();
192+
}
196193
}
197194
pending_pool_availability.swap(pool_availability);
195+
reset_availability_last_uptime_downtime_val.reset();
198196
}
199197

200198
void MgrStatMonitor::update_from_paxos(bool *need_bootstrap)

0 commit comments

Comments
 (0)