Skip to content

Commit b5cad26

Browse files
JonBailey1993 aainscow
authored and committed
osd: Reduce the amount of stats invalidations when rolling shards forwards during peering
Currently stats invalidations happen during peering when rolling forward shards. We can reduce this so we only invalidate the stats when we don't have any other shards at the version we want to roll the stats forwards to. In the cases where we have a shard with the stats at the correct version, we use those stats instead of invalidating. If we do not have any shards with the correct version of stats, we do the invalidate as before. Signed-off-by: Jon Bailey <[email protected]>
1 parent ac4e092 commit b5cad26

File tree

2 files changed

+45
-3
lines changed

2 files changed

+45
-3
lines changed

src/osd/PeeringState.cc

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,8 @@ bool PeeringState::proc_replica_notify(const pg_shard_t &from, const pg_notify_t
455455
psdout(10) << " got osd." << from << " " << oinfo << dendl;
456456
ceph_assert(is_primary());
457457
peer_info[from] = oinfo;
458+
stats_last_update[from] = oinfo.last_update;
459+
458460
update_peer_info(from, oinfo);
459461
might_have_unfound.insert(from);
460462

@@ -1037,6 +1039,7 @@ void PeeringState::clear_primary_state()
10371039
peer_bytes.clear();
10381040
peer_missing.clear();
10391041
peer_last_complete_ondisk.clear();
1042+
stats_last_update.clear();
10401043
peer_activated.clear();
10411044
min_last_complete_ondisk = eversion_t();
10421045
pg_trim_to = eversion_t();
@@ -3359,6 +3362,9 @@ void PeeringState::proc_master_log(
33593362
psdout(10) << "proc_master_log for osd." << from << ": "
33603363
<< olog << " " << omissing << dendl;
33613364
ceph_assert(!is_peered() && is_primary());
3365+
stats_last_update[pg_whoami] = info.last_update;
3366+
psdout(20) << " recording last stats update on " << pg_whoami << ": "
3367+
<< info.last_update << dendl;
33623368

33633369
if (info.partial_writes_last_complete.contains(from.shard)) {
33643370
apply_pwlc(info.partial_writes_last_complete[from.shard], from, oinfo,
@@ -3466,9 +3472,9 @@ void PeeringState::proc_master_log(
34663472
invalidate_stats = true;
34673473
eversion_t previous_version;
34683474
if (p == pg_log.get_log().log.begin()) {
3469-
previous_version = pg_log.get_tail();
3475+
previous_version = pg_log.get_tail();
34703476
} else {
3471-
previous_version = std::prev(p)->version;
3477+
previous_version = std::prev(p)->version;
34723478
}
34733479
rollbacker.get()->partial_write(&info, previous_version, *p);
34743480
olog.head = p->version;
@@ -3481,8 +3487,42 @@ void PeeringState::proc_master_log(
34813487
// make any adjustments to their missing map; we are taking their
34823488
// log to be authoritative (i.e., their entries are by definition
34833489
// non-divergent).
3490+
3491+
// Find the version we want to roll forwards to
3492+
// Iterate over all shards and see if any have a last_update equal to where we want to roll to
3493+
// Copy the stats for this shard into oinfo
3494+
// Set invalidate_stats to false again if we do copy these stats
3495+
// Verify that this reintroduces the bug (Which is intended for stage 2)
3496+
3497+
if (invalidate_stats)
3498+
{
3499+
for (const auto& [shard, my_info] : peer_info)
3500+
{
3501+
if (invalidate_stats && stats_last_update[shard] == olog.head)
3502+
{
3503+
oinfo.stats = my_info.stats;
3504+
invalidate_stats = false;
3505+
psdout(10) << "keeping stats for " << shard
3506+
<< " (wanted last update: " << olog.head
3507+
<< ", stats last update: " << stats_last_update[shard]
3508+
<< ", shard last update: " << my_info.last_update << ")."
3509+
<< dendl;
3510+
} else {
3511+
psdout(20) << "not using stats for " << shard
3512+
<< " (wanted last update: " << olog.head
3513+
<< ", stats last update: " << stats_last_update[shard]
3514+
<< ", shard last update: " << my_info.last_update << ")."
3515+
<< dendl;
3516+
}
3517+
}
3518+
}
3519+
34843520
merge_log(t, oinfo, std::move(olog), from);
34853521
info.stats.stats_invalid |= invalidate_stats;
3522+
if (info.stats.stats_invalid)
3523+
{
3524+
psdout(10) << "invalidating stats for " << pg_whoami << dendl;
3525+
}
34863526
peer_info[from] = oinfo;
34873527
psdout(10) << " peer osd." << from << " now " << oinfo
34883528
<< " " << omissing << dendl;
@@ -3503,7 +3543,7 @@ void PeeringState::proc_master_log(
35033543
}
35043544
update_history(oinfo.history);
35053545
ceph_assert(cct->_conf->osd_find_best_info_ignore_history_les ||
3506-
info.last_epoch_started >= info.history.last_epoch_started);
3546+
info.last_epoch_started >= info.history.last_epoch_started);
35073547

35083548
peer_missing[from].claim(std::move(omissing));
35093549
}

src/osd/PeeringState.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1509,6 +1509,8 @@ class PeeringState : public MissingLoc::MappingInfo {
15091509
eversion_t last_update_applied; ///< last_update readable
15101510
/// last version to which rollback_info trimming has been applied
15111511
eversion_t last_rollback_info_trimmed_to_applied;
1512+
// last version in which the stats for a shard were updated
1513+
std::map<pg_shard_t,eversion_t> stats_last_update;
15121514

15131515
/// Counter to determine when pending flushes have completed
15141516
unsigned flushes_in_progress = 0;

0 commit comments

Comments
 (0)