Skip to content

Commit a476d4e

Browse files
Merge pull request ceph#65788 from JonBailey1993/stats_mismatch_fix
osd: Fix stats mismatch cluster error seen during scrubbing occasionally
2 parents 73ef7db + b7b8567 commit a476d4e

File tree

2 files changed

+3
-45
lines changed

2 files changed

+3
-45
lines changed

src/osd/PeeringState.cc

Lines changed: 3 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -455,8 +455,6 @@ bool PeeringState::proc_replica_notify(const pg_shard_t &from, const pg_notify_t
455455
psdout(10) << " got osd." << from << " " << oinfo << dendl;
456456
ceph_assert(is_primary());
457457
peer_info[from] = oinfo;
458-
stats_last_update[from] = oinfo.last_update;
459-
460458
update_peer_info(from, oinfo);
461459
might_have_unfound.insert(from);
462460

@@ -1039,7 +1037,6 @@ void PeeringState::clear_primary_state()
10391037
peer_bytes.clear();
10401038
peer_missing.clear();
10411039
peer_last_complete_ondisk.clear();
1042-
stats_last_update.clear();
10431040
peer_activated.clear();
10441041
min_last_complete_ondisk = eversion_t();
10451042
pg_trim_to = eversion_t();
@@ -3362,9 +3359,6 @@ void PeeringState::proc_master_log(
33623359
psdout(10) << "proc_master_log for osd." << from << ": "
33633360
<< olog << " " << omissing << dendl;
33643361
ceph_assert(!is_peered() && is_primary());
3365-
stats_last_update[pg_whoami] = info.last_update;
3366-
psdout(20) << " recording last stats update on " << pg_whoami << ": "
3367-
<< info.last_update << dendl;
33683362

33693363
if (info.partial_writes_last_complete.contains(from.shard)) {
33703364
apply_pwlc(info.partial_writes_last_complete[from.shard], from, oinfo,
@@ -3472,9 +3466,9 @@ void PeeringState::proc_master_log(
34723466
invalidate_stats = true;
34733467
eversion_t previous_version;
34743468
if (p == pg_log.get_log().log.begin()) {
3475-
previous_version = pg_log.get_tail();
3469+
previous_version = pg_log.get_tail();
34763470
} else {
3477-
previous_version = std::prev(p)->version;
3471+
previous_version = std::prev(p)->version;
34783472
}
34793473
rollbacker.get()->partial_write(&info, previous_version, *p);
34803474
olog.head = p->version;
@@ -3487,42 +3481,8 @@ void PeeringState::proc_master_log(
34873481
// make any adjustments to their missing map; we are taking their
34883482
// log to be authoritative (i.e., their entries are by definition
34893483
// non-divergent).
3490-
3491-
// Find the version we want to roll forwards to
3492-
// Iterate over all shards and see if any have a last_update equal to where we want to roll to
3493-
// Copy the stats for this shard into oinfo
3494-
// Set invalidate_stats to false again if we do copy these stats
3495-
// Verify that this reintroduces the bug (Which is intended for stage 2)
3496-
3497-
if (invalidate_stats)
3498-
{
3499-
for (const auto& [shard, my_info] : peer_info)
3500-
{
3501-
if (invalidate_stats && stats_last_update[shard] == olog.head)
3502-
{
3503-
oinfo.stats = my_info.stats;
3504-
invalidate_stats = false;
3505-
psdout(10) << "keeping stats for " << shard
3506-
<< " (wanted last update: " << olog.head
3507-
<< ", stats last update: " << stats_last_update[shard]
3508-
<< ", shard last update: " << my_info.last_update << ")."
3509-
<< dendl;
3510-
} else {
3511-
psdout(20) << "not using stats for " << shard
3512-
<< " (wanted last update: " << olog.head
3513-
<< ", stats last update: " << stats_last_update[shard]
3514-
<< ", shard last update: " << my_info.last_update << ")."
3515-
<< dendl;
3516-
}
3517-
}
3518-
}
3519-
35203484
merge_log(t, oinfo, std::move(olog), from);
35213485
info.stats.stats_invalid |= invalidate_stats;
3522-
if (info.stats.stats_invalid)
3523-
{
3524-
psdout(10) << "invalidating stats for " << pg_whoami << dendl;
3525-
}
35263486
peer_info[from] = oinfo;
35273487
psdout(10) << " peer osd." << from << " now " << oinfo
35283488
<< " " << omissing << dendl;
@@ -3543,7 +3503,7 @@ void PeeringState::proc_master_log(
35433503
}
35443504
update_history(oinfo.history);
35453505
ceph_assert(cct->_conf->osd_find_best_info_ignore_history_les ||
3546-
info.last_epoch_started >= info.history.last_epoch_started);
3506+
info.last_epoch_started >= info.history.last_epoch_started);
35473507

35483508
peer_missing[from].claim(std::move(omissing));
35493509
}

src/osd/PeeringState.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1509,8 +1509,6 @@ class PeeringState : public MissingLoc::MappingInfo {
15091509
eversion_t last_update_applied; ///< last_update readable
15101510
/// last version to which rollback_info trimming has been applied
15111511
eversion_t last_rollback_info_trimmed_to_applied;
1512-
// last version in which the stats for a shard were updated
1513-
std::map<pg_shard_t,eversion_t> stats_last_update;
15141512

15151513
/// Counter to determine when pending flushes have completed
15161514
unsigned flushes_in_progress = 0;

0 commit comments

Comments
 (0)