@@ -331,7 +331,7 @@ void PeeringState::apply_pwlc(const std::pair<eversion_t, eversion_t> pwlc,
331331 // knowledge of partial_writes
332332 const auto & [fromversion, toversion] = pwlc;
333333 if (toversion > info.last_update ) {
334- if (fromversion. version <= info.last_update . version ) {
334+ if (fromversion <= info.last_update ) {
335335 if (info.last_complete == info.last_update ) {
336336 psdout (10 ) << " osd." << shard << " has last_complete"
337337 << " =last_update " << info.last_update
@@ -368,8 +368,9 @@ void PeeringState::update_peer_info(const pg_shard_t &from,
368368{
369369 // Merge pwlc information from another shard into
370370 // info.partial_writes_last_complete keeping the newest
371- // updates
372- if (!oinfo.partial_writes_last_complete .empty ()) {
371+ // updates. Ignore pwlc from nonprimary shards.
372+ if (!oinfo.partial_writes_last_complete .empty ()&&
373+ !pool.info .is_nonprimary_shard (from.shard )) {
373374 bool updated = false ;
374375 // oinfo includes partial_writes_last_complete data.
375376 // Merge this with our copy keeping the most up to date versions
@@ -379,12 +380,15 @@ void PeeringState::update_peer_info(const pg_shard_t &from,
379380 if (info.partial_writes_last_complete .contains (shard)) {
380381 auto & [fromversion, toversion] =
381382 info.partial_writes_last_complete [shard];
382- // Prefer pwlc with a newer toversion, if toversion matches prefer an
383- // older fromversion.
384- if ((ofromversion.epoch > fromversion.epoch ) ||
385- ((ofromversion.epoch == fromversion.epoch ) && (otoversion > toversion)) ||
386- ((ofromversion.epoch == fromversion.epoch ) && (otoversion == toversion) &&
387- (ofromversion.version < fromversion.version ))) {
383+ // Prefer pwlc with a newer epoch, then pwlc with a newer
384+ // toversion, then pwlc with an older fromversion.
385+ bool newer_epoch = (oinfo.partial_writes_last_complete_epoch >
386+ info.partial_writes_last_complete_epoch );
387+ bool same_epoch = (oinfo.partial_writes_last_complete_epoch ==
388+ info.partial_writes_last_complete_epoch );
389+ if (newer_epoch ||
390+ (same_epoch && (otoversion > toversion)) ||
391+ (same_epoch && (otoversion == toversion) && (ofromversion < fromversion))) {
388392 if (!updated) {
389393 updated = true ;
390394 psdout (10 ) << " osd." << from
@@ -408,6 +412,10 @@ void PeeringState::update_peer_info(const pg_shard_t &from,
408412 if (updated) {
409413 psdout (10 ) << " pwlc=" << info.partial_writes_last_complete << dendl;
410414 }
415+ // Keep the newer of our pwlc epoch and the one reported in oinfo
416+ info.partial_writes_last_complete_epoch = std::max (
417+ info.partial_writes_last_complete_epoch ,
418+ oinfo.partial_writes_last_complete_epoch );
411419 }
412420 // 3 cases:
413421 // 1. This is the primary, from is the shard that sent the oinfo which may
@@ -2756,12 +2764,14 @@ bool PeeringState::search_for_missing(
27562764 tinfo.pgid .shard = pg_whoami.shard ;
27572765 // add partial write from our info
27582766 tinfo.partial_writes_last_complete = info.partial_writes_last_complete ;
2767+ tinfo.partial_writes_last_complete_epoch = info.partial_writes_last_complete_epoch ;
27592768 if (info.partial_writes_last_complete .contains (from.shard )) {
27602769 apply_pwlc (info.partial_writes_last_complete [from.shard ], from, tinfo);
27612770 }
27622771 if (!tinfo.partial_writes_last_complete .empty ()) {
27632772 psdout (20 ) << " sending info to " << from
2764- << " pwlc=" << tinfo.partial_writes_last_complete
2773+ << " pwlc=e" << tinfo.partial_writes_last_complete_epoch
2774+ << " :" << tinfo.partial_writes_last_complete
27652775 << " info=" << tinfo
27662776 << dendl;
27672777 }
@@ -3020,7 +3030,8 @@ void PeeringState::activate(
30203030 << " is up to date, queueing in pending_activators" << dendl;
30213031 if (!info.partial_writes_last_complete .empty ()) {
30223032 psdout (20 ) << " sending info to " << peer
3023- << " pwlc=" << info.partial_writes_last_complete
3033+ << " pwlc=e" << info.partial_writes_last_complete_epoch
3034+ << " :" << info.partial_writes_last_complete
30243035 << " info=" << info
30253036 << dendl;
30263037 }
@@ -3057,6 +3068,7 @@ void PeeringState::activate(
30573068 << " to " << info.last_update ;
30583069
30593070 pi.partial_writes_last_complete = info.partial_writes_last_complete ;
3071+ pi.partial_writes_last_complete_epoch = info.partial_writes_last_complete_epoch ;
30603072 pi.last_update = info.last_update ;
30613073 pi.last_complete = info.last_update ;
30623074 pi.set_last_backfill (hobject_t ());
@@ -3336,12 +3348,10 @@ void PeeringState::consider_rollback_pwlc(eversion_t last_complete)
33363348 psdout (10 ) << " shard " << shard << " pwlc rolled back to "
33373349 << info.partial_writes_last_complete [shard] << dendl;
33383350 }
3339- // Always assign the current epoch to the version number so that
3340- // pwlc adjustments made by the whole proc_master_log process
3341- // are recognized as the newest updates
3342- info.partial_writes_last_complete [shard].first .epoch =
3343- get_osdmap_epoch ();
33443351 }
3352+ // Update the epoch so that pwlc adjustments made by the whole
3353+ // proc_master_log process are recognized as the newest updates
3354+ info.partial_writes_last_complete_epoch = get_osdmap_epoch ();
33453355}
33463356
33473357void PeeringState::proc_master_log (
@@ -3689,6 +3699,7 @@ void PeeringState::split_into(
36893699
36903700 // fix up pwlc - it may refer to log entries that are no longer in the log
36913701 child->info .partial_writes_last_complete = info.partial_writes_last_complete ;
3702+ child->info .partial_writes_last_complete_epoch = info.partial_writes_last_complete_epoch ;
36923703 pg_log.split_pwlc (info);
36933704 child->pg_log .split_pwlc (child->info );
36943705
@@ -3857,9 +3868,10 @@ void PeeringState::merge_from(
38573868 info.partial_writes_last_complete ) {
38583869 auto &&[old_v, new_v] = versionrange;
38593870 old_v = new_v = info.last_update ;
3860- old_v.epoch = get_osdmap_epoch ();
38613871 }
3862- psdout (10 ) << " merged pwlc=" << info.partial_writes_last_complete << dendl;
3872+ info.partial_writes_last_complete_epoch = get_osdmap_epoch ();
3873+ psdout (10 ) << " merged pwlc=e" << info.partial_writes_last_complete_epoch
3874+ << " :" << info.partial_writes_last_complete << dendl;
38633875 }
38643876 }
38653877
@@ -4685,6 +4697,7 @@ void PeeringState::append_log(
46854697 fromversion.version = eversion_t::max ().version ;
46864698 toversion = fromversion;
46874699 }
4700+ info.partial_writes_last_complete_epoch = 0 ;
46884701 }
46894702
46904703 for (auto p = logv.begin (); p != logv.end (); ++p) {
@@ -6949,8 +6962,10 @@ boost::statechart::result PeeringState::ReplicaActive::react(
69496962 i.history .last_epoch_started = evt.activation_epoch ;
69506963 i.history .last_interval_started = i.history .same_interval_since ;
69516964 if (!i.partial_writes_last_complete .empty ()) {
6952- psdout (20 ) << " sending info to " << ps->get_primary () << " pwlc="
6953- << i.partial_writes_last_complete << " info=" << i << dendl;
6965+ psdout (20 ) << " sending info to " << ps->get_primary () << " pwlc=e"
6966+ << i.partial_writes_last_complete_epoch
6967+ << " :" << i.partial_writes_last_complete
6968+ << " info=" << i << dendl;
69546969 }
69556970 rctx.send_info (
69566971 ps->get_primary ().osd ,
@@ -7169,11 +7184,12 @@ boost::statechart::result PeeringState::Stray::react(const MInfoRec& infoevt)
71697184 psdout (20 ) << " info from osd." << infoevt.from
71707185 << " last_update=" << infoevt.info .last_update
71717186 << " last_complete=" << infoevt.info .last_complete
7172- << " pwlc=" << pwlc
7187+ << " pwlc=e" << infoevt.info .partial_writes_last_complete_epoch
7188+ << " :" << pwlc
71737189 << " our last_update=" << ps->info .last_update << dendl;
71747190 // Our last update must be in the range described by partial write
71757191 // last_complete
7176- ceph_assert (ps->info .last_update . version >= pwlc.first . version );
7192+ ceph_assert (ps->info .last_update >= pwlc.first );
71777193 // Last complete must match the partial write last_update
71787194 ceph_assert (pwlc.second == infoevt.info .last_update );
71797195 } else {
0 commit comments