@@ -1605,43 +1605,66 @@ void PeeringState::reject_reservation()
16051605}
16061606
16071607/* *
1608- * find_best_info
1608+ * calculate_maxles_and_minlua
16091609 *
1610- * Returns an iterator to the best info in infos sorted by:
1611- * 1) Prefer newer last_update
1612- * 2) Prefer longer tail if it brings another info into contiguity
1613- * 3) Prefer current primary
1610+ * Calculate max_last_epoch_started and
1611+ * min_last_update_acceptable
16141612 */
1615- map<pg_shard_t , pg_info_t >::const_iterator PeeringState::find_best_info (
1616- const map<pg_shard_t , pg_info_t > &infos,
1617- bool restrict_to_up_acting,
1618- bool exclude_nonprimary_shards,
1619- bool *history_les_bound) const
1613+ void PeeringState::calculate_maxles_and_minlua ( const map<pg_shard_t , pg_info_t > &infos,
1614+ epoch_t & max_last_epoch_started,
1615+ eversion_t & min_last_update_acceptable,
1616+ bool *history_les_bound) const
16201617{
1621- ceph_assert (history_les_bound);
16221618 /* See doc/dev/osd_internals/last_epoch_started.rst before attempting
16231619 * to make changes to this process. Also, make sure to update it
16241620 * when you find bugs! */
1625- epoch_t max_last_epoch_started_found = 0 ;
1621+ max_last_epoch_started = 0 ;
16261622 for (auto i = infos.begin (); i != infos.end (); ++i) {
16271623 if (!cct->_conf ->osd_find_best_info_ignore_history_les &&
1628- max_last_epoch_started_found < i->second .history .last_epoch_started ) {
1629- *history_les_bound = true ;
1630- max_last_epoch_started_found = i->second .history .last_epoch_started ;
1624+ max_last_epoch_started < i->second .history .last_epoch_started ) {
1625+ if (history_les_bound) {
1626+ *history_les_bound = true ;
1627+ }
1628+ max_last_epoch_started = i->second .history .last_epoch_started ;
16311629 }
16321630 if (!i->second .is_incomplete () &&
1633- max_last_epoch_started_found < i->second .last_epoch_started ) {
1634- *history_les_bound = false ;
1635- max_last_epoch_started_found = i->second .last_epoch_started ;
1631+ max_last_epoch_started < i->second .last_epoch_started ) {
1632+ if (history_les_bound) {
1633+ *history_les_bound = false ;
1634+ }
1635+ max_last_epoch_started = i->second .last_epoch_started ;
16361636 }
16371637 }
1638- eversion_t min_last_update_acceptable = eversion_t::max ();
1638+ min_last_update_acceptable = eversion_t::max ();
16391639 for (auto i = infos.begin (); i != infos.end (); ++i) {
1640- if (max_last_epoch_started_found <= i->second .last_epoch_started ) {
1640+ if (max_last_epoch_started <= i->second .last_epoch_started ) {
16411641 if (min_last_update_acceptable > i->second .last_update )
16421642 min_last_update_acceptable = i->second .last_update ;
16431643 }
16441644 }
1645+ }
1646+
1647+ /* *
1648+ * find_best_info
1649+ *
1650+ * Returns an iterator to the best info in infos sorted by:
1651+ * 1) Prefer newer last_update
1652+ * 2) Prefer longer tail if it brings another info into contiguity
1653+ * 3) Prefer current primary
1654+ */
1655+ map<pg_shard_t , pg_info_t >::const_iterator PeeringState::find_best_info (
1656+ const map<pg_shard_t , pg_info_t > &infos,
1657+ bool restrict_to_up_acting,
1658+ bool exclude_nonprimary_shards,
1659+ bool *history_les_bound) const
1660+ {
1661+ epoch_t max_last_epoch_started;
1662+ eversion_t min_last_update_acceptable;
1663+ calculate_maxles_and_minlua ( infos,
1664+ max_last_epoch_started,
1665+ min_last_update_acceptable,
1666+ history_les_bound);
1667+
16451668 if (min_last_update_acceptable == eversion_t::max ())
16461669 return infos.end ();
16471670
@@ -1658,7 +1681,7 @@ map<pg_shard_t, pg_info_t>::const_iterator PeeringState::find_best_info(
16581681 if (p->second .last_update < min_last_update_acceptable)
16591682 continue ;
16601683 // Disqualify anyone with a too old last_epoch_started
1661- if (p->second .last_epoch_started < max_last_epoch_started_found )
1684+ if (p->second .last_epoch_started < max_last_epoch_started )
16621685 continue ;
16631686 // Disqualify anyone who is incomplete (not fully backfilled)
16641687 if (p->second .is_incomplete ())
@@ -2462,9 +2485,9 @@ void PeeringState::choose_async_recovery_replicated(
24622485 */
24632486bool PeeringState::choose_acting (pg_shard_t &auth_log_shard_id,
24642487 bool restrict_to_up_acting,
2488+ bool request_pg_temp_change_only,
24652489 bool *history_les_bound,
2466- bool *repeat_getlog,
2467- bool request_pg_temp_change_only)
2490+ bool *repeat_getlog)
24682491{
24692492 map<pg_shard_t , pg_info_t > all_info (peer_info.begin (), peer_info.end ());
24702493 all_info[pg_whoami] = info;
@@ -6270,9 +6293,8 @@ PeeringState::Recovering::react(const RequestBackfill &evt)
62706293 // as backfill might take a long time to complete..
62716294 if (!ps->async_recovery_targets .empty ()) {
62726295 pg_shard_t auth_log_shard;
6273- bool history_les_bound = false ;
62746296 // FIXME: Uh-oh we have to check this return value; choose_acting can fail!
6275- ps->choose_acting (auth_log_shard, true , &history_les_bound, nullptr );
6297+ ps->choose_acting (auth_log_shard, true );
62766298 }
62776299 return transit<WaitLocalBackfillReserved>();
62786300}
@@ -6345,13 +6367,12 @@ PeeringState::Recovered::Recovered(my_context ctx)
63456367 }
63466368
63476369 // adjust acting set? (e.g. because backfill completed...)
6348- bool history_les_bound = false ;
63496370 if (ps->acting != ps->up &&
6350- !ps->choose_acting (auth_log_shard, true , &history_les_bound, nullptr )) {
6371+ !ps->choose_acting (auth_log_shard, true )) {
63516372 ceph_assert (ps->want_acting .size ());
63526373 } else if (!ps->async_recovery_targets .empty ()) {
63536374 // FIXME: Uh-oh we have to check this return value; choose_acting can fail!
6354- ps->choose_acting (auth_log_shard, true , &history_les_bound, nullptr );
6375+ ps->choose_acting (auth_log_shard, true );
63556376 }
63566377
63576378 if (context< Active >().all_replicas_activated &&
@@ -6497,9 +6518,8 @@ boost::statechart::result PeeringState::Active::react(const AdvMap& advmap)
64976518 // note that we leave restrict_to_up_acting to false in order to
64986519 // not overkill any chosen stray that is still alive.
64996520 pg_shard_t auth_log_shard;
6500- bool history_les_bound = false ;
65016521 ps->remove_down_peer_info (advmap.osdmap );
6502- ps->choose_acting (auth_log_shard, false , &history_les_bound, nullptr , true );
6522+ ps->choose_acting (auth_log_shard, false , true );
65036523 }
65046524
65056525 /* Check for changes in pool size (if the acting set changed as a result,
@@ -6578,9 +6598,8 @@ boost::statechart::result PeeringState::Active::react(const MNotifyRec& notevt)
65786598 // check if it is a previous down acting member that's coming back.
65796599 // if so, request pg_temp change to trigger a new interval transition
65806600 pg_shard_t auth_log_shard;
6581- bool history_les_bound = false ;
65826601 // FIXME: Uh-oh we have to check this return value; choose_acting can fail!
6583- ps->choose_acting (auth_log_shard, false , &history_les_bound, nullptr , true );
6602+ ps->choose_acting (auth_log_shard, false , true );
65846603 if (!ps->want_acting .empty () && ps->want_acting != ps->acting ) {
65856604 psdout (10 ) << " Active: got notify from previous acting member "
65866605 << notevt.from << " , requesting pg_temp change"
@@ -7470,7 +7489,7 @@ PeeringState::GetLog::GetLog(my_context ctx)
74707489 ps->log_weirdness ();
74717490
74727491 // adjust acting?
7473- if (!ps->choose_acting (auth_log_shard, false ,
7492+ if (!ps->choose_acting (auth_log_shard, false , false ,
74747493 &context< Peering >().history_les_bound ,
74757494 &repeat_getlog)) {
74767495 if (!ps->want_acting .empty ()) {
0 commit comments