Skip to content

Commit f1826fd

Browse files
bill-scalesaainscow
authored andcommitted
osd: Refactor find_best_info and choose_acting
Refactor find_best_info to have separate function to calculate maxles and minlua. The refactor makes history_les_bound optional, tidy up the choose_acting interface removing this where it is not used. Signed-off-by: Bill Scales <[email protected]>
1 parent 1b44fd9 commit f1826fd

File tree

2 files changed

+61
-37
lines changed

2 files changed

+61
-37
lines changed

src/osd/PeeringState.cc

Lines changed: 52 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1605,43 +1605,66 @@ void PeeringState::reject_reservation()
16051605
}
16061606

16071607
/**
1608-
* find_best_info
1608+
* calculate_maxlesf_and_minlua
16091609
*
1610-
* Returns an iterator to the best info in infos sorted by:
1611-
* 1) Prefer newer last_update
1612-
* 2) Prefer longer tail if it brings another info into contiguity
1613-
* 3) Prefer current primary
1610+
* Calculate max_last_epoch_started and
1611+
* min_last_update_acceptable
16141612
*/
1615-
map<pg_shard_t, pg_info_t>::const_iterator PeeringState::find_best_info(
1616-
const map<pg_shard_t, pg_info_t> &infos,
1617-
bool restrict_to_up_acting,
1618-
bool exclude_nonprimary_shards,
1619-
bool *history_les_bound) const
1613+
void PeeringState::calculate_maxles_and_minlua( const map<pg_shard_t, pg_info_t> &infos,
1614+
epoch_t& max_last_epoch_started,
1615+
eversion_t& min_last_update_acceptable,
1616+
bool *history_les_bound) const
16201617
{
1621-
ceph_assert(history_les_bound);
16221618
/* See doc/dev/osd_internals/last_epoch_started.rst before attempting
16231619
* to make changes to this process. Also, make sure to update it
16241620
* when you find bugs! */
1625-
epoch_t max_last_epoch_started_found = 0;
1621+
max_last_epoch_started = 0;
16261622
for (auto i = infos.begin(); i != infos.end(); ++i) {
16271623
if (!cct->_conf->osd_find_best_info_ignore_history_les &&
1628-
max_last_epoch_started_found < i->second.history.last_epoch_started) {
1629-
*history_les_bound = true;
1630-
max_last_epoch_started_found = i->second.history.last_epoch_started;
1624+
max_last_epoch_started < i->second.history.last_epoch_started) {
1625+
if (history_les_bound) {
1626+
*history_les_bound = true;
1627+
}
1628+
max_last_epoch_started = i->second.history.last_epoch_started;
16311629
}
16321630
if (!i->second.is_incomplete() &&
1633-
max_last_epoch_started_found < i->second.last_epoch_started) {
1634-
*history_les_bound = false;
1635-
max_last_epoch_started_found = i->second.last_epoch_started;
1631+
max_last_epoch_started < i->second.last_epoch_started) {
1632+
if (history_les_bound) {
1633+
*history_les_bound = false;
1634+
}
1635+
max_last_epoch_started = i->second.last_epoch_started;
16361636
}
16371637
}
1638-
eversion_t min_last_update_acceptable = eversion_t::max();
1638+
min_last_update_acceptable = eversion_t::max();
16391639
for (auto i = infos.begin(); i != infos.end(); ++i) {
1640-
if (max_last_epoch_started_found <= i->second.last_epoch_started) {
1640+
if (max_last_epoch_started <= i->second.last_epoch_started) {
16411641
if (min_last_update_acceptable > i->second.last_update)
16421642
min_last_update_acceptable = i->second.last_update;
16431643
}
16441644
}
1645+
}
1646+
1647+
/**
1648+
* find_best_info
1649+
*
1650+
* Returns an iterator to the best info in infos sorted by:
1651+
* 1) Prefer newer last_update
1652+
* 2) Prefer longer tail if it brings another info into contiguity
1653+
* 3) Prefer current primary
1654+
*/
1655+
map<pg_shard_t, pg_info_t>::const_iterator PeeringState::find_best_info(
1656+
const map<pg_shard_t, pg_info_t> &infos,
1657+
bool restrict_to_up_acting,
1658+
bool exclude_nonprimary_shards,
1659+
bool *history_les_bound) const
1660+
{
1661+
epoch_t max_last_epoch_started;
1662+
eversion_t min_last_update_acceptable;
1663+
calculate_maxles_and_minlua( infos,
1664+
max_last_epoch_started,
1665+
min_last_update_acceptable,
1666+
history_les_bound);
1667+
16451668
if (min_last_update_acceptable == eversion_t::max())
16461669
return infos.end();
16471670

@@ -1658,7 +1681,7 @@ map<pg_shard_t, pg_info_t>::const_iterator PeeringState::find_best_info(
16581681
if (p->second.last_update < min_last_update_acceptable)
16591682
continue;
16601683
// Disqualify anyone with a too old last_epoch_started
1661-
if (p->second.last_epoch_started < max_last_epoch_started_found)
1684+
if (p->second.last_epoch_started < max_last_epoch_started)
16621685
continue;
16631686
// Disqualify anyone who is incomplete (not fully backfilled)
16641687
if (p->second.is_incomplete())
@@ -2462,9 +2485,9 @@ void PeeringState::choose_async_recovery_replicated(
24622485
*/
24632486
bool PeeringState::choose_acting(pg_shard_t &auth_log_shard_id,
24642487
bool restrict_to_up_acting,
2488+
bool request_pg_temp_change_only,
24652489
bool *history_les_bound,
2466-
bool *repeat_getlog,
2467-
bool request_pg_temp_change_only)
2490+
bool *repeat_getlog)
24682491
{
24692492
map<pg_shard_t, pg_info_t> all_info(peer_info.begin(), peer_info.end());
24702493
all_info[pg_whoami] = info;
@@ -6270,9 +6293,8 @@ PeeringState::Recovering::react(const RequestBackfill &evt)
62706293
// as backfill might take a long time to complete..
62716294
if (!ps->async_recovery_targets.empty()) {
62726295
pg_shard_t auth_log_shard;
6273-
bool history_les_bound = false;
62746296
// FIXME: Uh-oh we have to check this return value; choose_acting can fail!
6275-
ps->choose_acting(auth_log_shard, true, &history_les_bound, nullptr);
6297+
ps->choose_acting(auth_log_shard, true);
62766298
}
62776299
return transit<WaitLocalBackfillReserved>();
62786300
}
@@ -6345,13 +6367,12 @@ PeeringState::Recovered::Recovered(my_context ctx)
63456367
}
63466368

63476369
// adjust acting set? (e.g. because backfill completed...)
6348-
bool history_les_bound = false;
63496370
if (ps->acting != ps->up &&
6350-
!ps->choose_acting(auth_log_shard, true, &history_les_bound, nullptr)) {
6371+
!ps->choose_acting(auth_log_shard, true)) {
63516372
ceph_assert(ps->want_acting.size());
63526373
} else if (!ps->async_recovery_targets.empty()) {
63536374
// FIXME: Uh-oh we have to check this return value; choose_acting can fail!
6354-
ps->choose_acting(auth_log_shard, true, &history_les_bound, nullptr);
6375+
ps->choose_acting(auth_log_shard, true);
63556376
}
63566377

63576378
if (context< Active >().all_replicas_activated &&
@@ -6497,9 +6518,8 @@ boost::statechart::result PeeringState::Active::react(const AdvMap& advmap)
64976518
// note that we leave restrict_to_up_acting to false in order to
64986519
// not overkill any chosen stray that is still alive.
64996520
pg_shard_t auth_log_shard;
6500-
bool history_les_bound = false;
65016521
ps->remove_down_peer_info(advmap.osdmap);
6502-
ps->choose_acting(auth_log_shard, false, &history_les_bound, nullptr, true);
6522+
ps->choose_acting(auth_log_shard, false, true);
65036523
}
65046524

65056525
/* Check for changes in pool size (if the acting set changed as a result,
@@ -6578,9 +6598,8 @@ boost::statechart::result PeeringState::Active::react(const MNotifyRec& notevt)
65786598
// check if it is a previous down acting member that's coming back.
65796599
// if so, request pg_temp change to trigger a new interval transition
65806600
pg_shard_t auth_log_shard;
6581-
bool history_les_bound = false;
65826601
// FIXME: Uh-oh we have to check this return value; choose_acting can fail!
6583-
ps->choose_acting(auth_log_shard, false, &history_les_bound, nullptr, true);
6602+
ps->choose_acting(auth_log_shard, false, true);
65846603
if (!ps->want_acting.empty() && ps->want_acting != ps->acting) {
65856604
psdout(10) << "Active: got notify from previous acting member "
65866605
<< notevt.from << ", requesting pg_temp change"
@@ -7470,7 +7489,7 @@ PeeringState::GetLog::GetLog(my_context ctx)
74707489
ps->log_weirdness();
74717490

74727491
// adjust acting?
7473-
if (!ps->choose_acting(auth_log_shard, false,
7492+
if (!ps->choose_acting(auth_log_shard, false, false,
74747493
&context< Peering >().history_les_bound,
74757494
&repeat_getlog)) {
74767495
if (!ps->want_acting.empty()) {

src/osd/PeeringState.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1688,12 +1688,17 @@ class PeeringState : public MissingLoc::MappingInfo {
16881688

16891689
void reject_reservation();
16901690

1691+
void calculate_maxles_and_minlua( const std::map<pg_shard_t, pg_info_t> &infos,
1692+
epoch_t& max_last_epoch_started,
1693+
eversion_t& min_last_update_acceptable,
1694+
bool *history_les_bound = nullptr) const;
1695+
16911696
// acting std::set
16921697
std::map<pg_shard_t, pg_info_t>::const_iterator find_best_info(
16931698
const std::map<pg_shard_t, pg_info_t> &infos,
16941699
bool restrict_to_up_acting,
16951700
bool exclude_nonprimary_shards,
1696-
bool *history_les_bound) const;
1701+
bool *history_les_bound = nullptr) const;
16971702

16981703
static void calc_ec_acting(
16991704
std::map<pg_shard_t, pg_info_t>::const_iterator auth_log_shard,
@@ -1764,9 +1769,9 @@ class PeeringState : public MissingLoc::MappingInfo {
17641769
bool recoverable(const std::vector<int> &want) const;
17651770
bool choose_acting(pg_shard_t &auth_log_shard,
17661771
bool restrict_to_up_acting,
1767-
bool *history_les_bound,
1768-
bool *repeat_getlog,
1769-
bool request_pg_temp_change_only = false);
1772+
bool request_pg_temp_change_only = false,
1773+
bool *history_les_bound = nullptr,
1774+
bool *repeat_getlog = nullptr);
17701775

17711776
bool search_for_missing(
17721777
const pg_info_t &oinfo, const pg_missing_t &omissing,

0 commit comments

Comments
 (0)