Skip to content

Commit 78ce68d

Browse files
committed
mon [stretch mode]: support disable_stretch_mode
Problem: Currently, Ceph lacks the ability to exit stretch mode and return to a normal (non-stretched) cluster. Solution: Provide a command that lets the user exit stretch mode gracefully: `ceph mon disable_stretch_mode <crush_rule> --yes-i-really-mean-it`. The user can either specify the crush rule that all pools should move to, or omit it, in which case Ceph uses the default replicated crush rule. Fixes: https://tracker.ceph.com/issues/67467 Signed-off-by: Kamoltat Sirivadhna <[email protected]>
1 parent 5ecc740 commit 78ce68d

File tree

6 files changed

+152
-0
lines changed

6 files changed

+152
-0
lines changed

src/mon/MonCommands.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -558,6 +558,11 @@ COMMAND("mon enable_stretch_mode " \
558558
"as the tiebreaker and setting <dividing_bucket> locations "
559559
"as the units for stretching across",
560560
"mon", "rw")
561+
/*
 * Leave stretch mode. Both arguments are optional in the signature:
 *  - crush_rule: name of the crush rule the pools are moved to
 *  - yes_i_really_mean_it: confirmation flag
 * Descriptors are separated by a single space and carry no trailing
 * separators, matching the neighbouring "mon set_new_tiebreaker" entry.
 */
COMMAND("mon disable_stretch_mode " \
	"name=crush_rule,type=CephString,req=false "
	"name=yes_i_really_mean_it,type=CephBool,req=false",
	"disable stretch mode, reverting to normal peering rules",
	"mon", "rw")
561566
COMMAND("mon set_new_tiebreaker " \
562567
"name=name,type=CephString "
563568
"name=yes_i_really_mean_it,type=CephBool,req=false",

src/mon/Monitor.cc

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6683,6 +6683,8 @@ void Monitor::notify_new_monmap(bool can_change_external_state, bool remove_rank
66836683

66846684
if (monmap->stretch_mode_enabled) {
66856685
try_engage_stretch_mode();
6686+
} else {
6687+
try_disable_stretch_mode();
66866688
}
66876689

66886690
if (is_stretch_mode()) {
@@ -6741,6 +6743,32 @@ void Monitor::try_engage_stretch_mode()
67416743
disconnect_disallowed_stretch_sessions();
67426744
}
67436745
}
6746+
struct CMonDisableStretchMode : public Context {
6747+
Monitor *m;
6748+
CMonDisableStretchMode(Monitor *mon) : m(mon) {}
6749+
void finish(int r) {
6750+
m->try_disable_stretch_mode();
6751+
}
6752+
};
6753+
void Monitor::try_disable_stretch_mode()
6754+
{
6755+
dout(20) << __func__ << dendl;
6756+
if (!stretch_mode_engaged) return;
6757+
if (!osdmon()->is_readable()) {
6758+
dout(20) << "osdmon is not readable" << dendl;
6759+
osdmon()->wait_for_readable_ctx(new CMonDisableStretchMode(this));
6760+
return;
6761+
}
6762+
if (!osdmon()->osdmap.stretch_mode_enabled &&
6763+
!monmap->stretch_mode_enabled) {
6764+
dout(10) << "Disabling stretch mode!" << dendl;
6765+
stretch_mode_engaged = false;
6766+
stretch_bucket_divider.clear();
6767+
degraded_stretch_mode = false;
6768+
recovering_stretch_mode = false;
6769+
}
6770+
6771+
}
67446772

67456773
void Monitor::do_stretch_mode_election_work()
67466774
{
@@ -6797,6 +6825,7 @@ struct CMonGoRecovery : public Context {
67976825
void Monitor::go_recovery_stretch_mode()
67986826
{
67996827
dout(20) << __func__ << dendl;
6828+
if (!is_stretch_mode()) return;
68006829
dout(20) << "is_leader(): " << is_leader() << dendl;
68016830
if (!is_leader()) return;
68026831
dout(20) << "is_degraded_stretch_mode(): " << is_degraded_stretch_mode() << dendl;
@@ -6827,6 +6856,7 @@ void Monitor::go_recovery_stretch_mode()
68276856

68286857
void Monitor::set_recovery_stretch_mode()
68296858
{
6859+
if (!is_stretch_mode()) return;
68306860
degraded_stretch_mode = true;
68316861
recovering_stretch_mode = true;
68326862
osdmon()->set_recovery_stretch_mode();
@@ -6835,6 +6865,7 @@ void Monitor::set_recovery_stretch_mode()
68356865
void Monitor::maybe_go_degraded_stretch_mode()
68366866
{
68376867
dout(20) << __func__ << dendl;
6868+
if (!is_stretch_mode()) return;
68386869
if (is_degraded_stretch_mode()) return;
68396870
if (!is_leader()) return;
68406871
if (dead_mon_buckets.empty()) return;
@@ -6873,6 +6904,7 @@ void Monitor::trigger_degraded_stretch_mode(const set<string>& dead_mons,
68736904
const set<int>& dead_buckets)
68746905
{
68756906
dout(20) << __func__ << dendl;
6907+
if (!is_stretch_mode()) return;
68766908
ceph_assert(osdmon()->is_writeable());
68776909
ceph_assert(monmon()->is_writeable());
68786910

@@ -6893,6 +6925,7 @@ void Monitor::trigger_degraded_stretch_mode(const set<string>& dead_mons,
68936925
void Monitor::set_degraded_stretch_mode()
68946926
{
68956927
dout(20) << __func__ << dendl;
6928+
if (!is_stretch_mode()) return;
68966929
degraded_stretch_mode = true;
68976930
recovering_stretch_mode = false;
68986931
osdmon()->set_degraded_stretch_mode();
@@ -6910,6 +6943,7 @@ struct CMonGoHealthy : public Context {
69106943
void Monitor::trigger_healthy_stretch_mode()
69116944
{
69126945
dout(20) << __func__ << dendl;
6946+
if (!is_stretch_mode()) return;
69136947
if (!is_degraded_stretch_mode()) return;
69146948
if (!is_leader()) return;
69156949
if (!osdmon()->is_writeable()) {
@@ -6930,6 +6964,7 @@ void Monitor::trigger_healthy_stretch_mode()
69306964

69316965
void Monitor::set_healthy_stretch_mode()
69326966
{
6967+
if (!is_stretch_mode()) return;
69336968
degraded_stretch_mode = false;
69346969
recovering_stretch_mode = false;
69356970
osdmon()->set_healthy_stretch_mode();

src/mon/Monitor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,7 @@ class Monitor : public Dispatcher,
293293
* updates across the entire cluster.
294294
*/
295295
void try_engage_stretch_mode();
296+
void try_disable_stretch_mode();
296297
void maybe_go_degraded_stretch_mode();
297298
void trigger_degraded_stretch_mode(const std::set<std::string>& dead_mons,
298299
const std::set<int>& dead_buckets);

src/mon/MonmapMonitor.cc

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1187,6 +1187,42 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op)
11871187
ceph_assert(okay == true);
11881188
}
11891189
request_proposal(mon.osdmon());
1190+
} else if (prefix == "mon disable_stretch_mode") {
1191+
if (!mon.osdmon()->is_writeable()) {
1192+
dout(10) << __func__
1193+
<< ": waiting for osdmon writeable for stretch mode" << dendl;
1194+
mon.osdmon()->wait_for_writeable(op, new Monitor::C_RetryMessage(&mon, op));
1195+
return false; /* do not propose, yet */
1196+
}
1197+
bool sure = false;
1198+
bool okay = false;
1199+
int errcode = 0;
1200+
if (!pending_map.stretch_mode_enabled) {
1201+
ss << "stretch mode is already disabled";
1202+
err = -EINVAL;
1203+
goto reply_no_propose;
1204+
}
1205+
cmd_getval(cmdmap, "yes_i_really_mean_it", sure);
1206+
if (!sure) {
1207+
ss << " This command will disable stretch mode, "
1208+
"which means all your pools will be reverted back "
1209+
"to the default size, min_size and crush_rule. "
1210+
"Pass --yes-i-really-mean-it to proceed.";
1211+
err = -EPERM;
1212+
goto reply_no_propose;
1213+
}
1214+
string crush_rule = cmd_getval_or<string>(cmdmap, "crush_rule", string{});
1215+
mon.osdmon()->try_disable_stretch_mode(ss, &okay, &errcode, crush_rule);
1216+
if (!okay) {
1217+
err = errcode;
1218+
goto reply_no_propose;
1219+
}
1220+
pending_map.stretch_mode_enabled = false;
1221+
pending_map.tiebreaker_mon = "";
1222+
pending_map.disallowed_leaders.clear();
1223+
pending_map.stretch_marked_down_mons.clear();
1224+
pending_map.last_changed = ceph_clock_now();
1225+
request_proposal(mon.osdmon());
11901226
} else {
11911227
ss << "unknown command " << prefix;
11921228
err = -EINVAL;

src/mon/OSDMonitor.cc

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -983,6 +983,8 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap)
983983
dout(20) << "Checking degraded stretch mode due to osd changes" << dendl;
984984
mon.maybe_go_degraded_stretch_mode();
985985
}
986+
} else {
987+
mon.try_disable_stretch_mode();
986988
}
987989
}
988990

@@ -15079,6 +15081,65 @@ void OSDMonitor::convert_pool_priorities(void)
1507915081
}
1508015082
}
1508115083

15084+
/**
 * Stage the OSDMap changes needed to leave stretch mode in pending_inc.
 *
 * For every pool in the current osdmap the stretch peering fields are
 * cleared, size/min_size are reset from the osd_pool_default_* settings and
 * the crush rule is switched to either @p crush_rule or the default
 * replicated rule. The stretch-mode fields of the map itself are then
 * reset in pending_inc.
 *
 * All validation is performed BEFORE pending_inc is touched, so a failure
 * never leaves a partially rewritten set of pools staged for the next
 * proposal.
 *
 * @param ss          receives a human-readable message on failure
 * @param okay        set to false on entry and to true only on success
 * @param errcode     set to a negative errno on failure:
 *                    -EINVAL (already disabled, unknown rule, rule type
 *                    mismatch, rule identical to the one in use),
 *                    -EBUSY (stretch mode is recovering),
 *                    -ENOENT (no default replicated rule available)
 * @param crush_rule  name of the rule to move all pools to; empty selects
 *                    the default replicated rule
 */
void OSDMonitor::try_disable_stretch_mode(stringstream& ss,
					  bool *okay,
					  int *errcode,
					  const string& crush_rule)
{
  dout(20) << __func__ << dendl;
  *okay = false;
  if (!osdmap.stretch_mode_enabled) {
    ss << "stretch mode is already disabled";
    *errcode = -EINVAL;
    return;
  }
  if (osdmap.recovering_stretch_mode) {
    ss << "stretch mode is currently recovering and cannot be disabled";
    *errcode = -EBUSY;
    return;
  }

  const auto& pools = osdmap.get_pools();
  const bool rule_supplied = !crush_rule.empty();

  // The target rule does not depend on the pool, so resolve it once.
  int target_rule = -1;
  if (rule_supplied) {
    target_rule = osdmap.crush->get_rule_id(crush_rule);
    if (target_rule < 0) {
      ss << "unrecognized crush rule " << crush_rule;
      *errcode = -EINVAL;
      return;
    }
  } else if (!pools.empty()) {
    target_rule = osdmap.crush->get_osd_pool_default_crush_replicated_rule(cct);
    // NOTE(review): a negative value is taken to mean that no usable default
    // replicated rule exists -- confirm against CrushWrapper.
    if (target_rule < 0) {
      ss << "no default replicated crush rule is available";
      *errcode = -ENOENT;
      return;
    }
  }

  // Validation pass: nothing in pending_inc is modified here. A pool that
  // already has a pending entry is judged by that entry, which is the object
  // get_new_pool() would hand back below.
  if (rule_supplied) {
    for (const auto& pi : pools) {
      const auto staged = pending_inc.new_pools.find(pi.first);
      const pg_pool_t& current =
	(staged != pending_inc.new_pools.end()) ? staged->second : pi.second;
      if (!osdmap.crush->rule_valid_for_pool_type(target_rule,
						  current.get_type())) {
	ss << "crush rule " << crush_rule << " type does not match pool type";
	*errcode = -EINVAL;
	return;
      }
      if (target_rule == current.crush_rule) {
	ss << "You can't disable stretch mode with the same crush rule you are using";
	*errcode = -EINVAL;
	return;
      }
    }
  }

  // Apply pass: every check has succeeded, so it is safe to stage changes.
  for (const auto& pi : pools) {
    pg_pool_t *pool = pending_inc.get_new_pool(pi.first, &pi.second);
    pool->peering_crush_bucket_count = 0;
    pool->peering_crush_bucket_target = 0;
    pool->peering_crush_bucket_barrier = 0;
    pool->peering_crush_mandatory_member = CRUSH_ITEM_NONE;
    pool->size = g_conf().get_val<uint64_t>("osd_pool_default_size");
    pool->min_size = g_conf().get_osd_pool_default_min_size(pool->size);
    pool->crush_rule = target_rule;
  }

  pending_inc.change_stretch_mode = true;
  pending_inc.stretch_mode_enabled = false;
  pending_inc.new_stretch_bucket_count = 0;
  pending_inc.new_degraded_stretch_mode = 0;
  pending_inc.new_stretch_mode_bucket = 0;
  pending_inc.new_recovering_stretch_mode = 0;
  *okay = true;
}
15142+
1508215143
void OSDMonitor::try_enable_stretch_mode_pools(stringstream& ss, bool *okay,
1508315144
int *errcode,
1508415145
set<pg_pool_t*>* pools,

src/mon/OSDMonitor.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -844,6 +844,20 @@ class OSDMonitor : public PaxosService,
844844
uint32_t bucket_count,
845845
const std::set<pg_pool_t*>& pools,
846846
const std::string& new_crush_rule);
847+
/**
848+
*
849+
* Set all stretch mode values of all pools back to pre-stretch mode values.
850+
* Set all stretch mode values of OSDMap back to pre-stretch mode values.
851+
* If crush_rule is not empty, set the crush rule to that value, else use
852+
* the default replicated crush rule.
853+
* @param ss: a stringstream to write errors into
854+
* @param okay: set to true if the change was staged, false otherwise
* @param errcode: filled with -errno if there's a problem
855+
* @param crush_rule: the crush rule that will be used after disabling stretch mode
856+
*/
857+
void try_disable_stretch_mode(std::stringstream& ss,
858+
bool *okay,
859+
int *errcode,
860+
const std::string& crush_rule);
847861
/**
848862
* Check the input dead_buckets mapping (buckets->dead monitors) to see
849863
* if the OSDs are also down. If so, fill in really_down_buckets and

0 commit comments

Comments
 (0)