Skip to content

Commit f436050

Browse files
committed
Add the osd_recovery_sleep_degraded, osd_recovery_sleep_degraded_ssd, osd_recovery_sleep_degraded_hdd, and osd_recovery_sleep_degraded_hybrid configuration options to throttle data movement during recovery when a PG is degraded
Fixes: https://tracker.ceph.com/issues/67700 Signed-off-by: Md Mahamudur Rahaman Sajib <[email protected]>
1 parent c6fbca3 commit f436050

File tree

6 files changed

+90
-4
lines changed

6 files changed

+90
-4
lines changed

doc/rados/configuration/mclock-config-ref.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,10 @@ sleep options are disabled (set to 0),
292292
- :confval:`osd_recovery_sleep_hdd`
293293
- :confval:`osd_recovery_sleep_ssd`
294294
- :confval:`osd_recovery_sleep_hybrid`
295+
- :confval:`osd_recovery_sleep_degraded`
296+
- :confval:`osd_recovery_sleep_degraded_hdd`
297+
- :confval:`osd_recovery_sleep_degraded_ssd`
298+
- :confval:`osd_recovery_sleep_degraded_hybrid`
295299
- :confval:`osd_scrub_sleep`
296300
- :confval:`osd_delete_sleep`
297301
- :confval:`osd_delete_sleep_hdd`

doc/rados/configuration/osd-config-ref.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,10 @@ perform well in a degraded state.
431431
.. confval:: osd_recovery_sleep_hdd
432432
.. confval:: osd_recovery_sleep_ssd
433433
.. confval:: osd_recovery_sleep_hybrid
434+
.. confval:: osd_recovery_sleep_degraded
435+
.. confval:: osd_recovery_sleep_degraded_hdd
436+
.. confval:: osd_recovery_sleep_degraded_ssd
437+
.. confval:: osd_recovery_sleep_degraded_hybrid
434438
.. confval:: osd_recovery_priority
435439

436440
Tiering

src/common/options/osd.yaml.in

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,51 @@ options:
140140
- osd_recovery_sleep
141141
flags:
142142
- runtime
143+
- name: osd_recovery_sleep_degraded
144+
type: float
145+
level: advanced
146+
desc: Time in seconds to sleep before next recovery or backfill op when PGs are degraded.
147+
This setting overrides _ssd, _hdd, and _hybrid if non-zero.
148+
fmt_desc: Time in seconds to sleep before the next recovery or backfill op when PGs
149+
are degraded. Increasing this value will slow down recovery ops while client
150+
ops will be less impacted.
151+
default: 0
152+
flags:
153+
- runtime
154+
- name: osd_recovery_sleep_degraded_hdd
155+
type: float
156+
level: advanced
157+
desc: Time in seconds to sleep before next recovery or backfill op for HDDs
158+
when PGs are degraded.
159+
fmt_desc: Time in seconds to sleep before the next recovery or backfill op
160+
for HDDs when PGs are degraded.
161+
default: 0.1
162+
flags:
163+
- runtime
164+
- name: osd_recovery_sleep_degraded_ssd
165+
type: float
166+
level: advanced
167+
desc: Time in seconds to sleep before next recovery or backfill op for SSDs
168+
when PGs are degraded.
169+
fmt_desc: Time in seconds to sleep before the next recovery or backfill op
170+
for SSDs when PGs are degraded.
171+
default: 0
172+
see_also:
173+
- osd_recovery_sleep_degraded
174+
flags:
175+
- runtime
176+
- name: osd_recovery_sleep_degraded_hybrid
177+
type: float
178+
level: advanced
179+
desc: Time in seconds to sleep before next recovery or backfill op when PGs
180+
are degraded and data is on HDD and journal is on SSD.
181+
fmt_desc: Time in seconds to sleep before the next recovery or backfill op when
182+
PGs are degraded and OSD data is on HDD and OSD journal / WAL+DB is on SSD.
183+
default: 0.025
184+
see_also:
185+
- osd_recovery_sleep_degraded
186+
flags:
187+
- runtime
143188
- name: osd_snap_trim_sleep
144189
type: float
145190
level: advanced

src/osd/OSD.cc

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3681,6 +3681,21 @@ float OSD::get_osd_recovery_sleep()
36813681
return cct->_conf->osd_recovery_sleep_hdd;
36823682
}
36833683

3684+
float OSD::get_osd_recovery_sleep_degraded() {
3685+
float osd_recovery_sleep_degraded =
3686+
cct->_conf.get_val<double>("osd_recovery_sleep_degraded");
3687+
if (osd_recovery_sleep_degraded > 0) {
3688+
return osd_recovery_sleep_degraded;
3689+
}
3690+
if (!store_is_rotational && !journal_is_rotational) {
3691+
return cct->_conf.get_val<double>("osd_recovery_sleep_degraded_ssd");
3692+
} else if (store_is_rotational && !journal_is_rotational) {
3693+
return cct->_conf.get_val<double>("osd_recovery_sleep_degraded_hybrid");
3694+
} else {
3695+
return cct->_conf.get_val<double>("osd_recovery_sleep_degraded_hdd");
3696+
}
3697+
}
3698+
36843699
float OSD::get_osd_delete_sleep()
36853700
{
36863701
float osd_delete_sleep = cct->_conf.get_val<double>("osd_delete_sleep");
@@ -9703,9 +9718,12 @@ void OSD::do_recovery(
97039718
* ops are scheduled after osd_recovery_sleep amount of time from the previous
97049719
* recovery event's schedule time. This is done by adding a
97059720
* recovery_requeue_callback event, which re-queues the recovery op using
9706-
* queue_recovery_after_sleep.
9721+
* queue_recovery_after_sleep. (osd_recovery_sleep_degraded will be
9722+
* used instead of osd_recovery_sleep when the PG is degraded.)
97079723
*/
9708-
float recovery_sleep = get_osd_recovery_sleep();
9724+
float recovery_sleep = pg->is_degraded()
9725+
? get_osd_recovery_sleep_degraded()
9726+
: get_osd_recovery_sleep();
97099727
{
97109728
std::lock_guard l(service.sleep_lock);
97119729
if (recovery_sleep > 0 && service.recovery_needs_sleep) {
@@ -10014,6 +10032,10 @@ std::vector<std::string> OSD::get_tracked_keys() const noexcept
1001410032
"osd_recovery_sleep_hdd"s,
1001510033
"osd_recovery_sleep_ssd"s,
1001610034
"osd_recovery_sleep_hybrid"s,
10035+
"osd_recovery_sleep_degraded"s,
10036+
"osd_recovery_sleep_degraded_hdd"s,
10037+
"osd_recovery_sleep_degraded_ssd"s,
10038+
"osd_recovery_sleep_degraded_hybrid"s,
1001710039
"osd_delete_sleep"s,
1001810040
"osd_delete_sleep_hdd"s,
1001910041
"osd_delete_sleep_ssd"s,
@@ -10079,7 +10101,11 @@ void OSD::handle_conf_change(const ConfigProxy& conf,
1007910101
changed.count("osd_recovery_sleep") ||
1008010102
changed.count("osd_recovery_sleep_hdd") ||
1008110103
changed.count("osd_recovery_sleep_ssd") ||
10082-
changed.count("osd_recovery_sleep_hybrid")) {
10104+
changed.count("osd_recovery_sleep_hybrid") ||
10105+
changed.count("osd_recovery_sleep_degraded") ||
10106+
changed.count("osd_recovery_sleep_degraded_hdd") ||
10107+
changed.count("osd_recovery_sleep_degraded_ssd") ||
10108+
changed.count("osd_recovery_sleep_degraded_hybrid")) {
1008310109
maybe_override_sleep_options_for_qos();
1008410110
}
1008510111
if (changed.count("osd_min_recovery_priority")) {
@@ -10411,6 +10437,12 @@ void OSD::maybe_override_sleep_options_for_qos()
1041110437
cct->_conf.set_val("osd_recovery_sleep_ssd", std::to_string(0));
1041210438
cct->_conf.set_val("osd_recovery_sleep_hybrid", std::to_string(0));
1041310439

10440+
// Disable recovery sleep for degraded PGs
10441+
cct->_conf.set_val("osd_recovery_sleep_degraded", std::to_string(0));
10442+
cct->_conf.set_val("osd_recovery_sleep_degraded_hdd", std::to_string(0));
10443+
cct->_conf.set_val("osd_recovery_sleep_degraded_ssd", std::to_string(0));
10444+
cct->_conf.set_val("osd_recovery_sleep_degraded_hybrid", std::to_string(0));
10445+
1041410446
// Disable delete sleep
1041510447
cct->_conf.set_val("osd_delete_sleep", std::to_string(0));
1041610448
cct->_conf.set_val("osd_delete_sleep_hdd", std::to_string(0));

src/osd/OSD.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2017,6 +2017,7 @@ class OSD : public Dispatcher,
20172017
int get_num_op_threads();
20182018

20192019
float get_osd_recovery_sleep();
2020+
float get_osd_recovery_sleep_degraded();
20202021
float get_osd_delete_sleep();
20212022
float get_osd_snap_trim_sleep();
20222023

src/osd/PG.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1241,6 +1241,7 @@ class PG : public DoutPrefixProvider,
12411241

12421242
public:
12431243
int pg_stat_adjust(osd_stat_t *new_stat);
1244+
bool is_degraded() const { return recovery_state.is_degraded(); }
12441245
protected:
12451246
bool delete_needs_sleep = false;
12461247

@@ -1264,7 +1265,6 @@ class PG : public DoutPrefixProvider,
12641265
bool is_backfill_unfound() const { return recovery_state.is_backfill_unfound(); }
12651266
bool is_incomplete() const { return recovery_state.is_incomplete(); }
12661267
bool is_clean() const { return recovery_state.is_clean(); }
1267-
bool is_degraded() const { return recovery_state.is_degraded(); }
12681268
bool is_undersized() const { return recovery_state.is_undersized(); }
12691269
bool is_scrubbing() const { return state_test(PG_STATE_SCRUBBING); } // Primary only
12701270
bool is_remapped() const { return recovery_state.is_remapped(); }

0 commit comments

Comments
 (0)