Skip to content

Commit d65dce2

Browse files
committed
osd/scrub: fix deadline calculations
The scrub scheduling deadlines are calculated based on pool and OSD configuration parameters. The specifics of the calculations are modified to match the new scrub scheduling design. Comments and documentation are updated to reflect the fact that the deadlines no longer have any meaningful effect on scrub scheduling. Signed-off-by: Ronen Friedman <[email protected]> (cherry picked from commit 170e9f7)
1 parent f590506 commit d65dce2

File tree

5 files changed

+66
-51
lines changed

5 files changed

+66
-51
lines changed

src/osd/scrubber/pg_scrubber.cc

Lines changed: 49 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -674,46 +674,68 @@ Scrub::sched_conf_t PgScrubber::populate_config_params() const
674674
const auto& conf = get_pg_cct()->_conf; // for brevity
675675
Scrub::sched_conf_t configs;
676676

677-
// deep-scrub optimal interval
678-
configs.deep_interval =
679-
pool_conf.value_or(pool_opts_t::DEEP_SCRUB_INTERVAL, 0.0);
680-
if (configs.deep_interval <= 0.0) {
681-
configs.deep_interval = conf->osd_deep_scrub_interval;
682-
}
683-
684-
// shallow-scrub interval
685-
configs.shallow_interval =
677+
// shallow scrubs interval
678+
const auto shallow_pool =
686679
pool_conf.value_or(pool_opts_t::SCRUB_MIN_INTERVAL, 0.0);
687-
if (configs.shallow_interval <= 0.0) {
688-
configs.shallow_interval = conf->osd_scrub_min_interval;
689-
}
690-
691-
// the max allowed delay between scrubs.
692-
// For deep scrubs - there is no equivalent of scrub_max_interval. Per the
693-
// documentation, once deep_scrub_interval has passed, we are already
694-
// "overdue", at least as far as the "ignore allowed load" window is
695-
// concerned.
680+
configs.shallow_interval =
681+
shallow_pool > 0.0 ? shallow_pool : conf->osd_scrub_min_interval;
696682

697-
configs.max_deep = configs.deep_interval + configs.shallow_interval;
683+
// deep scrubs optimal interval
684+
const auto deep_pool =
685+
pool_conf.value_or(pool_opts_t::DEEP_SCRUB_INTERVAL, 0.0);
686+
configs.deep_interval =
687+
deep_pool > 0.0 ? deep_pool : conf->osd_deep_scrub_interval;
698688

689+
/**
690+
* 'max_deep' and 'max_shallow' are set to the maximum allowed delay between
691+
* scrubs. These deadlines have almost no effect on scrub scheduling
692+
* (the only minor exception: when sorting two scrub jobs that are
693+
* equivalent in all but the deadline).
694+
*
695+
* 'max_shallow' is controlled by a pool option and a configuration
696+
* parameter. Note that if the configured value is less than the shallow
697+
* interval scaled by (1 + the randomize ratio), max_shallow is left unset.
698+
*/
699699
auto max_shallow = pool_conf.value_or(pool_opts_t::SCRUB_MAX_INTERVAL, 0.0);
700700
if (max_shallow <= 0.0) {
701701
max_shallow = conf->osd_scrub_max_interval;
702702
}
703+
703704
if (max_shallow > 0.0) {
704-
configs.max_shallow = max_shallow;
705-
// otherwise - we're left with the default nullopt
705+
const auto min_accepted_deadline =
706+
configs.shallow_interval *
707+
(1 + conf->osd_scrub_interval_randomize_ratio);
708+
709+
if (max_shallow >= min_accepted_deadline) {
710+
configs.max_shallow = max_shallow;
711+
} else {
712+
// this is a bit odd, but the configured max (pool option or OSD config)
713+
// is below the minimum accepted deadline. Keep the nullopt in max_shallow.
714+
dout(10) << fmt::format(
715+
"{}: configured 'max shallow' rejected as too low ({}/{} "
716+
"< {})",
717+
__func__,
718+
pool_conf.value_or(pool_opts_t::SCRUB_MAX_INTERVAL, 0.0),
719+
conf->osd_scrub_max_interval, min_accepted_deadline)
720+
<< dendl;
721+
}
706722
}
707723

708-
// but seems like our tests require: \todo fix!
709-
configs.max_deep =
710-
std::max(configs.max_shallow.value_or(0.0), configs.deep_interval);
724+
// There are no comparable options for max_deep. We set it here to
725+
// 4X the deep interval, as a reasonable default.
726+
configs.max_deep = 4 * configs.deep_interval;
711727

712728
configs.interval_randomize_ratio = conf->osd_scrub_interval_randomize_ratio;
713-
configs.deep_randomize_ratio = conf.get_val<double>("osd_deep_scrub_interval_cv");
729+
configs.deep_randomize_ratio =
730+
conf.get_val<double>("osd_deep_scrub_interval_cv");
714731
configs.mandatory_on_invalid = conf->osd_scrub_invalid_stats;
715732

716-
dout(15) << fmt::format("{}: updated config:{}", __func__, configs) << dendl;
733+
dout(15) << fmt::format(
734+
"{}: inputs: intervals: sh:{}(pl:{}),dp:{}(pl:{})",
735+
__func__, configs.shallow_interval, shallow_pool,
736+
configs.deep_interval, deep_pool)
737+
<< dendl;
738+
dout(10) << fmt::format("{}: updated config:{}", __func__, configs) << dendl;
717739
return configs;
718740
}
719741

@@ -2080,7 +2102,7 @@ void PgScrubber::on_digest_updates()
20802102
void PgScrubber::on_mid_scrub_abort(Scrub::delay_cause_t issue)
20812103
{
20822104
if (!m_scrub_job->is_registered()) {
2083-
dout(10) << fmt::format(
2105+
dout(5) << fmt::format(
20842106
"{}: PG not registered for scrubbing on this OSD. Won't "
20852107
"requeue!",
20862108
__func__)

src/osd/scrubber/scrub_job.cc

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ void ScrubJob::adjust_shallow_schedule(
122122
if (app_conf.max_shallow) {
123123
sh_times.deadline += *app_conf.max_shallow;
124124
} else {
125-
sh_times.deadline = utime_t{};
125+
sh_times.deadline = utime_t::max();
126126
}
127127
if (adj_not_before < adj_target) {
128128
adj_not_before = adj_target;
@@ -135,7 +135,7 @@ void ScrubJob::adjust_shallow_schedule(
135135
// the target time is already set. Make sure to reset the n.b. and
136136
// the (irrelevant) deadline
137137
sh_times.not_before = sh_times.scheduled_at;
138-
sh_times.deadline = sh_times.scheduled_at;
138+
sh_times.deadline = utime_t::max();
139139
}
140140

141141
dout(10) << fmt::format(
@@ -257,12 +257,8 @@ void ScrubJob::adjust_deep_schedule(
257257
app_conf.deep_randomize_ratio, adj_target)
258258
<< dendl;
259259

260-
// the deadline can be updated directly into the scrub-job
261-
if (app_conf.max_shallow) {
262-
dp_times.deadline += *app_conf.max_shallow; // RRR fix
263-
} else {
264-
dp_times.deadline = utime_t{};
265-
}
260+
dp_times.deadline += app_conf.max_deep;
261+
266262
if (adj_not_before < adj_target) {
267263
adj_not_before = adj_target;
268264
}
@@ -272,7 +268,7 @@ void ScrubJob::adjust_deep_schedule(
272268
// the target time is already set. Make sure to reset the n.b. and
273269
// the (irrelevant) deadline
274270
dp_times.not_before = dp_times.scheduled_at;
275-
dp_times.deadline = dp_times.scheduled_at;
271+
dp_times.deadline = utime_t::max();
276272
}
277273

278274
dout(10) << fmt::format(

src/osd/scrubber/scrub_job.h

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,15 +44,13 @@ struct sched_conf_t {
4444
std::optional<double> max_shallow;
4545

4646
/**
47-
* the maximum interval between deep scrubs.
48-
* For deep scrubs - there is no equivalent of scrub_max_interval. Per the
49-
* documentation, once deep_scrub_interval has passed, we are already
50-
* "overdue", at least as far as the "ignore allowed load" window is
51-
* concerned. \todo based on users complaints (and the fact that the
52-
* interaction between the configuration parameters is clear to no one),
53-
* this will be revised shortly.
47+
* the maximum interval between deep scrubs, after which the
48+
* (info-only) "overdue" field in the scheduler dump is set.
49+
* There is no specific configuration parameter to control the
50+
* deep scrubs max. Instead - we set it to 4 times the average
51+
* interval.
5452
*/
55-
double max_deep{0.0};
53+
double max_deep{std::numeric_limits<double>::max()};
5654

5755
/**
5856
* interval_randomize_ratio
@@ -226,7 +224,7 @@ class ScrubJob {
226224
* The new values are updated into the scrub-job.
227225
*
228226
* Specifically:
229-
* - for high-priority scrubs: n.b. & deadline are set equal to the
227+
* - for high-priority scrubs: the 'not_before' is set to the
230228
* (untouched) proposed target time.
231229
* - for regular scrubs: the proposed time is adjusted (delayed) based
232230
* on the configuration; the deadline is set further out (if configured)

src/osd/scrubber/scrub_queue_entry.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,6 @@ enum class urgency_t {
6161
* the 'urgency' attribute of the scheduled scrub (which determines most of
6262
* its behavior and scheduling decisions) and the actual time attributes
6363
* for scheduling (target, deadline, not_before).
64-
*
65-
* In this commit - the 'urgency' attribute is not fully used yet, and some
66-
* of the scrub behavior is still controlled by the 'planned scrub' flags.
6764
*/
6865
struct SchedEntry {
6966
constexpr SchedEntry(spg_t pgid, scrub_level_t level)

src/osd/scrubber_common.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,11 @@ struct scrub_schedule_t {
136136
* the 'deadline' is the time by which we expect the periodic scrub to
137137
* complete. It is determined by the SCRUB_MAX_INTERVAL pool configuration
138138
* and by osd_scrub_max_interval;
139-
* Once passed, the scrub will be allowed to run even if the OSD is
140-
* overloaded.It would also have higher priority than other
141-
* auto-scheduled scrubs.
139+
* Note: the 'deadline' has only a limited effect on scheduling: when
140+
* comparing jobs having identical urgency and target time ('scheduled_at'),
141+
* the job with the earlier 'deadline' is preferred.
142+
* Being past deadline also sets the 'overdue' flag in scrub
143+
* scheduling dumps.
142144
*/
143145
utime_t deadline{utime_t::max()};
144146

0 commit comments

Comments
 (0)