Skip to content

Commit 8a8fa48

Browse files
committed
osd/scrub: scheduling the next scrub following scrub completion
or after an aborted scrub. Note that one of the important changes in this commit is the merging of the functionality of adjust_target_time() & update_schedule() into a single function - adjust_schedule(). Regarding the handling of aborts: most of the time, all that is required following a scrub abort is to requeue the scrub job - the one that triggered the aborted scrub - with just a delay added to its not-before (n.b.) time. But we must take into account scenarios where "something" caused the parameters prepared for the *next* scrub to show higher urgency or priority. "Something" - as in an operator command requiring immediate scrubbing, or a change in the pool/cluster configuration. In such cases, the current requested flags and the parameters of the aborted scrub must be merged. Note that the current implementation is a temporary solution, to be replaced by a per-level updating of the relevant target. Signed-off-by: Ronen Friedman <[email protected]>
1 parent 75b8ecb commit 8a8fa48

File tree

6 files changed

+145
-108
lines changed

6 files changed

+145
-108
lines changed

src/osd/scrubber/pg_scrubber.cc

Lines changed: 61 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -534,30 +534,16 @@ sched_params_t PgScrubber::determine_initial_schedule(
534534
}
535535

536536

537-
/*
538-
* Note: referring to m_planned_scrub here is temporary, as this set of
539-
* scheduling flags will be removed in a followup PR.
540-
*/
541537
void PgScrubber::schedule_scrub_with_osd()
542538
{
543539
ceph_assert(is_primary());
544540
ceph_assert(m_scrub_job);
545541

546-
auto pre_reg = registration_state();
547-
m_scrub_job->registered = true;
548-
549-
const auto applicable_conf = populate_config_params();
550-
const auto scrub_clock_now = ceph_clock_now();
551-
auto suggested = determine_initial_schedule(applicable_conf, scrub_clock_now);
552-
m_scrub_job->init_targets(
553-
suggested, m_pg->info, applicable_conf, scrub_clock_now);
554-
555-
m_osds->get_scrub_services().enqueue_target(*m_scrub_job);
556-
557-
dout(10) << fmt::format(
558-
"{}: <flags:{}> <{:.5}> --> <{:.5}>", __func__,
559-
m_planned_scrub, pre_reg, registration_state())
542+
dout(20) << fmt::format(
543+
"{}: state at entry: {}", __func__, m_scrub_job->state_desc())
560544
<< dendl;
545+
m_scrub_job->registered = true;
546+
update_scrub_job(delay_ready_t::delay_ready);
561547
}
562548

563549

@@ -613,8 +599,12 @@ void PgScrubber::update_scrub_job(Scrub::delay_ready_t delay_ready)
613599
ceph_assert(m_pg->is_locked());
614600
const auto applicable_conf = populate_config_params();
615601
const auto scrub_clock_now = ceph_clock_now();
616-
const auto suggested = determine_initial_schedule(applicable_conf, scrub_clock_now);
617-
m_scrub_job->on_periods_change(suggested, applicable_conf, scrub_clock_now);
602+
const auto suggested =
603+
determine_initial_schedule(applicable_conf, scrub_clock_now);
604+
605+
ceph_assert(m_scrub_job->is_registered());
606+
m_scrub_job->adjust_schedule(
607+
suggested, applicable_conf, scrub_clock_now, delay_ready);
618608
m_osds->get_scrub_services().enqueue_target(*m_scrub_job);
619609
m_scrub_job->target_queued = true;
620610
m_pg->publish_stats_to_osd();
@@ -761,10 +751,7 @@ void PgScrubber::on_operator_periodic_cmd(
761751
<< dendl;
762752

763753
// move the relevant time-stamp backwards - enough to trigger a scrub
764-
765-
utime_t now_is = ceph_clock_now();
766-
utime_t stamp = now_is;
767-
754+
utime_t stamp = ceph_clock_now();
768755
if (offset > 0) {
769756
stamp -= offset;
770757
} else {
@@ -2083,6 +2070,56 @@ void PgScrubber::on_digest_updates()
20832070
}
20842071

20852072

2073+
/**
2074+
* The scrub session was aborted. We are left with two sets of parameters
2075+
* as to when the next scrub of this PG should take place, and what should
2076+
* it be like. One set of parameters is the one that was used to start the
2077+
* scrub, and that was 'frozen' by set_op_parameters(). It has its own
2078+
* scheduling target, priority, not-before, etc.
2079+
* The other set is the updated state of the current scrub-job. It may
2080+
* have had its priority, flags, or schedule modified in the meantime.
2081+
* And - it does not (at least initially, i.e. immediately after
2082+
* set_op_parameters()), have high priority.
2083+
*
2084+
* Alas, the scrub session that was initiated was aborted. We must now
2085+
* merge the two sets of parameters, using the highest priority and the
2086+
* nearest target time for the next scrub.
2087+
*
2088+
* Note: only half-functioning in this commit. As the scrub-job copy
2089+
* (the one that was in the scheduling queue, and was passed to the scrubber)
2090+
* does not have the 'urgency' parameter, we are missing some information
2091+
* that is still encoded in the 'planned scrub' flags. This will be fixed in
2092+
* the next step.
2093+
*/
2094+
void PgScrubber::on_mid_scrub_abort(Scrub::delay_cause_t issue)
2095+
{
2096+
// assuming we can still depend on the 'scrubbing' flag being set;
2097+
// Also on Queued&Active.
2098+
2099+
// note again: this is not how merging should work in the final version:
2100+
// e.g. - the 'aborted_schedule' data should be passed thru the scrubber.
2101+
// In this current patchwork, for example, we are only guessing at
2102+
// the original value of 'must_deep_scrub'.
2103+
m_planned_scrub.must_deep_scrub =
2104+
m_planned_scrub.must_deep_scrub || (m_flags.required && m_is_deep);
2105+
m_planned_scrub.must_scrub = m_planned_scrub.must_deep_scrub ||
2106+
m_planned_scrub.must_scrub || m_flags.required;
2107+
m_planned_scrub.must_repair = m_planned_scrub.must_repair || m_is_repair;
2108+
m_planned_scrub.need_auto = m_planned_scrub.need_auto || m_flags.auto_repair;
2109+
m_planned_scrub.deep_scrub_on_error =
2110+
m_planned_scrub.deep_scrub_on_error || m_flags.deep_scrub_on_error;
2111+
m_planned_scrub.check_repair =
2112+
m_planned_scrub.check_repair || m_flags.check_repair;
2113+
2114+
m_scrub_job->merge_and_delay(
2115+
m_active_target->schedule, issue, m_planned_scrub, ceph_clock_now());
2116+
ceph_assert(m_scrub_job->is_registered());
2117+
ceph_assert(!m_scrub_job->target_queued);
2118+
m_osds->get_scrub_services().enqueue_target(*m_scrub_job);
2119+
m_scrub_job->target_queued = true;
2120+
}
2121+
2122+
20862123
void PgScrubber::requeue_penalized(Scrub::delay_cause_t cause)
20872124
{
20882125
/// \todo fix the 5s' to use a cause-specific delay parameter

src/osd/scrubber/pg_scrubber.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,7 @@ class PgScrubber : public ScrubPgIF,
437437

438438
void scrub_finish() final;
439439

440-
void penalize_next_scrub(Scrub::delay_cause_t cause) final;
440+
void on_mid_scrub_abort(Scrub::delay_cause_t issue) final;
441441

442442
ScrubMachineListener::MsgAndEpoch prep_replica_map_msg(
443443
Scrub::PreemptionNoted was_preempted) final;

src/osd/scrubber/scrub_job.cc

Lines changed: 54 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ using OSDRestrictions = Scrub::OSDRestrictions;
1111
using sched_conf_t = Scrub::sched_conf_t;
1212
using scrub_schedule_t = Scrub::scrub_schedule_t;
1313
using ScrubJob = Scrub::ScrubJob;
14+
using delay_ready_t = Scrub::delay_ready_t;
1415

1516

1617
// ////////////////////////////////////////////////////////////////////////// //
@@ -32,7 +33,7 @@ ScrubJob::ScrubJob(CephContext* cct, const spg_t& pg, int node_id)
3233
: pgid{pg}
3334
, whoami{node_id}
3435
, cct{cct}
35-
, log_msg_prefix{fmt::format("osd.{}: scrub-job:pg[{}]:", node_id, pgid)}
36+
, log_msg_prefix{fmt::format("osd.{} scrub-job:pg[{}]:", node_id, pgid)}
3637
{}
3738

3839
// debug usage only
@@ -44,74 +45,76 @@ ostream& operator<<(ostream& out, const ScrubJob& sjob)
4445
} // namespace std
4546

4647

47-
Scrub::scrub_schedule_t ScrubJob::adjust_target_time(
48-
const sched_conf_t& app_conf,
49-
const sched_params_t& suggested) const
48+
void ScrubJob::adjust_schedule(
49+
const Scrub::sched_params_t& suggested,
50+
const Scrub::sched_conf_t& app_conf,
51+
utime_t scrub_clock_now,
52+
delay_ready_t modify_ready_targets)
5053
{
51-
Scrub::scrub_schedule_t adjusted{
52-
suggested.proposed_time, suggested.proposed_time, suggested.proposed_time};
54+
dout(10) << fmt::format(
55+
"{} current h.p.:{:c} conf:{} also-ready?{:c} "
56+
"sjob@entry:{}",
57+
suggested, high_priority ? 'y' : 'n', app_conf,
58+
(modify_ready_targets == delay_ready_t::delay_ready) ? 'y'
59+
: 'n',
60+
*this)
61+
<< dendl;
5362

54-
if (suggested.is_must == Scrub::must_scrub_t::not_mandatory) {
55-
// unless explicitly requested, postpone the scrub with a random delay
56-
adjusted.scheduled_at += app_conf.shallow_interval;
57-
double r = rand() / (double)RAND_MAX;
58-
adjusted.scheduled_at +=
59-
app_conf.shallow_interval * app_conf.interval_randomize_ratio * r;
63+
high_priority = (suggested.is_must == must_scrub_t::mandatory);
64+
utime_t adj_not_before = suggested.proposed_time;
65+
utime_t adj_target = suggested.proposed_time;
66+
schedule.deadline = adj_target;
67+
68+
if (!high_priority) {
69+
// add a random delay to the proposed scheduled time - but only for periodic
70+
// scrubs that are not already eligible for scrubbing.
71+
if ((modify_ready_targets == delay_ready_t::delay_ready) ||
72+
adj_not_before > scrub_clock_now) {
73+
adj_target += app_conf.shallow_interval;
74+
double r = rand() / (double)RAND_MAX;
75+
adj_target +=
76+
app_conf.shallow_interval * app_conf.interval_randomize_ratio * r;
77+
}
6078

79+
// the deadline can be updated directly into the scrub-job
6180
if (app_conf.max_shallow) {
62-
adjusted.deadline += *app_conf.max_shallow;
81+
schedule.deadline += *app_conf.max_shallow;
6382
} else {
64-
adjusted.deadline = utime_t{};
83+
schedule.deadline = utime_t{};
6584
}
6685

67-
if (adjusted.not_before < adjusted.scheduled_at) {
68-
adjusted.not_before = adjusted.scheduled_at;
86+
if (adj_not_before < adj_target) {
87+
adj_not_before = adj_target;
6988
}
70-
71-
dout(20) << fmt::format(
72-
"not-must. Was:{:s} config:{} adjusted:{}",
73-
suggested.proposed_time, app_conf, adjusted) << dendl;
7489
}
75-
// else - no log is needed. All relevant data will be logged by the caller
7690

77-
return adjusted;
91+
schedule.scheduled_at = adj_target;
92+
schedule.not_before = adj_not_before;
93+
dout(10) << fmt::format(
94+
"adjusted: nb:{:s} target:{:s} deadline:{:s} ({})",
95+
schedule.not_before, schedule.scheduled_at, schedule.deadline,
96+
state_desc())
97+
<< dendl;
7898
}
7999

80100

81-
void ScrubJob::init_targets(
82-
const sched_params_t& suggested,
83-
const pg_info_t& info,
84-
const Scrub::sched_conf_t& aconf,
101+
void ScrubJob::merge_and_delay(
102+
const scrub_schedule_t& aborted_schedule,
103+
delay_cause_t issue,
104+
requested_scrub_t updated_flags,
85105
utime_t scrub_clock_now)
86106
{
87-
auto adjusted = adjust_target_time(aconf, suggested);
88-
high_priority = suggested.is_must == must_scrub_t::mandatory;
89-
update_schedule(adjusted, true);
107+
// merge the schedule targets:
108+
schedule.scheduled_at =
109+
std::min(aborted_schedule.scheduled_at, schedule.scheduled_at);
110+
high_priority = high_priority || updated_flags.must_scrub;
111+
delay_on_failure(5s, issue, scrub_clock_now);
112+
113+
// the new deadline is the minimum of the two
114+
schedule.deadline = std::min(aborted_schedule.deadline, schedule.deadline);
90115
}
91116

92117

93-
void ScrubJob::update_schedule(
94-
const Scrub::scrub_schedule_t& adjusted,
95-
bool reset_failure_penalty)
96-
{
97-
dout(15) << fmt::format(
98-
"was: nb:{:s}({:s}). Called with: rest?{} {:s} ({})",
99-
schedule.not_before, schedule.scheduled_at,
100-
reset_failure_penalty, adjusted.scheduled_at,
101-
state_desc())
102-
<< dendl;
103-
schedule.scheduled_at = adjusted.scheduled_at;
104-
schedule.deadline = adjusted.deadline;
105-
106-
if (reset_failure_penalty || (schedule.not_before < schedule.scheduled_at)) {
107-
schedule.not_before = schedule.scheduled_at;
108-
}
109-
dout(10) << fmt::format(
110-
"adjusted: nb:{:s} ({:s}) ({})", schedule.not_before,
111-
schedule.scheduled_at, state_desc())
112-
<< dendl;
113-
}
114-
115118
void ScrubJob::delay_on_failure(
116119
std::chrono::seconds delay,
117120
Scrub::delay_cause_t delay_cause,

src/osd/scrubber/scrub_job.h

Lines changed: 27 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -155,27 +155,23 @@ class ScrubJob {
155155
}
156156

157157
/**
158-
* 'reset_failure_penalty' is used to reset the 'not_before' jo attribute to
159-
* the updated 'scheduled_at' time. This is used whenever the scrub-job
160-
* schedule is updated, and the update is not a result of a scrub attempt
161-
* failure.
162-
*/
163-
void update_schedule(
164-
const scrub_schedule_t& adjusted,
165-
bool reset_failure_penalty);
166-
167-
/**
168-
* If the scrub job was not explicitly requested, we postpone it by some
169-
* random length of time.
170-
* And if delaying the scrub - we calculate, based on pool parameters, a
171-
* deadline we should scrub before.
158+
* Given a proposed time for the next scrub, and the relevant
159+
* configuration, adjust_schedule() determines the actual target time,
160+
* the deadline, and the 'not_before' time for the scrub.
161+
* The new values are updated into the scrub-job.
172162
*
173-
* @return updated (i.e. - possibly delayed) scrub schedule (schedule,
174-
* deadline, not_before)
163+
* Specifically:
164+
* - for high-priority scrubs: n.b. & deadline are set equal to the
165+
* (untouched) proposed target time.
166+
* - for regular scrubs: the proposed time is adjusted (delayed) based
167+
* on the configuration; the deadline is set further out (if configured)
168+
* and the n.b. is reset to the target.
175169
*/
176-
Scrub::scrub_schedule_t adjust_target_time(
177-
const Scrub::sched_conf_t& app_conf,
178-
const Scrub::sched_params_t& proposed_schedule) const;
170+
void adjust_schedule(
171+
const Scrub::sched_params_t& suggested,
172+
const Scrub::sched_conf_t& aconf,
173+
utime_t scrub_clock_now,
174+
Scrub::delay_ready_t modify_ready_targets);
179175

180176
/**
181177
* push the 'not_before' time out by 'delay' seconds, so that this scrub target
@@ -187,14 +183,18 @@ class ScrubJob {
187183
utime_t scrub_clock_now);
188184

189185
/**
190-
* initial setting of the scheduling parameters of a newly registered
191-
* PG. The scrub targets (in this stage of the refactoring - the whole
192-
* scrub job) is initialized as for a regular periodic scrub.
186+
* Recalculating any possible updates to the scrub schedule, following an
187+
* aborted scrub attempt.
188+
* Usually - we can use the same schedule that triggered the aborted scrub.
189+
* But we must take into account scenarios where "something" caused the
190+
* parameters prepared for the *next* scrub to show higher urgency or
191+
* priority. "Something" - as in an operator command requiring immediate
192+
* scrubbing, or a change in the pool/cluster configuration.
193193
*/
194-
void init_targets(
195-
const sched_params_t& suggested,
196-
const pg_info_t& info,
197-
const Scrub::sched_conf_t& aconf,
194+
void merge_and_delay(
195+
const scrub_schedule_t& aborted_schedule,
196+
Scrub::delay_cause_t issue,
197+
requested_scrub_t updated_flags,
198198
utime_t scrub_clock_now);
199199

200200
/**
@@ -213,7 +213,6 @@ class ScrubJob {
213213

214214
void dump(ceph::Formatter* f) const;
215215

216-
217216
bool is_registered() const { return registered; }
218217

219218
/**
@@ -263,7 +262,6 @@ struct formatter<Scrub::sched_params_t> {
263262
}
264263
};
265264

266-
267265
template <>
268266
struct formatter<Scrub::ScrubJob> {
269267
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
@@ -272,7 +270,7 @@ struct formatter<Scrub::ScrubJob> {
272270
auto format(const Scrub::ScrubJob& sjob, FormatContext& ctx) const
273271
{
274272
return fmt::format_to(
275-
ctx.out(), "pg[{}] @ nb:{:s} ({:s}) (dl:{:s}) - <{}>",
273+
ctx.out(), "pg[{}]:nb:{:s} / trg:{:s} / dl:{:s} <{}>",
276274
sjob.pgid, sjob.schedule.not_before, sjob.schedule.scheduled_at,
277275
sjob.schedule.deadline, sjob.state_desc());
278276
}

src/osd/scrubber/scrub_machine.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ ActiveScrubbing::~ActiveScrubbing()
308308
// completed successfully), we use it now to set the 'failed scrub' duration.
309309
if (session.m_session_started_at != ScrubTimePoint{}) {
310310
// delay the next invocation of the scrubber on this target
311-
scrbr->penalize_next_scrub(Scrub::delay_cause_t::aborted);
311+
scrbr->on_mid_scrub_abort(Scrub::delay_cause_t::aborted);
312312

313313
auto logged_duration = ScrubClock::now() - session.m_session_started_at;
314314
session.m_perf_set->tinc(scrbcnt_failed_elapsed, logged_duration);

src/osd/scrubber/scrub_machine_lstnr.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,7 @@ struct ScrubMachineListener {
163163
virtual void scrub_finish() = 0;
164164

165165
/// notify the scrubber about a scrub failure
166-
/// (note: temporary implementation)
167-
virtual void penalize_next_scrub(Scrub::delay_cause_t cause) = 0;
166+
virtual void on_mid_scrub_abort(Scrub::delay_cause_t cause) = 0;
168167

169168
/**
170169
* Prepare a MOSDRepScrubMap message carrying the requested scrub map

0 commit comments

Comments
 (0)