Skip to content

Commit 375d01a

Browse files
committed
osd/scrub: update job's NB on failure
When a scrub job fails, update its NB to the current time plus a fixed delay. This prevents the job from being scheduled again immediately. Signed-off-by: Ronen Friedman <[email protected]>
1 parent af8939d commit 375d01a

File tree

12 files changed

+170
-25
lines changed

12 files changed

+170
-25
lines changed

src/osd/PG.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1342,9 +1342,12 @@ Scrub::schedule_result_t PG::start_scrubbing(
13421342
<< dendl;
13431343
ceph_assert(ceph_mutex_is_locked(_lock));
13441344

1345+
// recheck PG status (as the PG was unlocked for a time after being selected
1346+
// for scrubbing)
13451347
if (!is_primary() || !is_active() || !is_clean()) {
13461348
dout(10) << __func__ << ": cannot scrub (not a clean and active primary)"
13471349
<< dendl;
1350+
m_scrubber->penalize_next_scrub(Scrub::delay_cause_t::pg_state);
13481351
return schedule_result_t::target_specific_failure;
13491352
}
13501353

@@ -1361,6 +1364,7 @@ Scrub::schedule_result_t PG::start_scrubbing(
13611364
<< ": skipping this PG as repairing was not explicitly "
13621365
"requested for it"
13631366
<< dendl;
1367+
m_scrubber->penalize_next_scrub(Scrub::delay_cause_t::scrub_params);
13641368
return schedule_result_t::target_specific_failure;
13651369
}
13661370

@@ -1369,6 +1373,7 @@ Scrub::schedule_result_t PG::start_scrubbing(
13691373
// (on the transition from NotTrimming to Trimming/WaitReservation),
13701374
// i.e. some time before setting 'snaptrim'.
13711375
dout(10) << __func__ << ": cannot scrub while snap-trimming" << dendl;
1376+
m_scrubber->penalize_next_scrub(Scrub::delay_cause_t::pg_state);
13721377
return schedule_result_t::target_specific_failure;
13731378
}
13741379

@@ -1381,13 +1386,15 @@ Scrub::schedule_result_t PG::start_scrubbing(
13811386
// (due to configuration or priority issues)
13821387
// The reason was already reported by the callee.
13831388
dout(10) << __func__ << ": failed to initiate a scrub" << dendl;
1389+
m_scrubber->penalize_next_scrub(Scrub::delay_cause_t::scrub_params);
13841390
return schedule_result_t::target_specific_failure;
13851391
}
13861392

13871393
// try to reserve the local OSD resources. If failing: no harm. We will
13881394
// be retried by the OSD later on.
13891395
if (!m_scrubber->reserve_local()) {
13901396
dout(10) << __func__ << ": failed to reserve locally" << dendl;
1397+
m_scrubber->penalize_next_scrub(Scrub::delay_cause_t::local_resources);
13911398
return schedule_result_t::osd_wide_failure;
13921399
}
13931400

src/osd/scrubber/osd_scrub.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,16 @@ void OsdScrub::update_job(
435435
m_queue.update_job(sjob, suggested, reset_notbefore);
436436
}
437437

438+
void OsdScrub::delay_on_failure(
439+
Scrub::ScrubJobRef sjob,
440+
std::chrono::seconds delay,
441+
Scrub::delay_cause_t delay_cause,
442+
utime_t now_is)
443+
{
444+
m_queue.delay_on_failure(sjob, delay, delay_cause, now_is);
445+
}
446+
447+
438448
void OsdScrub::register_with_osd(
439449
Scrub::ScrubJobRef sjob,
440450
const Scrub::sched_params_t& suggested)

src/osd/scrubber/osd_scrub.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,17 @@ class OsdScrub {
149149

150150
void clear_reserving_now(spg_t reserving_id);
151151

152+
/**
153+
* push the 'not_before' time out by 'delay' seconds, so that this scrub target
154+
* would not be retried before 'delay' seconds have passed.
155+
*/
156+
void delay_on_failure(
157+
Scrub::ScrubJobRef sjob,
158+
std::chrono::seconds delay,
159+
Scrub::delay_cause_t delay_cause,
160+
utime_t now_is);
161+
162+
152163
/**
153164
* \returns true if the current time is within the scrub time window
154165
*/

src/osd/scrubber/osd_scrub_sched.cc

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,21 @@ void ScrubQueue::update_job(Scrub::ScrubJobRef scrub_job,
153153
scrub_job->update_schedule(adjusted, reset_nb);
154154
}
155155

156+
157+
void ScrubQueue::delay_on_failure(
158+
Scrub::ScrubJobRef sjob,
159+
std::chrono::seconds delay,
160+
Scrub::delay_cause_t delay_cause,
161+
utime_t now_is)
162+
{
163+
dout(10) << fmt::format(
164+
"pg[{}] delay_on_failure: delay:{} now:{:s}",
165+
sjob->pgid, delay, now_is)
166+
<< dendl;
167+
sjob->delay_on_failure(delay, delay_cause, now_is);
168+
}
169+
170+
156171
sched_params_t ScrubQueue::determine_scrub_time(
157172
const requested_scrub_t& request_flags,
158173
const pg_info_t& pg_info,
@@ -281,9 +296,9 @@ ScrubQContainer ScrubQueue::collect_ripe_jobs(
281296
for (const auto& jobref : group) {
282297
if (!filtr(jobref)) {
283298
dout(20) << fmt::format(
284-
" not ripe: {} @ {:s} ({:s})", jobref->pgid,
285-
jobref->schedule.not_before,
286-
jobref->schedule.scheduled_at)
299+
" not eligible: {} @ {:s} ({:s},{:s})", jobref->pgid,
300+
jobref->schedule.not_before,
301+
jobref->schedule.scheduled_at, jobref->last_issue)
287302
<< dendl;
288303
}
289304
}

src/osd/scrubber/osd_scrub_sched.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,12 @@ class ScrubQueue {
220220
const sched_params_t& suggested,
221221
bool reset_notbefore);
222222

223+
void delay_on_failure(
224+
Scrub::ScrubJobRef sjob,
225+
std::chrono::seconds delay,
226+
Scrub::delay_cause_t delay_cause,
227+
utime_t now_is);
228+
223229
sched_params_t determine_scrub_time(const requested_scrub_t& request_flags,
224230
const pg_info_t& pg_info,
225231
const pool_opts_t& pool_conf) const;

src/osd/scrubber/pg_scrubber.cc

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1756,7 +1756,9 @@ void PgScrubber::clear_scrub_blocked()
17561756

17571757
void PgScrubber::flag_reservations_failure()
17581758
{
1759-
m_scrub_job->resources_failure = true;
1759+
dout(10) << __func__ << dendl;
1760+
// delay the next invocation of the scrubber on this target
1761+
penalize_next_scrub(Scrub::delay_cause_t::replicas);
17601762
}
17611763

17621764
/*
@@ -1968,6 +1970,16 @@ void PgScrubber::scrub_finish()
19681970
}
19691971
}
19701972

1973+
/*
1974+
* note: arbitrary delay used in this early version of the
1975+
* scheduler refactoring.
1976+
*/
1977+
void PgScrubber::penalize_next_scrub(Scrub::delay_cause_t cause)
1978+
{
1979+
m_osds->get_scrub_services().delay_on_failure(
1980+
m_scrub_job, 5s, cause, ceph_clock_now());
1981+
}
1982+
19711983
void PgScrubber::on_digest_updates()
19721984
{
19731985
dout(10) << __func__ << " #pending: " << num_digest_updates_pending << " "

src/osd/scrubber/pg_scrubber.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -365,8 +365,7 @@ class PgScrubber : public ScrubPgIF,
365365
PG* get_pg() const final { return m_pg; }
366366
PerfCounters& get_counters_set() const final;
367367

368-
// temporary interface (to be discarded in a follow-up PR)
369-
/// set the 'resources_failure' flag in the scrub-job object
368+
/// delay next retry of this PG after a replica reservation failure
370369
void flag_reservations_failure();
371370

372371
scrubber_callback_cancel_token_t schedule_callback_after(
@@ -430,6 +429,8 @@ class PgScrubber : public ScrubPgIF,
430429

431430
void scrub_finish() final;
432431

432+
void penalize_next_scrub(Scrub::delay_cause_t cause) final;
433+
433434
ScrubMachineListener::MsgAndEpoch prep_replica_map_msg(
434435
Scrub::PreemptionNoted was_preempted) final;
435436

src/osd/scrubber/scrub_job.cc

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -45,31 +45,38 @@ ostream& operator<<(ostream& out, const ScrubJob& sjob)
4545

4646
void ScrubJob::update_schedule(
4747
const Scrub::scrub_schedule_t& adjusted,
48-
bool reset_nb)
48+
bool reset_failure_penalty)
4949
{
50-
dout(15)
51-
<< fmt::format(
52-
"was: nb:{:s}({:s}). Called with: rest?{} nb:{:s} ({:s}) ({})",
53-
schedule.not_before, schedule.scheduled_at, reset_nb,
54-
adjusted.not_before, adjusted.scheduled_at, registration_state())
55-
<< dendl;
50+
dout(15) << fmt::format(
51+
"was: nb:{:s}({:s}). Called with: rest?{} {:s} ({})",
52+
schedule.not_before, schedule.scheduled_at,
53+
reset_failure_penalty, adjusted.scheduled_at,
54+
registration_state())
55+
<< dendl;
5656
schedule.scheduled_at = adjusted.scheduled_at;
5757
schedule.deadline = adjusted.deadline;
5858

59-
if (reset_nb || schedule.not_before < schedule.scheduled_at) {
59+
if (reset_failure_penalty || (schedule.not_before < schedule.scheduled_at)) {
6060
schedule.not_before = schedule.scheduled_at;
6161
}
6262

63-
// 'updated' is changed here while not holding jobs_lock. That's OK, as
64-
// the (atomic) flag will only be cleared by select_pg_and_scrub() after
65-
// scan_penalized() is called and the job was moved to the to_scrub queue.
6663
updated = true;
6764
dout(10) << fmt::format(
6865
"adjusted: nb:{:s} ({:s}) ({})", schedule.not_before,
6966
schedule.scheduled_at, registration_state())
7067
<< dendl;
7168
}
7269

70+
void ScrubJob::delay_on_failure(
71+
std::chrono::seconds delay,
72+
Scrub::delay_cause_t delay_cause,
73+
utime_t scrub_clock_now)
74+
{
75+
schedule.not_before =
76+
std::max(scrub_clock_now, schedule.not_before) + utime_t{delay};
77+
last_issue = delay_cause;
78+
}
79+
7380
std::string ScrubJob::scheduling_state(utime_t now_is, bool is_deep_expected)
7481
const
7582
{

src/osd/scrubber/scrub_job.h

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,8 @@ class ScrubJob final : public RefCountedObject {
7171
*/
7272
std::atomic_bool in_queues{false};
7373

74-
/// last scrub attempt failed to secure replica resources
75-
bool resources_failure{false};
74+
/// how the last attempt to scrub this PG ended
75+
delay_cause_t last_issue{delay_cause_t::none};
7676

7777
/**
7878
* 'updated' is a temporary flag, used to create a barrier after
@@ -118,6 +118,15 @@ class ScrubJob final : public RefCountedObject {
118118
const scrub_schedule_t& adjusted,
119119
bool reset_failure_penalty);
120120

121+
/**
122+
* push the 'not_before' time out by 'delay' seconds, so that this scrub target
123+
* would not be retried before 'delay' seconds have passed.
124+
*/
125+
void delay_on_failure(
126+
std::chrono::seconds delay,
127+
delay_cause_t delay_cause,
128+
utime_t scrub_clock_now);
129+
121130
void dump(ceph::Formatter* f) const;
122131

123132
/*
@@ -227,6 +236,20 @@ struct formatter<Scrub::qu_state_t> : formatter<std::string_view> {
227236
}
228237
};
229238

239+
template <>
240+
struct formatter<Scrub::sched_params_t> {
241+
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
242+
template <typename FormatContext>
243+
auto format(const Scrub::sched_params_t& pm, FormatContext& ctx)
244+
{
245+
return fmt::format_to(
246+
ctx.out(), "(proposed:{:s} min/max:{:.3f}/{:.3f} must:{:2s})",
247+
utime_t{pm.proposed_time}, pm.min_interval, pm.max_interval,
248+
pm.is_must == Scrub::must_scrub_t::mandatory ? "true" : "false");
249+
}
250+
};
251+
252+
230253
template <>
231254
struct formatter<Scrub::ScrubJob> {
232255
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
@@ -235,12 +258,10 @@ struct formatter<Scrub::ScrubJob> {
235258
auto format(const Scrub::ScrubJob& sjob, FormatContext& ctx)
236259
{
237260
return fmt::format_to(
238-
ctx.out(),
239-
"pg[{}] @ {:s} ({:s}) (dl:{:s}) - <{}> / failure: {} / queue state: "
240-
"{:.7}",
261+
ctx.out(), "pg[{}] @ nb:{:s} ({:s}) (dl:{:s}) - <{}> queue state:{:.7}",
241262
sjob.pgid, sjob.schedule.not_before, sjob.schedule.scheduled_at,
242263
sjob.schedule.deadline, sjob.registration_state(),
243-
sjob.resources_failure, sjob.state.load(std::memory_order_relaxed));
264+
sjob.state.load(std::memory_order_relaxed));
244265
}
245266
};
246267

src/osd/scrubber/scrub_machine.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,9 @@ ActiveScrubbing::~ActiveScrubbing()
339339
// if the begin-time stamp was not set 'off' (as done if the scrubbing
340340
// completed successfully), we use it now to set the 'failed scrub' duration.
341341
if (session.m_session_started_at != ScrubTimePoint{}) {
342+
// delay the next invocation of the scrubber on this target
343+
scrbr->penalize_next_scrub(Scrub::delay_cause_t::aborted);
344+
342345
auto logged_duration = ScrubClock::now() - session.m_session_started_at;
343346
session.m_perf_set->tinc(scrbcnt_failed_elapsed, logged_duration);
344347
session.m_perf_set->inc(scrbcnt_failed);

0 commit comments

Comments
 (0)