Skip to content

Commit d294ea8

Browse files
committed
osd/scrub: introduce a 'not before' attribute for scrub jobs
The 'not before' (NB) attribute enables the OSD to delay the next attempt to schedule a specific scrub job. This is useful for jobs that have failed for whatever reason, especially if the primary has failed to acquire the replicas. Signed-off-by: Ronen Friedman <[email protected]>
1 parent e60c698 commit d294ea8

File tree

7 files changed

+68
-38
lines changed

7 files changed

+68
-38
lines changed

src/osd/scrubber/osd_scrub.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -429,9 +429,10 @@ Scrub::sched_params_t OsdScrub::determine_scrub_time(
429429

430430
void OsdScrub::update_job(
431431
Scrub::ScrubJobRef sjob,
432-
const Scrub::sched_params_t& suggested)
432+
const Scrub::sched_params_t& suggested,
433+
bool reset_notbefore)
433434
{
434-
m_queue.update_job(sjob, suggested);
435+
m_queue.update_job(sjob, suggested, reset_notbefore);
435436
}
436437

437438
void OsdScrub::register_with_osd(

src/osd/scrubber/osd_scrub.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,21 +90,23 @@ class OsdScrub {
9090
* the registration will be with "beginning of time" target, making the
9191
* scrub-job eligible to immediate scrub (given that external conditions
9292
* do not prevent scrubbing)
93-
*
9493
* - 'must' is asserted, and the suggested time is 'now':
9594
* This happens if our stats are unknown. The results are similar to the
9695
* previous scenario.
97-
*
9896
* - not a 'must': we take the suggested time as a basis, and add to it some
9997
* configuration / random delays.
100-
*
10198
* ('must' is Scrub::sched_params_t.is_must)
10299
*
100+
* 'reset_notbefore' is used to reset the 'not_before' time to the updated
101+
* 'scheduled_at' time. This is used whenever the scrub-job schedule is
102+
* updated not as a result of a scrub attempt failure.
103+
*
103104
* locking: not using the jobs_lock
104105
*/
105106
void update_job(
106107
Scrub::ScrubJobRef sjob,
107-
const Scrub::sched_params_t& suggested);
108+
const Scrub::sched_params_t& suggested,
109+
bool reset_notbefore);
108110

109111
/**
110112
* Add the scrub job to the list of jobs (i.e. list of PGs) to be periodically

src/osd/scrubber/osd_scrub_sched.cc

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ void ScrubQueue::register_with_osd(
9494
switch (state_at_entry) {
9595
case qu_state_t::registered:
9696
// just updating the schedule?
97-
update_job(scrub_job, suggested);
97+
update_job(scrub_job, suggested, false /* keep n.b. delay */);
9898
break;
9999

100100
case qu_state_t::not_registered:
@@ -110,7 +110,7 @@ void ScrubQueue::register_with_osd(
110110
break;
111111
}
112112

113-
update_job(scrub_job, suggested);
113+
update_job(scrub_job, suggested, true /* resets not_before */);
114114
to_scrub.push_back(scrub_job);
115115
scrub_job->in_queues = true;
116116
scrub_job->state = qu_state_t::registered;
@@ -124,7 +124,7 @@ void ScrubQueue::register_with_osd(
124124
// at any minute
125125
std::lock_guard lck{jobs_lock};
126126

127-
update_job(scrub_job, suggested);
127+
update_job(scrub_job, suggested, true /* resets not_before */);
128128
if (scrub_job->state == qu_state_t::not_registered) {
129129
dout(5) << " scrub job state changed to 'not registered'" << dendl;
130130
to_scrub.push_back(scrub_job);
@@ -138,18 +138,19 @@ void ScrubQueue::register_with_osd(
138138
dout(10) << fmt::format(
139139
"pg[{}] sched-state changed from <{:.14}> to <{:.14}> (@{:s})",
140140
scrub_job->pgid, state_at_entry, scrub_job->state.load(),
141-
scrub_job->schedule.scheduled_at)
141+
scrub_job->schedule.not_before)
142142
<< dendl;
143143
}
144144

145-
// look mommy - no locks!
145+
146146
void ScrubQueue::update_job(Scrub::ScrubJobRef scrub_job,
147-
const sched_params_t& suggested)
147+
const sched_params_t& suggested,
148+
bool reset_nb)
148149
{
149150
// adjust the suggested scrub time according to OSD-wide status
150151
auto adjusted = adjust_target_time(suggested);
151-
scrub_job->update_schedule(adjusted);
152152
scrub_job->high_priority = suggested.is_must == must_scrub_t::mandatory;
153+
scrub_job->update_schedule(adjusted, reset_nb);
153154
}
154155

155156
sched_params_t ScrubQueue::determine_scrub_time(
@@ -262,7 +263,7 @@ ScrubQContainer ScrubQueue::collect_ripe_jobs(
262263
utime_t time_now)
263264
{
264265
auto filtr = [time_now, rst = restrictions](const auto& jobref) -> bool {
265-
return jobref->schedule.scheduled_at <= time_now &&
266+
return jobref->schedule.not_before <= time_now &&
266267
(!rst.high_priority_only || jobref->high_priority) &&
267268
(!rst.only_deadlined || (!jobref->schedule.deadline.is_zero() &&
268269
jobref->schedule.deadline <= time_now));
@@ -280,7 +281,8 @@ ScrubQContainer ScrubQueue::collect_ripe_jobs(
280281
for (const auto& jobref : group) {
281282
if (!filtr(jobref)) {
282283
dout(20) << fmt::format(
283-
" not ripe: {} @ {:s}", jobref->pgid,
284+
" not ripe: {} @ {:s} ({:s})", jobref->pgid,
285+
jobref->schedule.not_before,
284286
jobref->schedule.scheduled_at)
285287
<< dendl;
286288
}
@@ -295,7 +297,7 @@ Scrub::scrub_schedule_t ScrubQueue::adjust_target_time(
295297
const sched_params_t& times) const
296298
{
297299
Scrub::scrub_schedule_t sched_n_dead{
298-
times.proposed_time, times.proposed_time};
300+
times.proposed_time, times.proposed_time, times.proposed_time};
299301

300302
if (times.is_must == Scrub::must_scrub_t::not_mandatory) {
301303
// unless explicitly requested, postpone the scrub with a random delay

src/osd/scrubber/osd_scrub_sched.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -202,19 +202,23 @@ class ScrubQueue {
202202
* the registration will be with "beginning of time" target, making the
203203
* scrub-job eligible to immediate scrub (given that external conditions
204204
* do not prevent scrubbing)
205-
*
206205
* - 'must' is asserted, and the suggested time is 'now':
207206
* This happens if our stats are unknown. The results are similar to the
208207
* previous scenario.
209-
*
210208
* - not a 'must': we take the suggested time as a basis, and add to it some
211209
* configuration / random delays.
212-
*
213210
* ('must' is sched_params_t.is_must)
214211
*
212+
* 'reset_notbefore' is used to reset the 'not_before' time to the updated
213+
* 'scheduled_at' time. This is used whenever the scrub-job schedule is
214+
* updated not as a result of a scrub attempt failure.
215+
*
215216
* locking: not using the jobs_lock
216217
*/
217-
void update_job(Scrub::ScrubJobRef sjob, const sched_params_t& suggested);
218+
void update_job(
219+
Scrub::ScrubJobRef sjob,
220+
const sched_params_t& suggested,
221+
bool reset_notbefore);
218222

219223
sched_params_t determine_scrub_time(const requested_scrub_t& request_flags,
220224
const pg_info_t& pg_info,

src/osd/scrubber/pg_scrubber.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -547,7 +547,7 @@ void PgScrubber::update_scrub_job(const requested_scrub_t& request_flags)
547547
ceph_assert(m_pg->is_locked());
548548
auto suggested = m_osds->get_scrub_services().determine_scrub_time(
549549
request_flags, m_pg->info, m_pg->get_pgpool().info.opts);
550-
m_osds->get_scrub_services().update_job(m_scrub_job, suggested);
550+
m_osds->get_scrub_services().update_job(m_scrub_job, suggested, true);
551551
m_pg->publish_stats_to_osd();
552552
}
553553

@@ -2126,7 +2126,7 @@ pg_scrubbing_status_t PgScrubber::get_schedule() const
21262126
!m_planned_scrub.must_deep_scrub;
21272127

21282128
// are we ripe for scrubbing?
2129-
if (now_is > m_scrub_job->schedule.scheduled_at) {
2129+
if (now_is > m_scrub_job->schedule.not_before) {
21302130
// we are waiting for our turn at the OSD.
21312131
return pg_scrubbing_status_t{m_scrub_job->schedule.scheduled_at,
21322132
0,
@@ -2136,7 +2136,7 @@ pg_scrubbing_status_t PgScrubber::get_schedule() const
21362136
periodic};
21372137
}
21382138

2139-
return pg_scrubbing_status_t{m_scrub_job->schedule.scheduled_at,
2139+
return pg_scrubbing_status_t{m_scrub_job->schedule.not_before,
21402140
0,
21412141
pg_scrub_sched_status_t::scheduled,
21422142
false,

src/osd/scrubber/scrub_job.cc

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,18 +43,30 @@ ostream& operator<<(ostream& out, const ScrubJob& sjob)
4343
}
4444
} // namespace std
4545

46-
void ScrubJob::update_schedule(const Scrub::scrub_schedule_t& adjusted)
46+
void ScrubJob::update_schedule(
47+
const Scrub::scrub_schedule_t& adjusted,
48+
bool reset_nb)
4749
{
48-
schedule = adjusted;
49-
penalty_timeout = utime_t(0, 0); // helps with debugging
50+
dout(15)
51+
<< fmt::format(
52+
"was: nb:{:s}({:s}). Called with: rest?{} nb:{:s} ({:s}) ({})",
53+
schedule.not_before, schedule.scheduled_at, reset_nb,
54+
adjusted.not_before, adjusted.scheduled_at, registration_state())
55+
<< dendl;
56+
schedule.scheduled_at = adjusted.scheduled_at;
57+
schedule.deadline = adjusted.deadline;
58+
59+
if (reset_nb || schedule.not_before < schedule.scheduled_at) {
60+
schedule.not_before = schedule.scheduled_at;
61+
}
5062

5163
// 'updated' is changed here while not holding jobs_lock. That's OK, as
5264
// the (atomic) flag will only be cleared by select_pg_and_scrub() after
5365
// scan_penalized() is called and the job was moved to the to_scrub queue.
5466
updated = true;
5567
dout(10) << fmt::format(
56-
"adjusted: {:s} ({})", schedule.scheduled_at,
57-
registration_state())
68+
"adjusted: nb:{:s} ({:s}) ({})", schedule.not_before,
69+
schedule.scheduled_at, registration_state())
5870
<< dendl;
5971
}
6072

@@ -67,15 +79,14 @@ std::string ScrubJob::scheduling_state(utime_t now_is, bool is_deep_expected)
6779
}
6880

6981
// if the time has passed, we are surely in the queue
70-
// (note that for now we do not tell client if 'penalized')
71-
if (now_is > schedule.scheduled_at) {
82+
if (now_is > schedule.not_before) {
7283
// we are never sure that the next scrub will indeed be shallow:
7384
return fmt::format("queued for {}scrub", (is_deep_expected ? "deep " : ""));
7485
}
7586

7687
return fmt::format(
77-
"{}scrub scheduled @ {:s}", (is_deep_expected ? "deep " : ""),
78-
schedule.scheduled_at);
88+
"{}scrub scheduled @ {:s} ({:s})", (is_deep_expected ? "deep " : ""),
89+
schedule.not_before, schedule.scheduled_at);
7990
}
8091

8192
std::ostream& ScrubJob::gen_prefix(std::ostream& out, std::string_view fn) const
@@ -100,7 +111,8 @@ void ScrubJob::dump(ceph::Formatter* f) const
100111
{
101112
f->open_object_section("scrub");
102113
f->dump_stream("pgid") << pgid;
103-
f->dump_stream("sched_time") << schedule.scheduled_at;
114+
f->dump_stream("sched_time") << schedule.not_before;
115+
f->dump_stream("orig_sched_time") << schedule.scheduled_at;
104116
f->dump_stream("deadline") << schedule.deadline;
105117
f->dump_bool("forced",
106118
schedule.scheduled_at == PgScrubber::scrub_must_stamp());

src/osd/scrubber/scrub_job.h

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ enum class qu_state_t {
3838
struct scrub_schedule_t {
3939
utime_t scheduled_at{};
4040
utime_t deadline{0, 0};
41+
utime_t not_before{utime_t::max()};
4142
};
4243

4344
struct sched_params_t {
@@ -66,7 +67,7 @@ class ScrubJob final : public RefCountedObject {
6667

6768
/**
6869
* the old 'is_registered'. Set whenever the job is registered with the OSD,
69-
* i.e. is in either the 'to_scrub' or the 'penalized' vectors.
70+
* i.e. is in 'to_scrub'.
7071
*/
7172
std::atomic_bool in_queues{false};
7273

@@ -93,7 +94,7 @@ class ScrubJob final : public RefCountedObject {
9394

9495
ScrubJob(CephContext* cct, const spg_t& pg, int node_id);
9596

96-
utime_t get_sched_time() const { return schedule.scheduled_at; }
97+
utime_t get_sched_time() const { return schedule.not_before; }
9798

9899
static std::string_view qu_state_text(qu_state_t st);
99100

@@ -107,7 +108,15 @@ class ScrubJob final : public RefCountedObject {
107108
return qu_state_text(state.load(std::memory_order_relaxed));
108109
}
109110

110-
void update_schedule(const scrub_schedule_t& adjusted);
111+
/**
112+
* 'reset_failure_penalty' is used to reset the 'not_before' job attribute to
113+
* the updated 'scheduled_at' time. This is used whenever the scrub-job
114+
* schedule is updated, and the update is not a result of a scrub attempt
115+
* failure.
116+
*/
117+
void update_schedule(
118+
const scrub_schedule_t& adjusted,
119+
bool reset_failure_penalty);
111120

112121
void dump(ceph::Formatter* f) const;
113122

@@ -227,9 +236,9 @@ struct formatter<Scrub::ScrubJob> {
227236
{
228237
return fmt::format_to(
229238
ctx.out(),
230-
"pg[{}] @ {:s} (dl:{:s}) - <{}> / failure: {} / queue state: "
239+
"pg[{}] @ {:s} ({:s}) (dl:{:s}) - <{}> / failure: {} / queue state: "
231240
"{:.7}",
232-
sjob.pgid, sjob.schedule.scheduled_at,
241+
sjob.pgid, sjob.schedule.not_before, sjob.schedule.scheduled_at,
233242
sjob.schedule.deadline, sjob.registration_state(),
234243
sjob.resources_failure, sjob.state.load(std::memory_order_relaxed));
235244
}

0 commit comments

Comments
 (0)