Skip to content

Commit 832ba50

Browse files
authored
Merge pull request ceph#58173 from ronen-fr/wip-rf-targets-j13
osd/scrub: no shared scrub-job ownership between PGs and the scrub queue Reviewed-by: Samuel Just <[email protected]>
2 parents 8d7d132 + 699dd28 commit 832ba50

18 files changed

+714
-1285
lines changed

qa/standalone/scrub/osd-scrub-test.sh

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -540,8 +540,8 @@ function TEST_dump_scrub_schedule() {
540540
--osd_scrub_interval_randomize_ratio=0 \
541541
--osd_scrub_backoff_ratio=0.0 \
542542
--osd_op_queue=wpq \
543-
--osd_stats_update_period_not_scrubbing=3 \
544-
--osd_stats_update_period_scrubbing=2 \
543+
--osd_stats_update_period_not_scrubbing=1 \
544+
--osd_stats_update_period_scrubbing=1 \
545545
--osd_scrub_sleep=0.2"
546546

547547
for osd in $(seq 0 $(expr $OSDS - 1))
@@ -562,7 +562,8 @@ function TEST_dump_scrub_schedule() {
562562
rm -f $TESTDATA
563563

564564
local pgid="${poolid}.0"
565-
local now_is=`date -I"ns"`
565+
#local now_is=`date -I"ns"` # note: uses a comma for the ns part
566+
local now_is=`date +'%Y-%m-%dT%H:%M:%S.%N%:z'`
566567

567568
# before the scrubbing starts
568569

@@ -604,8 +605,8 @@ function TEST_dump_scrub_schedule() {
604605
# scheduled for the future' value
605606
#
606607

607-
ceph tell osd.* config set osd_scrub_chunk_max "3" || return 1
608-
ceph tell osd.* config set osd_scrub_sleep "1.0" || return 1
608+
ceph tell osd.* config set osd_shallow_scrub_chunk_max "3" || return 1
609+
ceph tell osd.* config set osd_scrub_sleep "2.0" || return 1
609610
ceph osd set noscrub || return 1
610611
sleep 2
611612
saved_last_stamp=${sched_data['query_last_stamp']}

qa/standalone/scrub/scrub-helpers.sh

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ function extract_published_sch() {
1818
local -n dict=$4 # a ref to the in/out dictionary
1919
local current_time=$2
2020
local extra_time=$3
21-
local extr_dbg=1 # note: 3 and above leave some temp files around
21+
local extr_dbg=2 # note: 3 and above leave some temp files around
2222

2323
#turn off '-x' (but remember previous state)
2424
local saved_echo_flag=${-//[^x]/}
@@ -51,18 +51,26 @@ function extract_published_sch() {
5151
(( extr_dbg >= 2 )) && echo "query output:"
5252
(( extr_dbg >= 2 )) && ceph pg $1 query -f json-pretty | awk -e '/scrubber/,/agent_state/ {print;}'
5353

54+
# note: the query output for the schedule containas two dates: the first is the not-before, and
55+
# the second is the original target time (which is before or the same as the not-before)
56+
# the current line format looks like this:
57+
# "schedule": "scrub scheduled @ 2024-06-26T16:09:56.666 (2024-06-24T16:09:56.338)"
5458
from_qry=`ceph pg $1 query -f json-pretty | jq -r --arg extra_dt "$extra_time" --arg current_dt "$current_time" --arg spt "'" '
5559
. |
5660
(.q_stat_part=((.scrubber.schedule// "-") | if test(".*@.*") then (split(" @ ")|first) else . end)) |
5761
(.q_when_part=((.scrubber.schedule// "0") | if test(".*@.*") then (split(" @ ")|last) else "0" end)) |
58-
(.q_when_is_future=(.q_when_part > $current_dt)) |
62+
(.q_target=((.scrubber.schedule// "0") | if test(".*@.*") then (split(" @ ")|last|split(" (")|last|split(")")|first) else "0" end)) |
63+
(.q_not_before=((.scrubber.schedule// "0") | if test(".*@.*") then (split(" @ ")|last|split(" (")|first) else "0" end)) |
64+
(.q_when_is_future=(.q_target > $current_dt)) |
5965
(.q_vs_date=(.q_when_part > $extra_dt)) |
6066
{
6167
query_epoch: .epoch,
6268
query_seq: .info.stats.reported_seq,
6369
query_active: (.scrubber | if has("active") then .active else "bug" end),
6470
query_schedule: .q_stat_part,
65-
query_schedule_at: .q_when_part,
71+
#query_schedule_at: .q_when_part,
72+
query_schedule_at: .q_not_before,
73+
query_target_at: .q_target,
6674
query_last_duration: .info.stats.last_scrub_duration,
6775
query_last_stamp: .info.history.last_scrub_stamp,
6876
query_last_scrub: (.info.history.last_scrub| sub($spt;"x") ),

src/osd/PG.cc

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1325,6 +1325,7 @@ unsigned int PG::scrub_requeue_priority(Scrub::scrub_prio_t with_priority, unsig
13251325

13261326

13271327
Scrub::schedule_result_t PG::start_scrubbing(
1328+
std::unique_ptr<Scrub::ScrubJob> candidate,
13281329
Scrub::OSDRestrictions osd_restrictions)
13291330
{
13301331
dout(10) << fmt::format(
@@ -1348,10 +1349,9 @@ Scrub::schedule_result_t PG::start_scrubbing(
13481349
get_pgbackend()->auto_repair_supported());
13491350

13501351
return m_scrubber->start_scrub_session(
1351-
osd_restrictions, pg_cond, m_planned_scrub);
1352+
std::move(candidate), osd_restrictions, pg_cond, m_planned_scrub);
13521353
}
13531354

1354-
13551355
double PG::next_deepscrub_interval() const
13561356
{
13571357
double deep_scrub_interval =
@@ -1360,14 +1360,22 @@ double PG::next_deepscrub_interval() const
13601360
deep_scrub_interval = cct->_conf->osd_deep_scrub_interval;
13611361
return info.history.last_deep_scrub_stamp + deep_scrub_interval;
13621362
}
1363-
void PG::on_scrub_schedule_input_change()
1363+
1364+
void PG::on_scrub_schedule_input_change(Scrub::delay_ready_t delay_ready)
13641365
{
1365-
if (is_active() && is_primary()) {
1366-
dout(20) << __func__ << ": active/primary" << dendl;
1366+
if (is_active() && is_primary() && !is_scrub_queued_or_active()) {
1367+
dout(10) << fmt::format(
1368+
"{}: active/primary. delay_ready={:c}", __func__,
1369+
(delay_ready == Scrub::delay_ready_t::delay_ready) ? 't'
1370+
: 'f')
1371+
<< dendl;
13671372
ceph_assert(m_scrubber);
1368-
m_scrubber->update_scrub_job(m_planned_scrub);
1373+
m_scrubber->update_scrub_job(delay_ready);
13691374
} else {
1370-
dout(20) << __func__ << ": inactive or non-primary" << dendl;
1375+
dout(10) << fmt::format(
1376+
"{}: inactive, non-primary - or already scrubbing",
1377+
__func__)
1378+
<< dendl;
13711379
}
13721380
}
13731381

@@ -2260,7 +2268,7 @@ void PG::handle_activate_map(PeeringCtx &rctx, epoch_t range_starts_at)
22602268
// on_scrub_schedule_input_change() as pool.info contains scrub scheduling
22612269
// parameters.
22622270
if (pool.info.last_change >= range_starts_at) {
2263-
on_scrub_schedule_input_change();
2271+
on_scrub_schedule_input_change(Scrub::delay_ready_t::delay_ready);
22642272
}
22652273
}
22662274

src/osd/PG.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ class PG : public DoutPrefixProvider,
269269
set_last_scrub_stamp(t, history, stats);
270270
return true;
271271
});
272-
on_scrub_schedule_input_change();
272+
on_scrub_schedule_input_change(Scrub::delay_ready_t::delay_ready);
273273
}
274274

275275
static void set_last_deep_scrub_stamp(
@@ -285,7 +285,7 @@ class PG : public DoutPrefixProvider,
285285
set_last_scrub_stamp(t, history, stats);
286286
return true;
287287
});
288-
on_scrub_schedule_input_change();
288+
on_scrub_schedule_input_change(Scrub::delay_ready_t::delay_ready);
289289
}
290290

291291
static void add_objects_scrubbed_count(
@@ -531,7 +531,7 @@ class PG : public DoutPrefixProvider,
531531
* - pg stat scrub timestamps
532532
* - etc
533533
*/
534-
void on_scrub_schedule_input_change();
534+
void on_scrub_schedule_input_change(Scrub::delay_ready_t delay_ready);
535535

536536
void scrub_requested(scrub_level_t scrub_level, scrub_type_t scrub_type) override;
537537

@@ -701,7 +701,8 @@ class PG : public DoutPrefixProvider,
701701
bool get_must_scrub() const;
702702

703703
Scrub::schedule_result_t start_scrubbing(
704-
Scrub::OSDRestrictions osd_restrictions);
704+
std::unique_ptr<Scrub::ScrubJob> candidate,
705+
Scrub::OSDRestrictions osd_restrictions);
705706

706707
unsigned int scrub_requeue_priority(
707708
Scrub::scrub_prio_t with_priority,

src/osd/osd_types_fmt.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "osd/osd_types.h"
1010
#include <fmt/chrono.h>
1111
#include <fmt/ranges.h>
12+
#include <fmt/std.h>
1213
#if FMT_VERSION >= 90000
1314
#include <fmt/ostream.h>
1415
#endif

src/osd/scrubber/osd_scrub.cc

Lines changed: 59 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,12 @@ OsdScrub::~OsdScrub()
4949

5050
std::ostream& OsdScrub::gen_prefix(std::ostream& out, std::string_view fn) const
5151
{
52-
return out << m_log_prefix << fn << ": ";
52+
if (fn.starts_with("operator")) {
53+
// it's a lambda, and __func__ is not available
54+
return out << m_log_prefix;
55+
} else {
56+
return out << m_log_prefix << fn << ": ";
57+
}
5358
}
5459

5560
void OsdScrub::dump_scrubs(ceph::Formatter* f) const
@@ -82,15 +87,16 @@ bool OsdScrub::scrub_random_backoff() const
8287
return false;
8388
}
8489

90+
void OsdScrub::debug_log_all_jobs() const
91+
{
92+
m_queue.for_each_job([this](const Scrub::ScrubJob& sj) {
93+
dout(20) << fmt::format("\tscrub-queue jobs: {}", sj) << dendl;
94+
}, 20);
95+
}
96+
8597

8698
void OsdScrub::initiate_scrub(bool is_recovery_active)
8799
{
88-
const utime_t scrub_time = ceph_clock_now();
89-
dout(10) << fmt::format(
90-
"time now:{:s}, recovery is active?:{}", scrub_time,
91-
is_recovery_active)
92-
<< dendl;
93-
94100
if (auto blocked_pgs = get_blocked_pgs_count(); blocked_pgs > 0) {
95101
// some PGs managed by this OSD were blocked by a locked object during
96102
// scrub. This means we might not have the resources needed to scrub now.
@@ -101,53 +107,44 @@ void OsdScrub::initiate_scrub(bool is_recovery_active)
101107
<< dendl;
102108
}
103109

110+
const utime_t scrub_time = ceph_clock_now();
111+
104112
// check the OSD-wide environment conditions (scrub resources, time, etc.).
105113
// These may restrict the type of scrubs we are allowed to start, or just
106114
// prevent us from starting any non-operator-initiated scrub at all.
107-
auto env_restrictions =
115+
const auto env_restrictions =
108116
restrictions_on_scrubbing(is_recovery_active, scrub_time);
109117

118+
dout(10) << fmt::format("scrub scheduling (@tick) starts. "
119+
"time now:{:s}, recovery is active?:{} restrictions:{}",
120+
scrub_time, is_recovery_active, env_restrictions)
121+
<< dendl;
122+
110123
if (g_conf()->subsys.should_gather<ceph_subsys_osd, 20>() &&
111124
!env_restrictions.high_priority_only) {
112-
dout(20) << "scrub scheduling (@tick) starts" << dendl;
113-
auto all_jobs = m_queue.list_registered_jobs();
114-
for (const auto& sj : all_jobs) {
115-
dout(20) << fmt::format("\tscrub-queue jobs: {}", *sj) << dendl;
116-
}
125+
debug_log_all_jobs();
117126
}
118127

119-
// at this phase of the refactoring: minimal changes to the
120-
// queue interface used here: we ask for a list of
121-
// eligible targets (based on the known restrictions).
122-
// We try all elements of this list until a (possibly temporary) success.
123-
auto candidates = m_queue.ready_to_scrub(env_restrictions, scrub_time);
124-
if (candidates.empty()) {
128+
auto candidate = m_queue.pop_ready_pg(env_restrictions, scrub_time);
129+
if (!candidate) {
125130
dout(20) << "no PGs are ready for scrubbing" << dendl;
126131
return;
127132
}
128133

129-
for (const auto& candidate : candidates) {
130-
dout(20) << fmt::format("initiating scrub on pg[{}]", candidate) << dendl;
131-
132-
// we have a candidate to scrub. But we may fail when trying to initiate that
133-
// scrub. For some failures - we can continue with the next candidate. For
134-
// others - we should stop trying to scrub at this tick.
135-
auto res = initiate_a_scrub(candidate, env_restrictions);
134+
auto candidate_pg = candidate->pgid;
135+
auto res = initiate_a_scrub(std::move(candidate), env_restrictions);
136136

137-
if (res == schedule_result_t::target_specific_failure) {
138-
// continue with the next job.
139-
// \todo: consider separate handling of "no such PG", as - later on -
140-
// we should be removing both related targets.
141-
continue;
142-
} else if (res == schedule_result_t::osd_wide_failure) {
143-
// no point in trying the other candidates at this time
137+
switch (res) {
138+
case schedule_result_t::target_specific_failure:
139+
case schedule_result_t::osd_wide_failure:
140+
// No scrub this tick.
141+
// someone else will requeue the target, if needed.
144142
break;
145-
} else {
146-
// the happy path. We are done
147-
dout(20) << fmt::format("scrub initiated for pg[{}]", candidate.pgid)
148-
<< dendl;
143+
144+
case schedule_result_t::scrub_initiated:
145+
dout(20) << fmt::format("scrub initiated for pg[{}]", candidate_pg)
146+
<< dendl;
149147
break;
150-
}
151148
}
152149
}
153150

@@ -198,45 +195,52 @@ Scrub::OSDRestrictions OsdScrub::restrictions_on_scrubbing(
198195

199196

200197
Scrub::schedule_result_t OsdScrub::initiate_a_scrub(
201-
spg_t pgid,
198+
std::unique_ptr<Scrub::ScrubJob> candidate,
202199
Scrub::OSDRestrictions restrictions)
203200
{
204-
dout(20) << fmt::format("trying pg[{}]", pgid) << dendl;
201+
dout(20) << fmt::format("trying pg[{}]", candidate->pgid) << dendl;
205202

206203
// we have a candidate to scrub. We need some PG information to
207204
// know if scrubbing is allowed
208205

209-
auto locked_pg = m_osd_svc.get_locked_pg(pgid);
206+
auto locked_pg = m_osd_svc.get_locked_pg(candidate->pgid);
210207
if (!locked_pg) {
211-
// the PG was dequeued in the short timespan between creating the
212-
// candidates list (ready_to_scrub()) and here
213-
dout(5) << fmt::format("pg[{}] not found", pgid) << dendl;
208+
// the PG was dequeued in the short timespan between querying the
209+
// scrub queue - and now.
210+
dout(5) << fmt::format("pg[{}] not found", candidate->pgid) << dendl;
214211
return Scrub::schedule_result_t::target_specific_failure;
215212
}
216213

217-
// later on, here is where the scrub target would be dequeued
218-
return locked_pg->pg()->start_scrubbing(restrictions);
214+
// note: the 'candidate', which in this step is a copy of the scrub job,
215+
// was already dequeued. The "original" scrub job cannot be accessed from
216+
// here directly. Thus - we leave it to start_scrubbing() (via a call
217+
// to PgScrubber::start_scrub_session() to mark it as dequeued.
218+
return locked_pg->pg()->start_scrubbing(std::move(candidate), restrictions);
219219
}
220220

221+
221222
void OsdScrub::on_config_change()
222223
{
223-
auto to_notify = m_queue.list_registered_jobs();
224+
auto to_notify = m_queue.get_pgs([](const Scrub::ScrubJob& sj) -> bool {
225+
ceph_assert(sj.registered);
226+
return true;
227+
});
224228

225229
for (const auto& p : to_notify) {
226-
dout(30) << fmt::format("rescheduling pg[{}] scrubs", *p) << dendl;
227-
auto locked_pg = m_osd_svc.get_locked_pg(p->pgid);
230+
dout(30) << fmt::format("rescheduling pg[{}] scrubs", p) << dendl;
231+
auto locked_pg = m_osd_svc.get_locked_pg(p);
228232
if (!locked_pg)
229233
continue;
230234

231235
dout(15) << fmt::format(
232236
"updating scrub schedule on {}",
233237
(locked_pg->pg())->get_pgid())
234238
<< dendl;
235-
locked_pg->pg()->on_scrub_schedule_input_change();
239+
locked_pg->pg()->on_scrub_schedule_input_change(
240+
Scrub::delay_ready_t::no_delay);
236241
}
237242
}
238243

239-
240244
// ////////////////////////////////////////////////////////////////////////// //
241245
// CPU load tracking and related
242246

@@ -421,34 +425,15 @@ PerfCounters* OsdScrub::get_perf_counters(int pool_type, scrub_level_t level)
421425
// ////////////////////////////////////////////////////////////////////////// //
422426
// forwarders to the queue
423427

424-
void OsdScrub::update_job(
425-
Scrub::ScrubJobRef sjob,
426-
const Scrub::sched_params_t& suggested,
427-
bool reset_notbefore)
428-
{
429-
m_queue.update_job(sjob, suggested, reset_notbefore);
430-
}
431-
432-
void OsdScrub::delay_on_failure(
433-
Scrub::ScrubJobRef sjob,
434-
std::chrono::seconds delay,
435-
Scrub::delay_cause_t delay_cause,
436-
utime_t now_is)
437-
{
438-
m_queue.delay_on_failure(sjob, delay, delay_cause, now_is);
439-
}
440-
441428

442-
void OsdScrub::register_with_osd(
443-
Scrub::ScrubJobRef sjob,
444-
const Scrub::sched_params_t& suggested)
429+
void OsdScrub::enqueue_target(const Scrub::ScrubJob& sjob)
445430
{
446-
m_queue.register_with_osd(sjob, suggested);
431+
m_queue.enqueue_target(sjob);
447432
}
448433

449-
void OsdScrub::remove_from_osd_queue(Scrub::ScrubJobRef sjob)
434+
void OsdScrub::remove_from_osd_queue(spg_t pgid)
450435
{
451-
m_queue.remove_from_osd_queue(sjob);
436+
m_queue.remove_from_osd_queue(pgid);
452437
}
453438

454439
std::unique_ptr<Scrub::LocalResourceWrapper> OsdScrub::inc_scrubs_local(

0 commit comments

Comments
 (0)