@@ -49,7 +49,12 @@ OsdScrub::~OsdScrub()
4949
// Writes the log-line prefix for this object into 'out' and returns 'out'.
// 'fn' is the name of the calling function. In the updated version it is
// appended (followed by a separator) unless it matches the lambda marker
// tested below, in which case only m_log_prefix is written.
5050std::ostream& OsdScrub::gen_prefix (std::ostream& out, std::string_view fn) const
5151{
52- return out << m_log_prefix << fn << " : " ;
// NOTE(review): the literal tested below is shown with a leading space, which
// may be an artifact of how this diff was extracted - confirm against the
// real file that it matches the function name reported inside a lambda.
52+ if (fn.starts_with (" operator" )) {
53+ // it's a lambda: __func__ gives no usable function name, so emit only the prefix
54+ return out << m_log_prefix;
55+ } else {
56+ return out << m_log_prefix << fn << " : " ;
57+ }
5358}
5459
5560void OsdScrub::dump_scrubs (ceph::Formatter* f) const
@@ -82,15 +87,16 @@ bool OsdScrub::scrub_random_backoff() const
8287 return false ;
8388}
8489
// Debug helper: asks the scrub queue to visit its jobs, logging each visited
// job at debug level 20.
// NOTE(review): the meaning of the trailing '20' passed to for_each_job() is
// not visible here (presumably a cap on the number of jobs visited) - confirm
// against the queue's declaration.
90+ void OsdScrub::debug_log_all_jobs () const
91+ {
92+ m_queue.for_each_job ([this ](const Scrub::ScrubJob& sj) {
93+ dout (20 ) << fmt::format (" \t scrub-queue jobs: {}" , sj) << dendl;
94+ }, 20 );
95+ }
96+
8597
// Tries to start a scrub on at most one PG.
// In the updated version: the OSD-wide restrictions are computed for the
// current time and logged; the queue contents are optionally dumped (only
// when level-20 logging is gathered and we are not limited to high-priority
// scrubs); a single candidate is popped from the scrub queue and handed to
// initiate_a_scrub(). The previous version iterated over a list of
// candidates instead.
// 'is_recovery_active' is forwarded to restrictions_on_scrubbing().
8698void OsdScrub::initiate_scrub (bool is_recovery_active)
8799{
88- const utime_t scrub_time = ceph_clock_now ();
89- dout (10 ) << fmt::format (
90- " time now:{:s}, recovery is active?:{}" , scrub_time,
91- is_recovery_active)
92- << dendl;
93-
94100 if (auto blocked_pgs = get_blocked_pgs_count (); blocked_pgs > 0 ) {
95101 // some PGs managed by this OSD were blocked by a locked object during
96102 // scrub. This means we might not have the resources needed to scrub now.
@@ -101,53 +107,44 @@ void OsdScrub::initiate_scrub(bool is_recovery_active)
101107 << dendl;
102108 }
103109
110+ const utime_t scrub_time = ceph_clock_now ();
111+
104112 // check the OSD-wide environment conditions (scrub resources, time, etc.).
105113 // These may restrict the type of scrubs we are allowed to start, or just
106114 // prevent us from starting any non-operator-initiated scrub at all.
107- auto env_restrictions =
115+ const auto env_restrictions =
108116 restrictions_on_scrubbing (is_recovery_active, scrub_time);
109117
118+ dout (10 ) << fmt::format (" scrub scheduling (@tick) starts. "
119+ " time now:{:s}, recovery is active?:{} restrictions:{}" ,
120+ scrub_time, is_recovery_active, env_restrictions)
121+ << dendl;
122+
110123 if (g_conf ()->subsys .should_gather <ceph_subsys_osd, 20 >() &&
111124 !env_restrictions.high_priority_only ) {
112- dout (20 ) << " scrub scheduling (@tick) starts" << dendl;
113- auto all_jobs = m_queue.list_registered_jobs ();
114- for (const auto & sj : all_jobs) {
115- dout (20 ) << fmt::format (" \t scrub-queue jobs: {}" , *sj) << dendl;
116- }
125+ debug_log_all_jobs ();
117126 }
118127
119- // at this phase of the refactoring: minimal changes to the
120- // queue interface used here: we ask for a list of
121- // eligible targets (based on the known restrictions).
122- // We try all elements of this list until a (possibly temporary) success.
123- auto candidates = m_queue.ready_to_scrub (env_restrictions, scrub_time);
124- if (candidates.empty ()) {
128+ auto candidate = m_queue.pop_ready_pg (env_restrictions, scrub_time);
129+ if (!candidate) {
125130 dout (20 ) << " no PGs are ready for scrubbing" << dendl;
126131 return ;
127132 }
128133
129- for (const auto & candidate : candidates) {
130- dout (20 ) << fmt::format (" initiating scrub on pg[{}]" , candidate) << dendl;
131-
132- // we have a candidate to scrub. But we may fail when trying to initiate that
133- // scrub. For some failures - we can continue with the next candidate. For
134- // others - we should stop trying to scrub at this tick.
135- auto res = initiate_a_scrub (candidate, env_restrictions);
// keep the pgid aside: 'candidate' is moved into initiate_a_scrub() below,
// and the pgid is still needed for the success log message.
134+ auto candidate_pg = candidate->pgid ;
135+ auto res = initiate_a_scrub (std::move (candidate), env_restrictions);
136136
137- if (res == schedule_result_t ::target_specific_failure) {
138- // continue with the next job.
139- // \todo: consider separate handling of "no such PG", as - later on -
140- // we should be removing both related targets.
141- continue ;
142- } else if (res == schedule_result_t ::osd_wide_failure) {
143- // no point in trying the other candidates at this time
137+ switch (res) {
138+ case schedule_result_t ::target_specific_failure:
139+ case schedule_result_t ::osd_wide_failure:
140+ // No scrub this tick.
141+ // someone else will requeue the target, if needed.
144142 break ;
145- } else {
146- // the happy path. We are done
147- dout (20 ) << fmt::format (" scrub initiated for pg[{}]" , candidate. pgid )
148- << dendl;
143+
144+ case schedule_result_t ::scrub_initiated:
145+ dout (20 ) << fmt::format (" scrub initiated for pg[{}]" , candidate_pg )
146+ << dendl;
149147 break ;
150- }
151148 }
152149}
153150
@@ -198,45 +195,52 @@ Scrub::OSDRestrictions OsdScrub::restrictions_on_scrubbing(
198195
199196
// Tries to start scrubbing the PG named by 'candidate'.
// In the updated version the function takes ownership of the scrub job
// ('candidate' is passed by value as a unique_ptr) instead of receiving a
// bare pgid.
// Returns target_specific_failure if the PG cannot be obtained (locked) via
// m_osd_svc; otherwise returns whatever the PG's start_scrubbing() returns.
200197Scrub::schedule_result_t OsdScrub::initiate_a_scrub (
201- spg_t pgid ,
198+ std::unique_ptr<Scrub::ScrubJob> candidate ,
202199 Scrub::OSDRestrictions restrictions)
203200{
204- dout (20 ) << fmt::format (" trying pg[{}]" , pgid) << dendl;
201+ dout (20 ) << fmt::format (" trying pg[{}]" , candidate-> pgid ) << dendl;
205202
206203 // we have a candidate to scrub. We need some PG information to
207204 // know if scrubbing is allowed
208205
209- auto locked_pg = m_osd_svc.get_locked_pg (pgid);
206+ auto locked_pg = m_osd_svc.get_locked_pg (candidate-> pgid );
210207 if (!locked_pg) {
211- // the PG was dequeued in the short timespan between creating the
212- // candidates list (ready_to_scrub()) and here
213- dout (5 ) << fmt::format (" pg[{}] not found" , pgid) << dendl;
208+ // the PG was dequeued in the short timespan between querying the
209+ // scrub queue - and now.
210+ dout (5 ) << fmt::format (" pg[{}] not found" , candidate-> pgid ) << dendl;
214211 return Scrub::schedule_result_t ::target_specific_failure;
215212 }
216213
217- // later on, here is where the scrub target would be dequeued
218- return locked_pg->pg ()->start_scrubbing (restrictions);
214+ // note: the 'candidate', which in this step is a copy of the scrub job,
215+ // was already dequeued. The "original" scrub job cannot be accessed from
216+ // here directly. Thus - we leave it to start_scrubbing() (via a call
217+ // to PgScrubber::start_scrub_session()) to mark it as dequeued.
218+ return locked_pg->pg ()->start_scrubbing (std::move (candidate), restrictions);
219219}
220220
221+
// Reacts to a configuration change: for every entry returned by
// m_queue.get_pgs(), the matching PG is looked up (and skipped if it cannot
// be obtained), and on_scrub_schedule_input_change() is called on it with
// Scrub::delay_ready_t::no_delay.
221222void OsdScrub::on_config_change ()
222223{
223- auto to_notify = m_queue.list_registered_jobs ();
// the predicate accepts every job it is shown; it only asserts that each of
// them is marked as 'registered'.
224+ auto to_notify = m_queue.get_pgs ([](const Scrub::ScrubJob& sj) -> bool {
225+ ceph_assert (sj.registered );
226+ return true ;
227+ });
224228
225229 for (const auto & p : to_notify) {
226- dout (30 ) << fmt::format (" rescheduling pg[{}] scrubs" , * p) << dendl;
227- auto locked_pg = m_osd_svc.get_locked_pg (p-> pgid );
230+ dout (30 ) << fmt::format (" rescheduling pg[{}] scrubs" , p) << dendl;
231+ auto locked_pg = m_osd_svc.get_locked_pg (p);
// the PG is no longer available: nothing to update for it.
228232 if (!locked_pg)
229233 continue ;
230234
231235 dout (15 ) << fmt::format (
232236 " updating scrub schedule on {}" ,
233237 (locked_pg->pg ())->get_pgid ())
234238 << dendl;
235- locked_pg->pg ()->on_scrub_schedule_input_change ();
239+ locked_pg->pg ()->on_scrub_schedule_input_change (
240+ Scrub::delay_ready_t ::no_delay);
236241 }
237242}
238243
239-
240244// ////////////////////////////////////////////////////////////////////////// //
241245// CPU load tracking and related
242246
@@ -421,34 +425,15 @@ PerfCounters* OsdScrub::get_perf_counters(int pool_type, scrub_level_t level)
421425// ////////////////////////////////////////////////////////////////////////// //
422426// forwarders to the queue
423427
424- void OsdScrub::update_job (
425- Scrub::ScrubJobRef sjob,
426- const Scrub::sched_params_t & suggested,
427- bool reset_notbefore)
428- {
429- m_queue.update_job (sjob, suggested, reset_notbefore);
430- }
431-
432- void OsdScrub::delay_on_failure (
433- Scrub::ScrubJobRef sjob,
434- std::chrono::seconds delay,
435- Scrub::delay_cause_t delay_cause,
436- utime_t now_is)
437- {
438- m_queue.delay_on_failure (sjob, delay, delay_cause, now_is);
439- }
440-
441428
442- void OsdScrub::register_with_osd (
443- Scrub::ScrubJobRef sjob,
444- const Scrub::sched_params_t & suggested)
// Thin forwarder: passes 'sjob' on to the scrub queue's enqueue_target().
// It replaces the removed register_with_osd(), which took a job reference
// and suggested scheduling parameters.
429+ void OsdScrub::enqueue_target (const Scrub::ScrubJob& sjob)
445430{
446- m_queue.register_with_osd (sjob, suggested );
431+ m_queue.enqueue_target (sjob);
447432}
448433
449- void OsdScrub::remove_from_osd_queue (Scrub::ScrubJobRef sjob )
// Thin forwarder: passes 'pgid' on to the scrub queue's
// remove_from_osd_queue(). The updated version identifies the target by its
// pgid rather than by a job reference.
434+ void OsdScrub::remove_from_osd_queue (spg_t pgid )
450435{
451- m_queue.remove_from_osd_queue (sjob );
436+ m_queue.remove_from_osd_queue (pgid );
452437}
453438
454439std::unique_ptr<Scrub::LocalResourceWrapper> OsdScrub::inc_scrubs_local (
0 commit comments