Skip to content

Commit 603a29a

Browse files
committed
osd/scrub: relocate determine_scrub_time()
This auxiliary function calculates the next scrub time based on configuration data, scrub history, and the requested scrub flags. As part of creating unit tests for ScrubQueue, determine_scrub_time() is moved into that component. A related change codifies the OSD services used by the scrub scheduler. Signed-off-by: Ronen Friedman <[email protected]>
1 parent 7145287 commit 603a29a

File tree

5 files changed

+156
-87
lines changed

5 files changed

+156
-87
lines changed

src/osd/OSD.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ class MMonGetPurgedSnapsReply;
9494

9595
class OSD;
9696

97-
class OSDService {
97+
class OSDService : public Scrub::ScrubSchedListener {
9898
using OpSchedulerItem = ceph::osd::scheduler::OpSchedulerItem;
9999
public:
100100
OSD *osd;
@@ -147,7 +147,7 @@ class OSDService {
147147
superblock = block;
148148
}
149149

150-
int get_nodeid() const { return whoami; }
150+
int get_nodeid() const final { return whoami; }
151151

152152
std::atomic<epoch_t> max_oldest_map;
153153
private:
@@ -290,7 +290,9 @@ class OSDService {
290290
* @param allow_requested_repair_only
291291
* @return a Scrub::schedule_result_t detailing either a success, or the failure reason.
292292
*/
293-
Scrub::schedule_result_t initiate_a_scrub(spg_t pgid, bool allow_requested_repair_only);
293+
Scrub::schedule_result_t initiate_a_scrub(
294+
spg_t pgid,
295+
bool allow_requested_repair_only) final;
294296

295297

296298
private:

src/osd/scrubber/osd_scrub_sched.cc

Lines changed: 79 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,7 @@
22
// vim: ts=8 sw=2 smarttab
33
#include "./osd_scrub_sched.h"
44

5-
#include "include/utime_fmt.h"
65
#include "osd/OSD.h"
7-
#include "osd/osd_types_fmt.h"
86

97
#include "pg_scrubber.h"
108

@@ -79,11 +77,12 @@ std::string ScrubQueue::ScrubJob::scheduling_state(utime_t now_is,
7977
#undef dout_context
8078
#define dout_context (cct)
8179
#undef dout_prefix
82-
#define dout_prefix \
83-
*_dout << "osd." << osd_service.whoami << " scrub-queue::" << __func__ << " "
80+
#define dout_prefix \
81+
*_dout << "osd." << osd_service.get_nodeid() << " scrub-queue::" << __func__ \
82+
<< " "
8483

8584

86-
ScrubQueue::ScrubQueue(CephContext* cct, OSDService& osds)
85+
ScrubQueue::ScrubQueue(CephContext* cct, Scrub::ScrubSchedListener& osds)
8786
: cct{cct}
8887
, osd_service{osds}
8988
{
@@ -98,7 +97,7 @@ ScrubQueue::ScrubQueue(CephContext* cct, OSDService& osds)
9897

9998
std::optional<double> ScrubQueue::update_load_average()
10099
{
101-
int hb_interval = cct->_conf->osd_heartbeat_interval;
100+
int hb_interval = conf()->osd_heartbeat_interval;
102101
int n_samples = 60 * 24 * 24;
103102
if (hb_interval > 1) {
104103
n_samples /= hb_interval;
@@ -220,6 +219,48 @@ void ScrubQueue::update_job(ScrubJobRef scrub_job,
220219
scrub_job->update_schedule(adjusted);
221220
}
222221

222+
/**
 * Compute the scheduling parameters for a PG's next scrub.
 *
 * The result is selected from three cases, checked in this order:
 *  - the request flags demand a scrub (must_scrub or need_auto): the proposed
 *    time is PgScrubber::scrub_must_stamp() and the scrub is marked mandatory;
 *  - the PG stats are flagged invalid and osd_scrub_invalid_stats is set: the
 *    proposed time is "now" and the scrub is marked mandatory;
 *  - otherwise: the proposed time is the last scrub stamp taken from the PG
 *    history, and the pool-level min/max scrub intervals are recorded.
 *
 * @param request_flags the scrub flags requested for the PG
 * @param pg_info       the PG info; its stats and scrub history are consulted
 * @param pool_conf     the pool options, queried for SCRUB_MIN_INTERVAL and
 *                      SCRUB_MAX_INTERVAL
 * @return the proposed scrub time, the 'must' state and - only in the
 *         non-mandatory case - the pool-level interval settings
 */
ScrubQueue::sched_params_t ScrubQueue::determine_scrub_time(
  const requested_scrub_t& request_flags,
  const pg_info_t& pg_info,
  const pool_opts_t pool_conf) const
{
  ScrubQueue::sched_params_t res;

  // note: plain stream output here - no fmt-style "{}" placeholder belongs in
  // the literal
  dout(15) << ": requested_scrub_t: " << request_flags << dendl;

  if (request_flags.must_scrub || request_flags.need_auto) {

    // Set the smallest time that isn't utime_t()
    res.proposed_time = PgScrubber::scrub_must_stamp();
    res.is_must = ScrubQueue::must_scrub_t::mandatory;
    // we do not need the interval data in this case

  } else if (pg_info.stats.stats_invalid &&
	     conf()->osd_scrub_invalid_stats) {

    // invalid stats (and configured to act on them): scrub as of 'now'
    res.proposed_time = time_now();
    res.is_must = ScrubQueue::must_scrub_t::mandatory;

  } else {

    // a regular periodic scrub: base the schedule on the last scrub. 'is_must'
    // keeps its default-constructed value (presumably 'not_mandatory' - the
    // sched_params_t definition is not visible here; verify in the header).
    res.proposed_time = pg_info.history.last_scrub_stamp;

    // 0.0 is the fallback handed to value_or(); adjust_target_time() only
    // honors pool intervals that are > 0, using the OSD-wide config otherwise
    res.min_interval = pool_conf.value_or(pool_opts_t::SCRUB_MIN_INTERVAL, 0.0);
    res.max_interval = pool_conf.value_or(pool_opts_t::SCRUB_MAX_INTERVAL, 0.0);
  }

  dout(15) << fmt::format(
		": suggested: {} hist: {} v: {}/{} must: {} pool-min: {}",
		res.proposed_time,
		pg_info.history.last_scrub_stamp,
		(bool)pg_info.stats.stats_invalid,
		conf()->osd_scrub_invalid_stats,
		(res.is_must == must_scrub_t::mandatory ? "y" : "n"),
		res.min_interval)
	   << dendl;
  return res;
}
262+
263+
223264
// used under jobs_lock
224265
void ScrubQueue::move_failed_pgs(utime_t now_is)
225266
{
@@ -237,7 +278,7 @@ void ScrubQueue::move_failed_pgs(utime_t now_is)
237278

238279
// determine the penalty time, after which the job should be reinstated
239280
utime_t after = now_is;
240-
after += cct->_conf->osd_scrub_sleep * 2 + utime_t{300'000ms};
281+
after += conf()->osd_scrub_sleep * 2 + utime_t{300'000ms};
241282

242283
// note: currently - not taking 'deadline' into account when determining
243284
// 'penalty_timeout'.
@@ -309,7 +350,7 @@ Scrub::schedule_result_t ScrubQueue::select_pg_and_scrub(
309350
dout(10) << " reg./pen. sizes: " << to_scrub.size() << " / "
310351
<< penalized.size() << dendl;
311352

312-
utime_t now_is = ceph_clock_now();
353+
utime_t now_is = time_now();
313354

314355
preconds.time_permit = scrub_time_permit(now_is);
315356
preconds.load_is_low = scrub_load_below_threshold();
@@ -489,28 +530,28 @@ ScrubQueue::scrub_schedule_t ScrubQueue::adjust_target_time(
489530

490531
if (g_conf()->subsys.should_gather<ceph_subsys_osd, 20>()) {
491532
dout(20) << "min t: " << times.min_interval
492-
<< " osd: " << cct->_conf->osd_scrub_min_interval
533+
<< " osd: " << conf()->osd_scrub_min_interval
493534
<< " max t: " << times.max_interval
494-
<< " osd: " << cct->_conf->osd_scrub_max_interval << dendl;
535+
<< " osd: " << conf()->osd_scrub_max_interval << dendl;
495536

496537
dout(20) << "at " << sched_n_dead.scheduled_at << " ratio "
497-
<< cct->_conf->osd_scrub_interval_randomize_ratio << dendl;
538+
<< conf()->osd_scrub_interval_randomize_ratio << dendl;
498539
}
499540

500541
if (times.is_must == ScrubQueue::must_scrub_t::not_mandatory) {
501542

502543
// unless explicitly requested, postpone the scrub with a random delay
503544
double scrub_min_interval = times.min_interval > 0
504545
? times.min_interval
505-
: cct->_conf->osd_scrub_min_interval;
546+
: conf()->osd_scrub_min_interval;
506547
double scrub_max_interval = times.max_interval > 0
507548
? times.max_interval
508-
: cct->_conf->osd_scrub_max_interval;
549+
: conf()->osd_scrub_max_interval;
509550

510551
sched_n_dead.scheduled_at += scrub_min_interval;
511552
double r = rand() / (double)RAND_MAX;
512553
sched_n_dead.scheduled_at +=
513-
scrub_min_interval * cct->_conf->osd_scrub_interval_randomize_ratio * r;
554+
scrub_min_interval * conf()->osd_scrub_interval_randomize_ratio * r;
514555

515556
if (scrub_max_interval <= 0) {
516557
sched_n_dead.deadline = utime_t{};
@@ -526,15 +567,15 @@ ScrubQueue::scrub_schedule_t ScrubQueue::adjust_target_time(
526567

527568
double ScrubQueue::scrub_sleep_time(bool must_scrub) const
528569
{
529-
double regular_sleep_period = cct->_conf->osd_scrub_sleep;
570+
double regular_sleep_period = conf()->osd_scrub_sleep;
530571

531-
if (must_scrub || scrub_time_permit(ceph_clock_now())) {
572+
if (must_scrub || scrub_time_permit(time_now())) {
532573
return regular_sleep_period;
533574
}
534575

535576
// relevant if scrubbing started during allowed time, but continued into
536577
// forbidden hours
537-
double extended_sleep = cct->_conf->osd_scrub_extended_sleep;
578+
double extended_sleep = conf()->osd_scrub_extended_sleep;
538579
dout(20) << "w/ extended sleep (" << extended_sleep << ")" << dendl;
539580
return std::max(extended_sleep, regular_sleep_period);
540581
}
@@ -550,9 +591,9 @@ bool ScrubQueue::scrub_load_below_threshold() const
550591
// allow scrub if below configured threshold
551592
long cpus = sysconf(_SC_NPROCESSORS_ONLN);
552593
double loadavg_per_cpu = cpus > 0 ? loadavgs[0] / cpus : loadavgs[0];
553-
if (loadavg_per_cpu < cct->_conf->osd_scrub_load_threshold) {
594+
if (loadavg_per_cpu < conf()->osd_scrub_load_threshold) {
554595
dout(20) << "loadavg per cpu " << loadavg_per_cpu << " < max "
555-
<< cct->_conf->osd_scrub_load_threshold << " = yes" << dendl;
596+
<< conf()->osd_scrub_load_threshold << " = yes" << dendl;
556597
return true;
557598
}
558599

@@ -565,7 +606,7 @@ bool ScrubQueue::scrub_load_below_threshold() const
565606
}
566607

567608
dout(20) << "loadavg " << loadavgs[0] << " >= max "
568-
<< cct->_conf->osd_scrub_load_threshold << " and ( >= daily_loadavg "
609+
<< conf()->osd_scrub_load_threshold << " and ( >= daily_loadavg "
569610
<< daily_loadavg << " or >= 15m avg " << loadavgs[2] << ") = no"
570611
<< dendl;
571612
return false;
@@ -616,22 +657,22 @@ bool ScrubQueue::scrub_time_permit(utime_t now) const
616657
time_t tt = now.sec();
617658
localtime_r(&tt, &bdt);
618659

619-
bool day_permit = isbetween_modulo(cct->_conf->osd_scrub_begin_week_day,
620-
cct->_conf->osd_scrub_end_week_day,
660+
bool day_permit = isbetween_modulo(conf()->osd_scrub_begin_week_day,
661+
conf()->osd_scrub_end_week_day,
621662
bdt.tm_wday);
622663
if (!day_permit) {
623664
dout(20) << "should run between week day "
624-
<< cct->_conf->osd_scrub_begin_week_day << " - "
625-
<< cct->_conf->osd_scrub_end_week_day << " now " << bdt.tm_wday
665+
<< conf()->osd_scrub_begin_week_day << " - "
666+
<< conf()->osd_scrub_end_week_day << " now " << bdt.tm_wday
626667
<< " - no" << dendl;
627668
return false;
628669
}
629670

630-
bool time_permit = isbetween_modulo(cct->_conf->osd_scrub_begin_hour,
631-
cct->_conf->osd_scrub_end_hour,
671+
bool time_permit = isbetween_modulo(conf()->osd_scrub_begin_hour,
672+
conf()->osd_scrub_end_hour,
632673
bdt.tm_hour);
633-
dout(20) << "should run between " << cct->_conf->osd_scrub_begin_hour << " - "
634-
<< cct->_conf->osd_scrub_end_hour << " now (" << bdt.tm_hour
674+
dout(20) << "should run between " << conf()->osd_scrub_begin_hour << " - "
675+
<< conf()->osd_scrub_end_hour << " now (" << bdt.tm_hour
635676
<< ") = " << (time_permit ? "yes" : "no") << dendl;
636677
return time_permit;
637678
}
@@ -694,34 +735,34 @@ bool ScrubQueue::can_inc_scrubs() const
694735
// inc_scrubs_local() failures
695736
std::lock_guard lck{resource_lock};
696737

697-
if (scrubs_local + scrubs_remote < cct->_conf->osd_max_scrubs) {
738+
if (scrubs_local + scrubs_remote < conf()->osd_max_scrubs) {
698739
return true;
699740
}
700741

701742
dout(20) << " == false. " << scrubs_local << " local + " << scrubs_remote
702-
<< " remote >= max " << cct->_conf->osd_max_scrubs << dendl;
743+
<< " remote >= max " << conf()->osd_max_scrubs << dendl;
703744
return false;
704745
}
705746

706747
bool ScrubQueue::inc_scrubs_local()
707748
{
708749
std::lock_guard lck{resource_lock};
709750

710-
if (scrubs_local + scrubs_remote < cct->_conf->osd_max_scrubs) {
751+
if (scrubs_local + scrubs_remote < conf()->osd_max_scrubs) {
711752
++scrubs_local;
712753
return true;
713754
}
714755

715756
dout(20) << ": " << scrubs_local << " local + " << scrubs_remote
716-
<< " remote >= max " << cct->_conf->osd_max_scrubs << dendl;
757+
<< " remote >= max " << conf()->osd_max_scrubs << dendl;
717758
return false;
718759
}
719760

720761
void ScrubQueue::dec_scrubs_local()
721762
{
722763
std::lock_guard lck{resource_lock};
723764
dout(20) << ": " << scrubs_local << " -> " << (scrubs_local - 1) << " (max "
724-
<< cct->_conf->osd_max_scrubs << ", remote " << scrubs_remote << ")"
765+
<< conf()->osd_max_scrubs << ", remote " << scrubs_remote << ")"
725766
<< dendl;
726767

727768
--scrubs_local;
@@ -732,24 +773,24 @@ bool ScrubQueue::inc_scrubs_remote()
732773
{
733774
std::lock_guard lck{resource_lock};
734775

735-
if (scrubs_local + scrubs_remote < cct->_conf->osd_max_scrubs) {
776+
if (scrubs_local + scrubs_remote < conf()->osd_max_scrubs) {
736777
dout(20) << ": " << scrubs_remote << " -> " << (scrubs_remote + 1)
737-
<< " (max " << cct->_conf->osd_max_scrubs << ", local "
778+
<< " (max " << conf()->osd_max_scrubs << ", local "
738779
<< scrubs_local << ")" << dendl;
739780
++scrubs_remote;
740781
return true;
741782
}
742783

743784
dout(20) << ": " << scrubs_local << " local + " << scrubs_remote
744-
<< " remote >= max " << cct->_conf->osd_max_scrubs << dendl;
785+
<< " remote >= max " << conf()->osd_max_scrubs << dendl;
745786
return false;
746787
}
747788

748789
void ScrubQueue::dec_scrubs_remote()
749790
{
750791
std::lock_guard lck{resource_lock};
751792
dout(20) << ": " << scrubs_remote << " -> " << (scrubs_remote - 1) << " (max "
752-
<< cct->_conf->osd_max_scrubs << ", local " << scrubs_local << ")"
793+
<< conf()->osd_max_scrubs << ", local " << scrubs_local << ")"
753794
<< dendl;
754795
--scrubs_remote;
755796
ceph_assert(scrubs_remote >= 0);
@@ -760,5 +801,5 @@ void ScrubQueue::dump_scrub_reservations(ceph::Formatter* f) const
760801
std::lock_guard lck{resource_lock};
761802
f->dump_int("scrubs_local", scrubs_local);
762803
f->dump_int("scrubs_remote", scrubs_remote);
763-
f->dump_int("osd_max_scrubs", cct->_conf->osd_max_scrubs);
804+
f->dump_int("osd_max_scrubs", conf()->osd_max_scrubs);
764805
}

0 commit comments

Comments
 (0)