Commit d0ec1c6

Merge pull request ceph#57272 from ronen-fr/wip-rf-cv
osd/scrub: modify deep scrub interval randomization

Reviewed-by: Samuel Just <[email protected]>

2 parents: 2d281f3 + 0de916d

4 files changed: +53 −24 lines
src/common/options/osd.yaml.in

Lines changed: 21 additions & 6 deletions

@@ -442,17 +442,32 @@ options:
   type: float
   level: advanced
   desc: Deep scrub each PG (i.e., verify data checksums) at least this often
-  fmt_desc: The interval for "deep" scrubbing (fully reading all data). The
-    ``osd_scrub_load_threshold`` does not affect this setting.
+  fmt_desc: The interval for "deep" scrubbing (fully reading all data).
   default: 7_day
   with_legacy: true
+- name: osd_deep_scrub_interval_cv
+  type: float
+  level: advanced
+  desc: determining the amount of variation in the deep scrub interval
+  long_desc: deep scrub intervals are varied by a random amount to prevent
+    stampedes. This parameter determines the amount of variation.
+    Technically - osd_deep_scrub_interval_cv is the coefficient of variation for
+    the deep scrub interval.
+  fmt_desc: The coefficient of variation for the deep scrub interval, specified as a
+    ratio. On average, the next deep scrub for a PG is scheduled osd_deep_scrub_interval
+    after the last deep scrub. The actual time is randomized to a normal distribution
+    with a standard deviation of osd_deep_scrub_interval * osd_deep_scrub_interval_cv
+    (clamped to within 2 standard deviations).
+    The default value guarantees that 95% of the deep scrubs will be scheduled in the range
+    [0.8 * osd_deep_scrub_interval, 1.2 * osd_deep_scrub_interval].
+  min: 0
+  max: 0.4
+  default: 0.2
+  with_legacy: false
 - name: osd_deep_scrub_randomize_ratio
   type: float
   level: advanced
-  desc: Scrubs will randomly become deep scrubs at this rate (0.15 -> 15% of scrubs
-    are deep)
-  long_desc: This prevents a deep scrub 'stampede' by spreading deep scrubs so they
-    are uniformly distributed over the week
+  desc: deprecated. Has no effect.
   default: 0.15
   with_legacy: true
 - name: osd_deep_scrub_stride
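
The new option defines the spread of the randomized deep-scrub interval: the standard deviation is osd_deep_scrub_interval * osd_deep_scrub_interval_cv, and the scheduler (see scrub_job.cc below) clamps the sampled value to two standard deviations around the configured interval. A minimal standalone sketch of that draw, using the defaults from this file (7 days, CV 0.2); this is illustrative code, not part of the commit:

```cpp
#include <algorithm>
#include <iostream>
#include <random>

int main() {
  // defaults from osd.yaml.in: osd_deep_scrub_interval = 7_day,
  // osd_deep_scrub_interval_cv = 0.2
  const double interval = 7.0 * 24 * 3600;  // seconds
  const double cv = 0.2;
  const double sdv = interval * cv;         // standard deviation

  std::mt19937 gen{std::random_device{}()};
  std::normal_distribution<double> dist{interval, sdv};

  // clamp to within 2 standard deviations of the mean, i.e.
  // [0.6, 1.4] * osd_deep_scrub_interval with the default CV
  const double next =
      std::clamp(dist(gen), interval - 2 * sdv, interval + 2 * sdv);
  std::cout << "next deep scrub due in " << next / (24 * 3600) << " days\n";
}
```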

src/osd/scrubber/pg_scrubber.cc

Lines changed: 3 additions & 5 deletions

@@ -666,7 +666,7 @@ bool PgScrubber::reserve_local(const Scrub::SchedTarget& trgt)
 Scrub::sched_conf_t PgScrubber::populate_config_params() const
 {
   const pool_opts_t& pool_conf = m_pg->get_pgpool().info.opts;
-  auto& conf = get_pg_cct()->_conf; // for brevity
+  const auto& conf = get_pg_cct()->_conf; // for brevity
   Scrub::sched_conf_t configs;
 
   // deep-scrub optimal interval
@@ -705,7 +705,7 @@ Scrub::sched_conf_t PgScrubber::populate_config_params() const
       std::max(configs.max_shallow.value_or(0.0), configs.deep_interval);
 
   configs.interval_randomize_ratio = conf->osd_scrub_interval_randomize_ratio;
-  configs.deep_randomize_ratio = conf->osd_deep_scrub_randomize_ratio;
+  configs.deep_randomize_ratio = conf.get_val<double>("osd_deep_scrub_interval_cv");
   configs.mandatory_on_invalid = conf->osd_scrub_invalid_stats;
 
   dout(15) << fmt::format("{}: updated config:{}", __func__, configs) << dendl;
@@ -2518,9 +2518,7 @@ PgScrubber::PgScrubber(PG* pg)
 {
   m_fsm = std::make_unique<ScrubMachine>(m_pg, this);
   m_fsm->initiate();
-
-  m_scrub_job = std::make_optional<Scrub::ScrubJob>(
-      m_osds->cct, m_pg->pg_id, m_osds->get_nodeid());
+  m_scrub_job.emplace(m_osds->cct, m_pg->pg_id, m_osds->get_nodeid());
 }
 
 void PgScrubber::set_scrub_duration(std::chrono::milliseconds duration)
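
Two small cleanups here besides the config read: the new option is fetched through the string-keyed get_val<double>() accessor (it is declared with_legacy: false, so, as far as I understand the config machinery, it has no legacy conf-> member), and the ScrubJob member is now constructed in place via std::optional::emplace() rather than assigned from a std::make_optional temporary. A generic illustration of the emplace change, with a made-up Job type standing in for the real ScrubJob:

```cpp
#include <optional>
#include <string>
#include <utility>

// hypothetical stand-in for Scrub::ScrubJob
struct Job {
  Job(int osd_id, std::string pg) : osd_id(osd_id), pg(std::move(pg)) {}
  int osd_id;
  std::string pg;
};

int main() {
  std::optional<Job> job;

  // old style: build a temporary optional<Job>, then move-assign it
  job = std::make_optional<Job>(17, "1.a");

  // new style: construct the Job directly inside the optional, no temporary
  job.emplace(17, "1.a");
  return job->osd_id == 17 ? 0 : 1;
}
```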

src/osd/scrubber/scrub_job.cc

Lines changed: 20 additions & 11 deletions

@@ -66,6 +66,7 @@ ScrubJob::ScrubJob(CephContext* cct, const spg_t& pg, int node_id)
     , shallow_target{pg, scrub_level_t::shallow}
     , deep_target{pg, scrub_level_t::deep}
     , cct{cct}
+    , random_gen{random_dev()}
     , log_msg_prefix{fmt::format("osd.{} scrub-job:pg[{}]:", node_id, pgid)}
 {}
 
@@ -242,6 +243,7 @@ utime_t ScrubJob::get_sched_time() const
   return earliest_target().sched_info.schedule.not_before;
 }
 
+
 void ScrubJob::adjust_deep_schedule(
     utime_t last_deep,
     const Scrub::sched_conf_t& app_conf,
@@ -258,13 +260,7 @@ void ScrubJob::adjust_deep_schedule(
 
   auto& dp_times = deep_target.sched_info.schedule;  // shorthand
 
-  if (!ScrubJob::requires_randomization(deep_target.urgency())) {
-    // the target time is already set. Make sure to reset the n.b. and
-    // the (irrelevant) deadline
-    dp_times.not_before = dp_times.scheduled_at;
-    dp_times.deadline = dp_times.scheduled_at;
-
-  } else {
+  if (ScrubJob::requires_randomization(deep_target.urgency())) {
     utime_t adj_not_before = last_deep;
     utime_t adj_target = last_deep;
     dp_times.deadline = adj_target;
@@ -273,10 +269,18 @@ void ScrubJob::adjust_deep_schedule(
     // scrubs that are not already eligible for scrubbing.
     if ((modify_ready_targets == delay_ready_t::delay_ready) ||
         adj_not_before > scrub_clock_now) {
-      adj_target += app_conf.deep_interval;
-      double r = rand() / (double)RAND_MAX;
-      adj_target += app_conf.deep_interval * app_conf.interval_randomize_ratio *
-                    r;  // RRR fix
+      double sdv = app_conf.deep_interval * app_conf.deep_randomize_ratio;
+      std::normal_distribution<double> normal_dist{app_conf.deep_interval, sdv};
+      auto next_delay = std::clamp(
+          normal_dist(random_gen), app_conf.deep_interval - 2 * sdv,
+          app_conf.deep_interval + 2 * sdv);
+      adj_target += next_delay;
+      dout(20) << fmt::format(
+                      "deep scrubbing: next_delay={:.0f} (interval={:.0f}, "
+                      "ratio={:.3f}), adjusted:{:s}",
+                      next_delay, app_conf.deep_interval,
+                      app_conf.deep_randomize_ratio, adj_target)
+               << dendl;
     }
 
     // the deadline can be updated directly into the scrub-job
@@ -290,6 +294,11 @@ void ScrubJob::adjust_deep_schedule(
     }
     dp_times.scheduled_at = adj_target;
     dp_times.not_before = adj_not_before;
+  } else {
+    // the target time is already set. Make sure to reset the n.b. and
+    // the (irrelevant) deadline
+    dp_times.not_before = dp_times.scheduled_at;
+    dp_times.deadline = dp_times.scheduled_at;
   }
 
   dout(10) << fmt::format(
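
The uniform rand()-based offset is replaced by a normal draw around the configured interval, clamped to two standard deviations and generated by the per-job engine added below in scrub_job.h. A throwaway simulation (not Ceph code) with the default 7-day interval and 0.2 CV; it shows that the clamp confines every sampled interval to [4.2, 9.8] days and reports how many draws land within one standard deviation of the mean:

```cpp
#include <algorithm>
#include <cmath>
#include <iostream>
#include <random>

int main() {
  const double interval = 7.0;  // days (osd_deep_scrub_interval default)
  const double cv = 0.2;        // osd_deep_scrub_interval_cv default
  const double sdv = interval * cv;

  std::mt19937 gen{std::random_device{}()};
  std::normal_distribution<double> dist{interval, sdv};

  const int n = 100'000;
  int within_one_sd = 0;
  double lo = interval, hi = interval;
  for (int i = 0; i < n; ++i) {
    const double d =
        std::clamp(dist(gen), interval - 2 * sdv, interval + 2 * sdv);
    lo = std::min(lo, d);
    hi = std::max(hi, d);
    if (std::abs(d - interval) <= sdv) {
      ++within_one_sd;
    }
  }
  std::cout << "observed range: [" << lo << ", " << hi << "] days\n"
            << "within one sd:  " << 100.0 * within_one_sd / n << "%\n";
}
```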

src/osd/scrubber/scrub_job.h

Lines changed: 9 additions & 2 deletions

@@ -6,6 +6,7 @@
 #include <compare>
 #include <iostream>
 #include <memory>
+#include <random>
 #include <vector>
 
 #include "common/ceph_atomic.h"
@@ -65,8 +66,9 @@ struct sched_conf_t {
 
   /**
    * a randomization factor aimed at preventing 'thundering herd' problems
-   * upon deep-scrubs common intervals. If polling a random number smaller
-   * than that percentage, the next shallow scrub is upgraded to deep.
+   * upon deep-scrubs common intervals. The actual deep scrub interval will
+   * be selected with a normal distribution around the configured interval,
+   * with a standard deviation of <deep_randomize_ratio> * <interval>.
    */
   double deep_randomize_ratio{0.0};
 
@@ -168,6 +170,11 @@ class ScrubJob {
 
   CephContext* cct;
 
+  /// random generator for the randomization of the scrub times
+  /// \todo consider using one common generator in the OSD service
+  std::random_device random_dev;
+  std::mt19937 random_gen;
+
   ScrubJob(CephContext* cct, const spg_t& pg, int node_id);
 
   /**
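
The per-job RNG is two new members: a std::random_device used only as a seed source and a std::mt19937 engine that adjust_deep_schedule() draws from. Declaration order matters, since data members are initialized in that order: random_dev must precede random_gen so that the random_gen{random_dev()} initializer in the constructor uses an already-constructed device. A minimal sketch of the same pattern (not the actual ScrubJob class):

```cpp
#include <iostream>
#include <random>

// minimal stand-in for the ScrubJob members added above
class JobWithRng {
 public:
  // random_dev is declared before random_gen, so it is already
  // constructed when random_gen{random_dev()} runs here
  JobWithRng() : random_gen{random_dev()} {}

  double sample(double mean, double sigma) {
    std::normal_distribution<double> dist{mean, sigma};
    return dist(random_gen);
  }

 private:
  std::random_device random_dev;  // non-deterministic seed source
  std::mt19937 random_gen;        // cheap engine, seeded once per job
};

int main() {
  JobWithRng job;
  std::cout << "sampled interval: " << job.sample(7.0, 1.4) << " days\n";
}
```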
