Skip to content

Commit e60c698

Browse files
committed
osd/scrub: remove the 'penalized jobs' queue
The 'penalized jobs' queue was used to track scrub jobs that had failed to acquire scrub resources from their replicas, and to prevent those jobs from being retried too quickly. This functionality will be replaced by a simple 'not before' delay (see the next commits).

Signed-off-by: Ronen Friedman <[email protected]>
1 parent e7ccff5 commit e60c698

File tree

5 files changed

+12
-139
lines changed

5 files changed

+12
-139
lines changed

src/osd/osd_types.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,7 @@ struct pg_t {
403403
uint32_t m_seed;
404404

405405
pg_t() : m_pool(0), m_seed(0) {}
406-
pg_t(ps_t seed, uint64_t pool) :
406+
constexpr pg_t(ps_t seed, uint64_t pool) :
407407
m_pool(pool), m_seed(seed) {}
408408
// cppcheck-suppress noExplicitConstructor
409409
pg_t(const ceph_pg& cpg) :
@@ -521,7 +521,7 @@ struct spg_t {
521521
pg_t pgid;
522522
shard_id_t shard;
523523
spg_t() : shard(shard_id_t::NO_SHARD) {}
524-
spg_t(pg_t pgid, shard_id_t shard) : pgid(pgid), shard(shard) {}
524+
constexpr spg_t(pg_t pgid, shard_id_t shard) : pgid(pgid), shard(shard) {}
525525
explicit spg_t(pg_t pgid) : pgid(pgid), shard(shard_id_t::NO_SHARD) {}
526526
auto operator<=>(const spg_t&) const = default;
527527
unsigned get_split_bits(unsigned pg_num) const {

src/osd/scrubber/osd_scrub_sched.cc

Lines changed: 5 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -188,97 +188,41 @@ sched_params_t ScrubQueue::determine_scrub_time(
188188
}
189189

190190

191-
// used under jobs_lock
192-
void ScrubQueue::move_failed_pgs(utime_t now_is)
193-
{
194-
int punished_cnt{0}; // for log/debug only
195-
196-
for (auto job = to_scrub.begin(); job != to_scrub.end();) {
197-
if ((*job)->resources_failure) {
198-
auto sjob = *job;
199-
200-
// last time it was scheduled for a scrub, this PG failed in securing
201-
// remote resources. Move it to the secondary scrub queue.
202-
203-
dout(15) << "moving " << sjob->pgid
204-
<< " state: " << ScrubJob::qu_state_text(sjob->state) << dendl;
205-
206-
// determine the penalty time, after which the job should be reinstated
207-
utime_t after = now_is;
208-
after += conf()->osd_scrub_sleep * 2 + utime_t{300'000ms};
209-
210-
// note: currently - not taking 'deadline' into account when determining
211-
// 'penalty_timeout'.
212-
sjob->penalty_timeout = after;
213-
sjob->resources_failure = false;
214-
sjob->updated = false; // as otherwise will be pardoned immediately
215-
216-
// place in the penalty list, and remove from the to-scrub group
217-
penalized.push_back(sjob);
218-
job = to_scrub.erase(job);
219-
punished_cnt++;
220-
} else {
221-
job++;
222-
}
223-
}
224-
225-
if (punished_cnt) {
226-
dout(15) << "# of jobs penalized: " << punished_cnt << dendl;
227-
}
228-
}
229-
230191
std::vector<ScrubTargetId> ScrubQueue::ready_to_scrub(
231192
OSDRestrictions restrictions, // note: 4B in size! (copy)
232193
utime_t scrub_tick)
233194
{
234195
dout(10) << fmt::format(
235-
" @{:s}: reg./pen. sizes: {} / {} ({})", scrub_tick,
236-
to_scrub.size(), penalized.size(), restrictions)
196+
" @{:s}: registered: {} ({})", scrub_tick,
197+
to_scrub.size(), restrictions)
237198
<< dendl;
199+
238200
// create a list of candidates (copying, as otherwise creating a deadlock):
239-
// - possibly restore penalized
240201
// - (if we didn't handle directly) remove invalid jobs
241202
// - create a copy of the to_scrub (possibly up to first not-ripe)
242-
// - same for the penalized (although that usually be a waste)
243203
// unlock, then try the lists
244-
245204
std::unique_lock lck{jobs_lock};
246205

247-
// pardon all penalized jobs that have deadlined (or were updated)
248-
scan_penalized(restore_penalized, scrub_tick);
249-
restore_penalized = false;
250-
251206
// remove the 'updated' flag from all entries
252207
std::for_each(
253208
to_scrub.begin(), to_scrub.end(),
254209
[](const auto& jobref) -> void { jobref->updated = false; });
255210

256-
// add failed scrub attempts to the penalized list
257-
move_failed_pgs(scrub_tick);
258-
259-
// collect all valid & ripe jobs from the two lists. Note that we must copy,
211+
// collect all valid & ripe jobs. Note that we must copy,
260212
// as when we use the lists we will not be holding jobs_lock (see
261213
// explanation above)
262214

263215
// and in this step 1 of the refactoring (Aug 2023): the set returned must be
264216
// transformed into a vector of targets (which, in this phase, are
265217
// the PG id-s).
266218
auto to_scrub_copy = collect_ripe_jobs(to_scrub, restrictions, scrub_tick);
267-
auto penalized_copy = collect_ripe_jobs(penalized, restrictions, scrub_tick);
268219
lck.unlock();
269220

270221
std::vector<ScrubTargetId> all_ready;
271222
std::transform(
272223
to_scrub_copy.cbegin(), to_scrub_copy.cend(),
273224
std::back_inserter(all_ready),
274225
[](const auto& jobref) -> ScrubTargetId { return jobref->pgid; });
275-
// not bothering to handle the "reached the penalized - so all should be
276-
// forgiven" case, as the penalty queue is destined to be removed in a
277-
// followup PR.
278-
std::transform(
279-
penalized_copy.cbegin(), penalized_copy.cend(),
280-
std::back_inserter(all_ready),
281-
[](const auto& jobref) -> ScrubTargetId { return jobref->pgid; });
282226
return all_ready;
283227
}
284228

@@ -389,71 +333,29 @@ Scrub::scrub_schedule_t ScrubQueue::adjust_target_time(
389333
}
390334

391335

392-
// note: called with jobs_lock held
393-
void ScrubQueue::scan_penalized(bool forgive_all, utime_t time_now)
394-
{
395-
dout(20) << time_now << (forgive_all ? " all " : " - ") << penalized.size()
396-
<< dendl;
397-
398-
// clear dead entries (deleted PGs, or those PGs we are no longer their
399-
// primary)
400-
rm_unregistered_jobs(penalized);
401-
402-
if (forgive_all) {
403-
404-
std::copy(penalized.begin(), penalized.end(), std::back_inserter(to_scrub));
405-
penalized.clear();
406-
407-
} else {
408-
409-
auto forgiven_last = std::partition(
410-
penalized.begin(),
411-
penalized.end(),
412-
[time_now](const auto& e) {
413-
return (*e).updated || ((*e).penalty_timeout <= time_now);
414-
});
415-
416-
std::copy(penalized.begin(), forgiven_last, std::back_inserter(to_scrub));
417-
penalized.erase(penalized.begin(), forgiven_last);
418-
dout(20) << "penalized after screening: " << penalized.size() << dendl;
419-
}
420-
}
421-
422336
void ScrubQueue::dump_scrubs(ceph::Formatter* f) const
423337
{
424338
ceph_assert(f != nullptr);
425339
std::lock_guard lck(jobs_lock);
426340

427341
f->open_array_section("scrubs");
428-
429342
std::for_each(
430343
to_scrub.cbegin(), to_scrub.cend(),
431344
[&f](const Scrub::ScrubJobRef& j) { j->dump(f); });
432-
433-
std::for_each(
434-
penalized.cbegin(), penalized.cend(),
435-
[&f](const Scrub::ScrubJobRef& j) { j->dump(f); });
436-
437345
f->close_section();
438346
}
439347

440348
ScrubQContainer ScrubQueue::list_registered_jobs() const
441349
{
442350
ScrubQContainer all_jobs;
443-
all_jobs.reserve(to_scrub.size() + penalized.size());
351+
all_jobs.reserve(to_scrub.size());
444352
dout(20) << " size: " << all_jobs.capacity() << dendl;
445353

446354
std::lock_guard lck{jobs_lock};
447-
448355
std::copy_if(to_scrub.begin(),
449356
to_scrub.end(),
450357
std::back_inserter(all_jobs),
451358
registered_job);
452-
std::copy_if(penalized.begin(),
453-
penalized.end(),
454-
std::back_inserter(all_jobs),
455-
registered_job);
456-
457359
return all_jobs;
458360
}
459361

src/osd/scrubber/osd_scrub_sched.h

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
│ │
3131
│ │
3232
│ ScrubQContainer to_scrub <>────────┼────────┐
33-
│ ScrubQContainer penalized │ │
3433
│ │ │
3534
│ │ │
3635
│ OSD_wide resource counters │ │
@@ -146,11 +145,6 @@ class ScrubSchedListener {
146145
* the queue of PGs waiting to be scrubbed.
147146
* Main operations are scheduling/unscheduling a PG to be scrubbed at a certain
148147
* time.
149-
*
150-
* A "penalty" queue maintains those PGs that have failed to reserve the
151-
* resources of their replicas. The PGs in this list will be reinstated into the
152-
* scrub queue when all eligible PGs were already handled, or after a timeout
153-
* (or if their deadline has passed [[disabled at this time]]).
154148
*/
155149
class ScrubQueue {
156150
public:
@@ -282,8 +276,6 @@ class ScrubQueue {
282276
mutable ceph::mutex jobs_lock = ceph::make_mutex("ScrubQueue::jobs_lock");
283277

284278
Scrub::ScrubQContainer to_scrub; ///< scrub jobs (i.e. PGs) to scrub
285-
Scrub::ScrubQContainer penalized; ///< those that failed to reserve remote resources
286-
bool restore_penalized{false};
287279

288280
static inline constexpr auto registered_job = [](const auto& jobref) -> bool {
289281
return jobref->state == Scrub::qu_state_t::registered;
@@ -293,11 +285,6 @@ class ScrubQueue {
293285
return jobref->state == Scrub::qu_state_t::not_registered;
294286
};
295287

296-
/**
297-
* Are there scrub jobs that should be reinstated?
298-
*/
299-
void scan_penalized(bool forgive_all, utime_t time_now);
300-
301288
/**
302289
* clear dead entries (unregistered, or belonging to removed PGs) from a
303290
* queue. Job state is changed to match new status.
@@ -353,16 +340,6 @@ class ScrubQueue {
353340
Scrub::scrub_schedule_t adjust_target_time(
354341
const Scrub::sched_params_t& recomputed_params) const;
355342

356-
/**
357-
* Look for scrub jobs that have their 'resources_failure' set. These jobs
358-
* have failed to acquire remote resources last time we've initiated a scrub
359-
* session on them. They are now moved from the 'to_scrub' queue to the
360-
* 'penalized' set.
361-
*
362-
* locking: called with job_lock held
363-
*/
364-
void move_failed_pgs(utime_t now_is);
365-
366343
protected: // used by the unit-tests
367344
/**
368345
* unit-tests will override this function to return a mock time

src/osd/scrubber/scrub_job.h

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,6 @@ class ScrubJob final : public RefCountedObject {
7777
* 'updated' is a temporary flag, used to create a barrier after
7878
* 'sched_time' and 'deadline' (or any other job entry) were modified by
7979
* different task.
80-
* 'updated' also signals the need to move a job back from the penalized
81-
* queue to the regular one.
8280
*/
8381
std::atomic_bool updated{false};
8482

@@ -89,8 +87,6 @@ class ScrubJob final : public RefCountedObject {
8987
bool blocked{false};
9088
utime_t blocked_since{};
9189

92-
utime_t penalty_timeout{0, 0};
93-
9490
CephContext* cct;
9591

9692
bool high_priority{false};
@@ -231,12 +227,11 @@ struct formatter<Scrub::ScrubJob> {
231227
{
232228
return fmt::format_to(
233229
ctx.out(),
234-
"pg[{}] @ {:s} (dl:{:s}) - <{}> / failure: {} / pen. t.o.: {:s} / "
235-
"queue "
236-
"state: {:.7}",
237-
sjob.pgid, sjob.schedule.scheduled_at, sjob.schedule.deadline,
238-
sjob.registration_state(), sjob.resources_failure, sjob.penalty_timeout,
239-
sjob.state.load(std::memory_order_relaxed));
230+
"pg[{}] @ {:s} (dl:{:s}) - <{}> / failure: {} / queue state: "
231+
"{:.7}",
232+
sjob.pgid, sjob.schedule.scheduled_at,
233+
sjob.schedule.deadline, sjob.registration_state(),
234+
sjob.resources_failure, sjob.state.load(std::memory_order_relaxed));
240235
}
241236
};
242237

src/test/osd/test_scrub_sched.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ class ScrubSchedTestWrapper : public ScrubQueue {
5757
void rm_unregistered_jobs()
5858
{
5959
ScrubQueue::rm_unregistered_jobs(to_scrub);
60-
ScrubQueue::rm_unregistered_jobs(penalized);
6160
}
6261

6362
ScrubQContainer collect_ripe_jobs()

0 commit comments

Comments (0)