Skip to content

Commit 2fec38b

Browse files
committed
osd/scrub: a single counters selection mechanism - step 1
Following the preceding PR, the Scrubber now employs two methods for selecting the specific subset of performance counters to update (the replicated pool set or the EC one). The first method is using labeled counters, with 4 optional labels (Primary/Replica X Replicated/EC Pool). The second method is by naming the specific OSD counters to use in ScrubIoCounterSet objects, then selecting the appropriate set based on the pool type. This commit is the first step on the path to unifying the two methods - discarding the use of labeled counters, and only naming OSD counters. Signed-off-by: Ronen Friedman <[email protected]>
1 parent 3aa61b3 commit 2fec38b

File tree

8 files changed

+65
-23
lines changed

8 files changed

+65
-23
lines changed

src/osd/osd_perf_counters.cc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,11 @@ PerfCounters *build_osd_logger(CephContext *cct) {
374374
osd_plb.add_u64_counter(l_osd_scrub_ec_read_cnt, "scrub_ec_read_cnt", "scrub ec read calls count");
375375
osd_plb.add_u64_counter(l_osd_scrub_ec_read_bytes, "scrub_ec_read_bytes", "scrub ec read bytes read");
376376

377-
// scrub I/O performed for replicated pools
377+
// scrub (no EC vs. replicated differentiation)
378+
// scrub - replicated pools
379+
osd_plb.add_u64_counter(l_osd_scrub_rppool_active_started, "num_scrubs_past_reservation_replicated", "scrubs count replicated");
380+
// scrub - EC
381+
osd_plb.add_u64_counter(l_osd_scrub_ec_active_started, "num_scrubs_past_reservation_ec", "scrubs count ec");
378382

379383
return osd_plb.create_perf_counters();
380384
}
@@ -428,7 +432,6 @@ PerfCounters *build_scrub_labeled_perf(CephContext *cct, std::string label)
428432
scrub_perf.set_prio_default(PerfCountersBuilder::PRIO_INTERESTING);
429433

430434
scrub_perf.add_u64_counter(scrbcnt_started, "num_scrubs_started", "scrubs attempted count");
431-
scrub_perf.add_u64_counter(scrbcnt_active_started, "num_scrubs_past_reservation", "scrubs count");
432435
scrub_perf.add_u64_counter(scrbcnt_failed, "failed_scrubs", "failed scrubs count");
433436
scrub_perf.add_u64_counter(scrbcnt_successful, "successful_scrubs", "successful scrubs count");
434437
scrub_perf.add_time_avg(scrbcnt_failed_elapsed, "failed_scrubs_elapsed", "time to scrub failure");

src/osd/osd_perf_counters.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,12 @@ enum osd_counter_idx_t {
159159
l_osd_scrub_ec_read_cnt, ///< read calls count
160160
l_osd_scrub_ec_read_bytes, ///< total bytes read
161161

162+
// scrub (no EC vs. replicated differentiation)
163+
// scrub - replicated pools
164+
l_osd_scrub_rppool_active_started, ///< scrubs that got past replicas reservation
165+
// scrub - EC
166+
l_osd_scrub_ec_active_started, ///< scrubs that got past secondaries reservation
167+
162168
l_osd_last,
163169
};
164170

@@ -211,8 +217,6 @@ enum {
211217
// -- basic statistics --
212218
/// The number of times we started a scrub
213219
scrbcnt_started,
214-
/// # scrubs that got past replicas reservation
215-
scrbcnt_active_started,
216220
/// # successful scrubs
217221
scrbcnt_successful,
218222
/// time to complete a successful scrub

src/osd/scrubber/pg_scrubber.cc

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -999,7 +999,7 @@ std::optional<uint64_t> PgScrubber::select_range()
999999

10001000
void PgScrubber::select_range_n_notify()
10011001
{
1002-
get_counters_set().inc(scrbcnt_chunks_selected);
1002+
get_labeled_counters()->inc(scrbcnt_chunks_selected);
10031003
auto num_chunk_objects = select_range();
10041004
if (num_chunk_objects.has_value()) {
10051005
// the next chunk to handle is not blocked
@@ -1010,7 +1010,7 @@ void PgScrubber::select_range_n_notify()
10101010
// we will wait for the objects range to become available for scrubbing
10111011
dout(10) << __func__ << ": selected chunk is busy" << dendl;
10121012
m_osds->queue_scrub_chunk_busy(m_pg, Scrub::scrub_prio_t::low_priority);
1013-
get_counters_set().inc(scrbcnt_chunks_busy);
1013+
get_labeled_counters()->inc(scrbcnt_chunks_busy);
10141014
}
10151015
}
10161016

@@ -1042,7 +1042,7 @@ bool PgScrubber::write_blocked_by_scrub(const hobject_t& soid)
10421042
return false;
10431043
}
10441044

1045-
get_counters_set().inc(scrbcnt_write_blocked);
1045+
get_labeled_counters()->inc(scrbcnt_write_blocked);
10461046
dout(20) << __func__ << " " << soid << " can preempt? "
10471047
<< preemption_data.is_preemptable() << " already preempted? "
10481048
<< preemption_data.was_preempted() << dendl;
@@ -2525,11 +2525,22 @@ void PgScrubber::set_scrub_duration(std::chrono::milliseconds duration)
25252525
});
25262526
}
25272527

2528-
PerfCounters& PgScrubber::get_counters_set() const
2528+
PerfCounters* PgScrubber::get_osd_perf_counters() const
25292529
{
2530-
return *m_osds->get_scrub_services().get_perf_counters(
2530+
return m_osds->logger;
2531+
}
2532+
2533+
const Scrub::ScrubCounterSet& PgScrubber::get_unlabeled_counters() const
2534+
{
2535+
return m_pg->pool.info.is_replicated() ? io_counters_replicated
2536+
: io_counters_ec;
2537+
}
2538+
2539+
PerfCounters* PgScrubber::get_labeled_counters() const
2540+
{
2541+
return m_osds->get_scrub_services().get_perf_counters(
25312542
(m_pg->pool.info.is_replicated() ? pg_pool_t::TYPE_REPLICATED
2532-
: pg_pool_t::TYPE_ERASURE),
2543+
: pg_pool_t::TYPE_ERASURE),
25332544
(m_is_deep ? scrub_level_t::deep : scrub_level_t::shallow));
25342545
}
25352546

src/osd/scrubber/pg_scrubber.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,8 @@ static inline constexpr ScrubCounterSet io_counters_replicated{
143143
.omapgetheader_cnt = l_osd_scrub_omapgetheader_cnt,
144144
.omapgetheader_bytes = l_osd_scrub_omapgetheader_bytes,
145145
.omapget_cnt = l_osd_scrub_omapget_cnt,
146-
.omapget_bytes = l_osd_scrub_omapget_bytes
146+
.omapget_bytes = l_osd_scrub_omapget_bytes,
147+
.active_started_cnt = l_osd_scrub_rppool_active_started
147148
};
148149

149150
static inline constexpr ScrubCounterSet io_counters_ec{
@@ -154,7 +155,8 @@ static inline constexpr ScrubCounterSet io_counters_ec{
154155
.omapgetheader_cnt = l_osd_scrub_omapgetheader_cnt,
155156
.omapgetheader_bytes = l_osd_scrub_omapgetheader_bytes,
156157
.omapget_cnt = l_osd_scrub_omapget_cnt,
157-
.omapget_bytes = l_osd_scrub_omapget_bytes
158+
.omapget_bytes = l_osd_scrub_omapget_bytes,
159+
.active_started_cnt = l_osd_scrub_ec_active_started
158160
};
159161
} // namespace Scrub
160162

@@ -414,7 +416,9 @@ class PgScrubber : public ScrubPgIF,
414416
int get_whoami() const final;
415417
spg_t get_spgid() const final { return m_pg->get_pgid(); }
416418
PG* get_pg() const final { return m_pg; }
417-
PerfCounters& get_counters_set() const final;
419+
PerfCounters* get_osd_perf_counters() const final;
420+
const Scrub::ScrubCounterSet& get_unlabeled_counters() const final;
421+
PerfCounters* get_labeled_counters() const final;
418422

419423
/// delay next retry of this PG after a replica reservation failure
420424
void flag_reservations_failure();

src/osd/scrubber/scrub_machine.cc

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,9 @@ Session::Session(my_context ctx)
196196
dout(10) << "-- state -->> PrimaryActive/Session" << dendl;
197197
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
198198

199-
m_perf_set = &scrbr->get_counters_set();
199+
m_perf_set = scrbr->get_labeled_counters();
200+
m_osd_counters = scrbr->get_osd_perf_counters();
201+
m_counters_idx = &scrbr->get_unlabeled_counters();
200202
m_perf_set->inc(scrbcnt_started);
201203
}
202204

@@ -319,7 +321,7 @@ ActiveScrubbing::ActiveScrubbing(my_context ctx)
319321
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
320322
auto& session = context<Session>();
321323

322-
session.m_perf_set->inc(scrbcnt_active_started);
324+
session.m_osd_counters->inc(session.m_counters_idx->active_started_cnt);
323325
scrbr->get_clog()->debug()
324326
<< fmt::format("{} {} starts", pg_id, scrbr->get_op_mode_text());
325327

@@ -343,8 +345,9 @@ ActiveScrubbing::~ActiveScrubbing()
343345
session.m_abort_reason.value_or(Scrub::delay_cause_t::aborted));
344346

345347
auto logged_duration = ScrubClock::now() - session.m_session_started_at;
346-
session.m_perf_set->tinc(scrbcnt_failed_elapsed, logged_duration);
347-
session.m_perf_set->inc(scrbcnt_failed);
348+
session.m_osd_counters->tinc(session.m_counters_idx->failed_elapsed,
349+
logged_duration);
350+
session.m_osd_counters->inc(session.m_counters_idx->failed_cnt);
348351
}
349352
}
350353

@@ -718,13 +721,14 @@ sc::result WaitDigestUpdate::react(const ScrubFinished&)
718721
dout(10) << "WaitDigestUpdate::react(const ScrubFinished&)" << dendl;
719722
auto& session = context<Session>();
720723

721-
session.m_perf_set->inc(scrbcnt_successful);
724+
session.m_osd_counters->inc(session.m_counters_idx->successful_cnt);
722725

723726
// set the 'scrub duration'
724727
auto duration = machine.get_time_scrubbing();
725-
session.m_perf_set->tinc(scrbcnt_successful_elapsed, duration);
726728
scrbr->set_scrub_duration(duration_cast<milliseconds>(duration));
727729
session.m_session_started_at = ScrubTimePoint{};
730+
session.m_osd_counters->tinc(
731+
session.m_counters_idx->successful_elapsed, duration);
728732

729733
scrbr->scrub_finish();
730734
return transit<PrimaryIdle>();

src/osd/scrubber/scrub_machine.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -562,10 +562,17 @@ struct Session : sc::state<Session, PrimaryActive, ReservingReplicas>,
562562
/// it's an RAII wrapper around the state of 'holding reservations')
563563
std::optional<ReplicaReservations> m_reservations{std::nullopt};
564564

565-
/// the relevant set of performance counters for this session
565+
/// the relevant set of labeled performance counters for this session
566566
/// (relevant, i.e. for this pool type X scrub level)
567567
PerfCounters* m_perf_set{nullptr};
568568

569+
/// the OSD's unlabeled performance counters access point
570+
PerfCounters* m_osd_counters{nullptr};
571+
572+
/// the set of performance counters for this session (relevant, i.e. for
573+
/// this pool type)
574+
const ScrubCounterSet* m_counters_idx{nullptr};
575+
569576
/// the time when the session was initiated
570577
ScrubTimePoint m_session_started_at{ScrubClock::now()};
571578

src/osd/scrubber/scrub_machine_lstnr.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,17 @@ struct ScrubMachineListener {
5757
virtual PG* get_pg() const = 0;
5858

5959
/**
60-
* access the set of performance counters relevant to this scrub
60+
* the OSD's performance counters interface ("logger")
61+
*/
62+
virtual PerfCounters* get_osd_perf_counters() const = 0;
63+
64+
virtual const Scrub::ScrubCounterSet& get_unlabeled_counters() const = 0;
65+
66+
/**
67+
* the set of labeled performance counters relevant to this scrub
6168
* (one of the four sets of counters maintained by the OSD)
6269
*/
63-
virtual PerfCounters& get_counters_set() const = 0;
70+
virtual PerfCounters* get_labeled_counters() const = 0;
6471

6572
using scrubber_callback_t = std::function<void(void)>;
6673
using scrubber_callback_cancel_token_t = Context*;

src/osd/scrubber_common.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ struct PgScrubBeListener {
292292
// defining a specific subset of performance counters. Each of the members
293293
// is set to (the index of) the corresponding performance counter.
294294
// Separate sets are used for replicated and erasure-coded pools.
295-
struct ScrubIoCounterSet {
295+
struct ScrubCounterSet {
296296
osd_counter_idx_t getattr_cnt; ///< get_attr calls count
297297
osd_counter_idx_t stats_cnt; ///< stats calls count
298298
osd_counter_idx_t read_cnt; ///< read calls count
@@ -301,7 +301,9 @@ struct ScrubIoCounterSet {
301301
osd_counter_idx_t omapgetheader_bytes; ///< bytes read by omap get header
302302
osd_counter_idx_t omapget_cnt; ///< omap get calls count
303303
osd_counter_idx_t omapget_bytes; ///< total bytes read by omap get
304+
osd_counter_idx_t active_started_cnt; ///< scrubs that got past reservation
304305
};
306+
305307
} // namespace Scrub
306308

307309

0 commit comments

Comments
 (0)