Skip to content

Commit 63ca1ff

Browse files
authored
Merge pull request ceph#65859 from ronen-fr/wip-rf-minchunk
osd/scrub: do not reduce min chunk on preemption

Reviewed-by: Samuel Just <[email protected]>
2 parents fa31c2e + 3e8ec7d commit 63ca1ff

File tree

6 files changed

+37
-5
lines changed

6 files changed

+37
-5
lines changed

src/common/options/osd.yaml.in

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -379,8 +379,9 @@ options:
379379
type: int
380380
level: advanced
381381
desc: Minimum number of objects to deep-scrub in a single chunk
382-
fmt_desc: The minimal number of object store chunks to scrub during single operation.
383-
Ceph blocks writes to single chunk during scrub.
382+
fmt_desc: The minimum number of objects to scrub during single operation. Also
383+
serves as a minimal chunk size even after scrubbing is preempted by client
384+
operations and the effective chunk size is halved.
384385
default: 5
385386
see_also:
386387
- osd_scrub_chunk_max
@@ -1559,7 +1560,7 @@ options:
15591560
type: int
15601561
level: advanced
15611562
default: 512
1562-
fmt_desc: The maximum number of objects per backfill scan.p
1563+
fmt_desc: The maximum number of objects per backfill scan.
15631564
with_legacy: true
15641565
- name: osd_extblkdev_plugins
15651566
type: str

src/osd/osd_perf_counters.cc

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,14 @@ PerfCounters *build_osd_logger(CephContext *cct) {
407407
l_osd_scrub_rppool_failed_elapsed,
408408
"failed_scrubs_replicated_elapsed",
409409
"time to scrub failure replicated");
410+
osd_plb.add_u64_counter(
411+
l_osd_scrub_rppool_write_intersects,
412+
"scrub_replicated_io_intersects",
413+
"client write op intersects chunk range");
414+
osd_plb.add_u64_counter(
415+
l_osd_scrub_rppool_write_blocked,
416+
"scrub_replicated_io_blocked",
417+
"write op did not preempt the scrub");
410418

411419
// the replica reservation process - replicated pool
412420
osd_plb.add_u64_counter(
@@ -456,6 +464,14 @@ PerfCounters *build_osd_logger(CephContext *cct) {
456464
osd_plb.add_time_avg(
457465
l_osd_scrub_ec_failed_elapsed, "failed_scrubs_ec_elapsed",
458466
"time to scrub failure ec");
467+
osd_plb.add_u64_counter(
468+
l_osd_scrub_ec_write_intersects,
469+
"scrub_ec_io_intersects",
470+
"client write op intersects chunk range");
471+
osd_plb.add_u64_counter(
472+
l_osd_scrub_ec_write_blocked,
473+
"scrub_ec_io_blocked",
474+
"write op did not preempt the scrub");
459475

460476
// the secondaries reservation process - EC
461477
osd_plb.add_u64_counter(

src/osd/osd_perf_counters.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,8 @@ enum osd_counter_idx_t {
170170
l_osd_scrub_rppool_successful_elapsed, ///< time to complete a successful scrub
171171
l_osd_scrub_rppool_failed, ///< failed scrubs count
172172
l_osd_scrub_rppool_failed_elapsed, ///< time from start to failure
173+
l_osd_scrub_rppool_write_intersects, ///< client write op intersects chunk range
174+
l_osd_scrub_rppool_write_blocked, ///< write op did not preempt the scrub
173175

174176
// ---- scrub reservation process - replicated pools
175177

@@ -196,6 +198,8 @@ enum osd_counter_idx_t {
196198
l_osd_scrub_ec_successful_elapsed, ///< time to complete a successful scrub
197199
l_osd_scrub_ec_failed, ///< failed scrubs count
198200
l_osd_scrub_ec_failed_elapsed, ///< time from start to failure
201+
l_osd_scrub_ec_write_intersects, ///< client write op intersects chunk range
202+
l_osd_scrub_ec_write_blocked, ///< write op did not preempt the scrub
199203

200204
// ---- scrub reservation process - EC
201205

src/osd/scrubber/pg_scrubber.cc

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -911,8 +911,8 @@ std::optional<uint64_t> PgScrubber::select_range()
911911
const int max_from_conf = static_cast<int>(size_from_conf(
912912
m_is_deep, conf, osd_scrub_chunk_max, osd_shallow_scrub_chunk_max));
913913

914+
const int min_chunk_sz = std::max(3, min_from_conf);
914915
const int divisor = static_cast<int>(preemption_data.chunk_divisor());
915-
const int min_chunk_sz = std::max(3, min_from_conf / divisor);
916916
const int max_chunk_sz = std::max(min_chunk_sz, max_from_conf / divisor);
917917

918918
dout(10) << fmt::format(
@@ -1020,7 +1020,8 @@ bool PgScrubber::write_blocked_by_scrub(const hobject_t& soid)
10201020
return false;
10211021
}
10221022

1023-
get_labeled_counters()->inc(scrbcnt_write_blocked);
1023+
const auto& unlabeled_cntrs_idx = get_unlabeled_counters();
1024+
get_osd_perf_counters()->inc(unlabeled_cntrs_idx.write_intersects);
10241025
dout(20) << __func__ << " " << soid << " can preempt? "
10251026
<< preemption_data.is_preemptable() << " already preempted? "
10261027
<< preemption_data.was_preempted() << dendl;
@@ -1042,6 +1043,10 @@ bool PgScrubber::write_blocked_by_scrub(const hobject_t& soid)
10421043

10431044
return false;
10441045
}
1046+
1047+
get_osd_perf_counters()->inc(unlabeled_cntrs_idx.write_blocked);
1048+
// to be removed in version 'Umbrella':
1049+
get_labeled_counters()->inc(scrbcnt_write_blocked);
10451050
return true;
10461051
}
10471052

src/osd/scrubber/pg_scrubber.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ static inline constexpr ScrubCounterSet io_counters_replicated{
150150
.successful_elapsed = l_osd_scrub_rppool_successful_elapsed,
151151
.failed_cnt = l_osd_scrub_rppool_failed,
152152
.failed_elapsed = l_osd_scrub_rppool_failed_elapsed,
153+
.write_intersects = l_osd_scrub_rppool_write_intersects,
154+
.write_blocked = l_osd_scrub_rppool_write_blocked,
153155
// replica-reservation-related:
154156
.rsv_successful_cnt = l_osd_scrub_rppool_reserv_success,
155157
.rsv_successful_elapsed = l_osd_scrub_rppool_reserv_successful_elapsed,
@@ -175,6 +177,8 @@ static inline constexpr ScrubCounterSet io_counters_ec{
175177
.successful_elapsed = l_osd_scrub_ec_successful_elapsed,
176178
.failed_cnt = l_osd_scrub_ec_failed,
177179
.failed_elapsed = l_osd_scrub_ec_failed_elapsed,
180+
.write_intersects = l_osd_scrub_ec_write_intersects,
181+
.write_blocked = l_osd_scrub_ec_write_blocked,
178182
// replica-reservation-related:
179183
.rsv_successful_cnt = l_osd_scrub_ec_reserv_success,
180184
.rsv_successful_elapsed = l_osd_scrub_ec_reserv_successful_elapsed,

src/osd/scrubber_common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,8 @@ struct ScrubCounterSet {
323323
osd_counter_idx_t successful_elapsed; ///< time to complete a successful scrub
324324
osd_counter_idx_t failed_cnt; ///< failed scrubs count
325325
osd_counter_idx_t failed_elapsed; ///< time from start to failure
326+
osd_counter_idx_t write_intersects; ///< client write op intersects chunk range
327+
osd_counter_idx_t write_blocked; ///< write op did not preempt the scrub
326328
// reservation process related:
327329
osd_counter_idx_t rsv_successful_cnt; ///< completed reservation processes
328330
osd_counter_idx_t rsv_successful_elapsed; ///< time to all-reserved

0 commit comments

Comments
 (0)