Skip to content

Commit 52a1268

Browse files
authored
Merge pull request ceph#55340 from ronen-fr/wip-rf-reserver2
osd/scrub: use an AsyncReserver to handle scrub reservations on the replica side. Reviewed-by: Samuel Just <[email protected]>
2 parents c43054f + 6fa0fa3 commit 52a1268

File tree

16 files changed

+370
-200
lines changed

16 files changed

+370
-200
lines changed

src/common/AsyncReserver.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
#define ASYNC_RESERVER_H
1717

1818
#include "common/Formatter.h"
19+
#include "common/ceph_context.h"
20+
#include "common/ceph_mutex.h"
21+
#include "include/Context.h"
1922

2023
#define rdout(x) lgeneric_subdout(cct,reserver,x)
2124

@@ -110,8 +113,10 @@ class AsyncReserver {
110113
if (it->second.empty()) {
111114
queues.erase(it);
112115
}
113-
f->queue(p.grant);
114-
p.grant = nullptr;
116+
if (p.grant) {
117+
f->queue(p.grant);
118+
p.grant = nullptr;
119+
}
115120
in_progress[p.item] = p;
116121
if (p.preempt) {
117122
preempt_by_prio.insert(std::make_pair(p.prio, p.item));
@@ -271,8 +276,7 @@ class AsyncReserver {
271276
* active reservations.
272277
*/
273278
bool request_reservation_or_fail(
274-
T item, ///< [in] reservation key
275-
Context *on_reserved ///< [in] callback to be called on reservation
279+
T item ///< [in] reservation key
276280
)
277281
{
278282
std::lock_guard l(lock);
@@ -285,7 +289,7 @@ class AsyncReserver {
285289
}
286290

287291
const unsigned prio = UINT_MAX;
288-
Reservation r(item, prio, on_reserved, nullptr);
292+
Reservation r(item, prio, nullptr, nullptr);
289293
queues[prio].push_back(r);
290294
queue_pointers.insert(std::make_pair(
291295
item, std::make_pair(prio, --(queues[prio]).end())));

src/common/options/osd.yaml.in

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,16 @@ options:
535535
see_also:
536536
- osd_scrub_slow_reservation_response
537537
with_legacy: false
538+
- name: osd_scrub_disable_reservation_queuing
539+
type: bool
540+
level: advanced
541+
desc: Disable queuing of scrub reservations
542+
long_desc: When set - scrub replica reservations are responded to immediately, with
543+
either success or failure (the pre-Squid version behaviour). This configuration
544+
option is introduced to support mixed-version clusters and debugging, and will
545+
be removed in the next release.
546+
default: false
547+
with_legacy: false
538548
# where rados plugins are stored
539549
- name: osd_class_dir
540550
type: str

src/messages/MOSDScrubReserve.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class MOSDScrubReserve : public MOSDFastDispatchOp {
3737
reservation_nonce_t reservation_nonce{0};
3838
/// 'false' if the (legacy) primary is expecting an immediate
3939
/// granted / denied response
40-
bool wait_for_resources{false};
40+
bool wait_for_resources{true};
4141

4242
epoch_t get_map_epoch() const override {
4343
return map_epoch;
@@ -63,23 +63,23 @@ class MOSDScrubReserve : public MOSDFastDispatchOp {
6363
}
6464

6565
void print(std::ostream& out) const {
66-
out << "MOSDScrubReserve(" << pgid << " ";
66+
out << "MOSDScrubReserve(" << pgid << ",";
6767
switch (type) {
6868
case REQUEST:
69-
out << (wait_for_resources ? "QREQUEST " : "REQUEST ");
69+
out << (wait_for_resources ? "QREQUEST" : "REQUEST");
7070
break;
7171
case GRANT:
72-
out << "GRANT ";
72+
out << "GRANT";
7373
break;
7474
case REJECT:
75-
out << "REJECT ";
75+
out << "REJECT";
7676
break;
7777
case RELEASE:
78-
out << "RELEASE ";
78+
out << "RELEASE";
7979
break;
8080
}
81-
out << "e" << map_epoch << " from: " << from
82-
<< " reservation_nonce: " << reservation_nonce << ")";
81+
out << ",e:" << map_epoch << ",from:" << from
82+
<< ",reservation_nonce:" << reservation_nonce << ")";
8383
return;
8484
}
8585

src/osd/OSD.cc

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,8 @@ OSDService::OSDService(OSD *osd, ceph::async::io_context_pool& poolctx) :
273273
cct->_conf->osd_min_recovery_priority),
274274
snap_reserver(cct, &reserver_finisher,
275275
cct->_conf->osd_max_trimming_pgs),
276+
scrub_reserver(cct, &reserver_finisher,
277+
cct->_conf->osd_max_scrubs),
276278
recovery_ops_active(0),
277279
recovery_ops_reserved(0),
278280
recovery_paused(false),
@@ -2891,7 +2893,7 @@ will start to track new ops received afterwards.";
28912893
f->close_section();
28922894
} else if (prefix == "dump_scrub_reservations") {
28932895
f->open_object_section("scrub_reservations");
2894-
service.get_scrub_services().resource_bookkeeper().dump_scrub_reservations(f);
2896+
service.get_scrub_services().dump_scrub_reservations(f);
28952897
f->close_section();
28962898
} else if (prefix == "get_latest_osdmap") {
28972899
get_latest_osdmap();
@@ -9889,6 +9891,9 @@ void OSD::handle_conf_change(const ConfigProxy& conf,
98899891
if (changed.count("osd_max_trimming_pgs")) {
98909892
service.snap_reserver.set_max(cct->_conf->osd_max_trimming_pgs);
98919893
}
9894+
if (changed.count("osd_max_scrubs")) {
9895+
service.scrub_reserver.set_max(cct->_conf->osd_max_scrubs);
9896+
}
98929897
if (changed.count("osd_op_complaint_time") ||
98939898
changed.count("osd_op_log_threshold")) {
98949899
op_tracker.set_complaint_and_threshold(cct->_conf->osd_op_complaint_time,

src/osd/OSD.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,14 @@ class OSDService : public Scrub::ScrubSchedListener {
251251
*/
252252
std::optional<PGLockWrapper> get_locked_pg(spg_t pgid) final;
253253

254+
/**
255+
* the entity that counts the number of active replica scrub
256+
operations, and grants scrub reservation requests asynchronously.
257+
*/
258+
AsyncReserver<spg_t, Finisher>& get_scrub_reserver() {
259+
return scrub_reserver;
260+
}
261+
254262
private:
255263
// -- agent shared state --
256264
ceph::mutex agent_lock = ceph::make_mutex("OSDService::agent_lock");
@@ -494,6 +502,8 @@ class OSDService : public Scrub::ScrubSchedListener {
494502
void send_pg_created();
495503

496504
AsyncReserver<spg_t, Finisher> snap_reserver;
505+
/// keeping track of replicas being reserved for scrubbing
506+
AsyncReserver<spg_t, Finisher> scrub_reserver;
497507
void queue_recovery_context(PG *pg,
498508
GenContext<ThreadPool::TPHandle&> *c,
499509
uint64_t cost,

src/osd/scrubber/osd_scrub.cc

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,14 @@ void OsdScrub::dump_scrubs(ceph::Formatter* f) const
5757
m_queue.dump_scrubs(f);
5858
}
5959

60+
// Dump the scrub reservation state into the formatter 'f': first whatever
// the resource bookkeeper member reports, then the dump of the scrub
// reserver obtained from the OSD service.
void OsdScrub::dump_scrub_reservations(ceph::Formatter* f) const
61+
{
62+
m_resource_bookkeeper.dump_scrub_reservations(f);
63+
// the reserver's own dump is wrapped in a dedicated, named array section
f->open_array_section("remote_scrub_reservations");
64+
m_osd_svc.get_scrub_reserver().dump(f);
65+
f->close_section();
66+
}
67+
6068
void OsdScrub::log_fwd(std::string_view text)
6169
{
6270
dout(20) << text << dendl;
@@ -468,21 +476,6 @@ void OsdScrub::dec_scrubs_local()
468476
m_resource_bookkeeper.dec_scrubs_local();
469477
}
470478

471-
bool OsdScrub::inc_scrubs_remote(pg_t pgid)
472-
{
473-
return m_resource_bookkeeper.inc_scrubs_remote(pgid);
474-
}
475-
476-
void OsdScrub::enqueue_remote_reservation(pg_t pgid)
477-
{
478-
m_resource_bookkeeper.enqueue_remote_reservation(pgid);
479-
}
480-
481-
void OsdScrub::dec_scrubs_remote(pg_t pgid)
482-
{
483-
m_resource_bookkeeper.dec_scrubs_remote(pgid);
484-
}
485-
486479
void OsdScrub::mark_pg_scrub_blocked(spg_t blocked_pg)
487480
{
488481
m_queue.mark_pg_scrub_blocked(blocked_pg);

src/osd/scrubber/osd_scrub.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ class OsdScrub {
5151

5252
void dump_scrubs(ceph::Formatter* f) const; ///< fwd to the queue
5353

54+
void dump_scrub_reservations(ceph::Formatter* f) const;
55+
5456
/**
5557
* on_config_change() (the refactored "OSD::sched_all_scrubs()")
5658
*
@@ -69,9 +71,6 @@ class OsdScrub {
6971
std::unique_ptr<Scrub::LocalResourceWrapper> inc_scrubs_local(
7072
bool is_high_priority);
7173
void dec_scrubs_local();
72-
bool inc_scrubs_remote(pg_t pgid);
73-
void enqueue_remote_reservation(pg_t pgid);
74-
void dec_scrubs_remote(pg_t pgid);
7574

7675
// counting the number of PGs stuck while scrubbing, waiting for objects
7776
void mark_pg_scrub_blocked(spg_t blocked_pg);

src/osd/scrubber/osd_scrub_sched.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ ScrubQueue interfaces (main functions):
108108
// clang-format on
109109

110110
#include <optional>
111+
#include "common/AsyncReserver.h"
111112
#include "utime.h"
112113
#include "osd/scrubber/scrub_job.h"
113114
#include "osd/PG.h"
@@ -135,6 +136,12 @@ class ScrubSchedListener {
135136
*/
136137
virtual std::optional<PGLockWrapper> get_locked_pg(spg_t pgid) = 0;
137138

139+
/**
140+
* allow access to the scrub_reserver, the AsyncReserver that keeps track
141+
* of 'remote replica reservations'.
142+
*/
143+
virtual AsyncReserver<spg_t, Finisher>& get_scrub_reserver() = 0;
144+
138145
virtual ~ScrubSchedListener() {}
139146
};
140147

src/osd/scrubber/pg_scrubber.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,15 @@ void PgScrubber::send_scrub_is_finished(epoch_t epoch_queued)
397397
dout(10) << "scrubber event --<< " << __func__ << dendl;
398398
}
399399

400+
// Entry point for the 'granted_by_reserver' scrubber event. Logs entry and
// exit at debug level 10, and forwards a Scrub::ReserverGranted event
// (carrying 'req') to the scrubber state machine - but only if
// check_interval() accepts the epoch recorded in the request.
void PgScrubber::send_granted_by_reserver(const AsyncScrubResData& req)
401+
{
402+
dout(10) << "scrubber event -->> granted_by_reserver" << dendl;
403+
// NOTE(review): presumably check_interval() filters out requests that
// were issued in an earlier interval - confirm against its definition.
if (check_interval(req.request_epoch)) {
404+
m_fsm->process_event(Scrub::ReserverGranted{req});
405+
}
406+
dout(10) << "scrubber event --<< granted_by_reserver" << dendl;
407+
}
408+
400409
// -----------------
401410

402411
bool PgScrubber::is_reserving() const

src/osd/scrubber/pg_scrubber.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,8 @@ class PgScrubber : public ScrubPgIF,
227227

228228
void send_scrub_is_finished(epoch_t epoch_queued) final;
229229

230+
void send_granted_by_reserver(const AsyncScrubResData& req) final;
231+
230232
/**
231233
* we allow some number of preemptions of the scrub, which mean we do
232234
* not block. Then we start to block. Once we start blocking, we do

0 commit comments

Comments
 (0)