Skip to content

Commit d9cf7a0

Browse files
authored
Merge pull request ceph#60185 from xxhdx1985126/wip-68306
crimson/osd/pg_recovery: trigger BackfillState events synchronously Reviewed-by: Matan Breizman <[email protected]> Reviewed-by: Radosław Zarzyński <[email protected]>
2 parents ef4e55b + 03ed3af commit d9cf7a0

File tree

9 files changed

+39
-124
lines changed

9 files changed

+39
-124
lines changed

src/crimson/osd/backfill_state.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ struct BackfillState {
9696
BackfillListener& backfill_listener;
9797
std::unique_ptr<PeeringFacade> peering_state;
9898
std::unique_ptr<PGFacade> pg;
99+
void post_event(const sc::event_base &e) {
100+
sc::state_machine<BackfillMachine, Initial>::post_event(e);
101+
}
99102
};
100103

101104
private:
@@ -298,6 +301,10 @@ struct BackfillState {
298301
const std::vector<pg_shard_t> &peers);
299302

300303

304+
void post_event(boost::intrusive_ptr<const sc::event_base> evt) {
305+
backfill_machine.post_event(*std::move(evt));
306+
}
307+
301308
bool is_triggered() const {
302309
return backfill_machine.triggering_event() != nullptr;
303310
}

src/crimson/osd/osd_operation_external_tracking.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -342,13 +342,6 @@ struct EventBackendRegistry<osd::RecoverySubRequest> {
342342
}
343343
};
344344

345-
template <>
346-
struct EventBackendRegistry<osd::BackfillRecovery> {
347-
static std::tuple<> get_backends() {
348-
return {};
349-
}
350-
};
351-
352345
template <>
353346
struct EventBackendRegistry<osd::PGAdvanceMap> {
354347
static std::tuple<> get_backends() {

src/crimson/osd/osd_operations/background_recovery.cc

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -175,41 +175,7 @@ PglogBasedRecovery::do_recovery()
175175
});
176176
}
177177

178-
PGPeeringPipeline &BackfillRecovery::peering_pp(PG &pg)
179-
{
180-
return pg.peering_request_pg_pipeline;
181-
}
182-
183-
BackfillRecovery::interruptible_future<bool>
184-
BackfillRecovery::do_recovery()
185-
{
186-
LOG_PREFIX(BackfillRecovery::do_recovery);
187-
DEBUGDPPI("{}", *pg, __func__);
188-
189-
if (pg->has_reset_since(epoch_started)) {
190-
DEBUGDPPI("{}: pg got reset since epoch_started={}",
191-
*pg, __func__, epoch_started);
192-
return seastar::make_ready_future<bool>(false);
193-
}
194-
// TODO: limits
195-
return enter_stage<interruptor>(
196-
// process_event() of our boost::statechart machine is non-reentrant.
197-
// with the backfill_pipeline we protect it from a second entry from
198-
// the implementation of BackfillListener.
199-
// additionally, this stage serves to synchronize with PeeringEvent.
200-
peering_pp(*pg).process
201-
).then_interruptible([this] {
202-
pg->get_recovery_handler()->dispatch_backfill_event(std::move(evt));
203-
return handle.complete();
204-
}).then_interruptible([] {
205-
return seastar::make_ready_future<bool>(false);
206-
}).finally([this] {
207-
handle.exit();
208-
});
209-
}
210-
211178
template class BackgroundRecoveryT<UrgentRecovery>;
212179
template class BackgroundRecoveryT<PglogBasedRecovery>;
213-
template class BackgroundRecoveryT<BackfillRecovery>;
214180

215181
} // namespace crimson::osd

src/crimson/osd/osd_operations/background_recovery.h

Lines changed: 3 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,9 @@ class BackgroundRecoveryT : public PhasedOperationT<T> {
5252

5353
/// represent a recovery initiated for serving a client request
5454
///
55-
/// unlike @c PglogBasedRecovery and @c BackfillRecovery,
56-
/// @c UrgentRecovery is not throttled by the scheduler. and it
57-
/// utilizes @c RecoveryBackend directly to recover the unreadable
58-
/// object.
55+
/// unlike @c PglogBasedRecovery, @c UrgentRecovery is not throttled
56+
/// by the scheduler. and it utilizes @c RecoveryBackend directly to
57+
/// recover the unreadable object.
5958
class UrgentRecovery final : public BackgroundRecoveryT<UrgentRecovery> {
6059
public:
6160
UrgentRecovery(
@@ -107,49 +106,9 @@ class PglogBasedRecovery final : public BackgroundRecoveryT<PglogBasedRecovery>
107106
bool cancelled = false;
108107
};
109108

110-
class BackfillRecovery final : public BackgroundRecoveryT<BackfillRecovery> {
111-
public:
112-
113-
template <class EventT>
114-
BackfillRecovery(
115-
Ref<PG> pg,
116-
ShardServices &ss,
117-
epoch_t epoch_started,
118-
const EventT& evt);
119-
120-
PipelineHandle& get_handle() { return handle; }
121-
122-
std::tuple<
123-
OperationThrottler::BlockingEvent,
124-
PGPeeringPipeline::Process::BlockingEvent
125-
> tracking_events;
126-
127-
private:
128-
boost::intrusive_ptr<const boost::statechart::event_base> evt;
129-
PipelineHandle handle;
130-
131-
static PGPeeringPipeline &peering_pp(PG &pg);
132-
interruptible_future<bool> do_recovery() override;
133-
};
134-
135-
template <class EventT>
136-
BackfillRecovery::BackfillRecovery(
137-
Ref<PG> pg,
138-
ShardServices &ss,
139-
const epoch_t epoch_started,
140-
const EventT& evt)
141-
: BackgroundRecoveryT(
142-
std::move(pg),
143-
ss,
144-
epoch_started,
145-
crimson::osd::scheduler::scheduler_class_t::background_best_effort),
146-
evt(evt.intrusive_from_this())
147-
{}
148-
149109
}
150110

151111
#if FMT_VERSION >= 90000
152-
template <> struct fmt::formatter<crimson::osd::BackfillRecovery> : fmt::ostream_formatter {};
153112
template <> struct fmt::formatter<crimson::osd::PglogBasedRecovery> : fmt::ostream_formatter {};
154113
template <> struct fmt::formatter<crimson::osd::UrgentRecovery> : fmt::ostream_formatter {};
155114
template <class T> struct fmt::formatter<crimson::osd::BackgroundRecoveryT<T>> : fmt::ostream_formatter {};

src/crimson/osd/osd_operations/peering_event.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ namespace crimson::osd {
2121
class OSD;
2222
class ShardServices;
2323
class PG;
24-
class BackfillRecovery;
2524

2625
template <class T>
2726
class PeeringEvent : public PhasedOperationT<T> {

src/crimson/osd/pg.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@ namespace crimson::os {
6464

6565
namespace crimson::osd {
6666
class OpsExecuter;
67-
class BackfillRecovery;
6867
class SnapTrimEvent;
6968
class PglogBasedRecovery;
7069

@@ -892,7 +891,6 @@ class PG : public boost::intrusive_ref_counter<
892891
friend class RepRequest;
893892
friend class LogMissingRequest;
894893
friend class LogMissingRequestReply;
895-
friend class BackfillRecovery;
896894
friend struct PGFacade;
897895
friend class InternalClientRequest;
898896
friend class WatchTimeoutRequest;

src/crimson/osd/pg_recovery.cc

Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -477,17 +477,6 @@ void PGRecovery::_committed_pushed_object(epoch_t epoch,
477477
}
478478
}
479479

480-
template <class EventT>
481-
void PGRecovery::start_backfill_recovery(const EventT& evt)
482-
{
483-
using BackfillRecovery = crimson::osd::BackfillRecovery;
484-
std::ignore = pg->get_shard_services().start_operation<BackfillRecovery>(
485-
static_cast<crimson::osd::PG*>(pg),
486-
pg->get_shard_services(),
487-
pg->get_osdmap_epoch(),
488-
evt);
489-
}
490-
491480
void PGRecovery::request_replica_scan(
492481
const pg_shard_t& target,
493482
const hobject_t& begin,
@@ -520,7 +509,8 @@ void PGRecovery::request_primary_scan(
520509
).then_interruptible([this] (BackfillInterval bi) {
521510
logger().debug("request_primary_scan:{}", __func__);
522511
using BackfillState = crimson::osd::BackfillState;
523-
start_backfill_recovery(BackfillState::PrimaryScanned{ std::move(bi) });
512+
backfill_state->process_event(
513+
BackfillState::PrimaryScanned{ std::move(bi) }.intrusive_from_this());
524514
});
525515
}
526516

@@ -542,7 +532,13 @@ void PGRecovery::enqueue_push(
542532
}).then_interruptible([this, obj] {
543533
logger().debug("enqueue_push:{}", __func__);
544534
using BackfillState = crimson::osd::BackfillState;
545-
start_backfill_recovery(BackfillState::ObjectPushed(std::move(obj)));
535+
if (backfill_state->is_triggered()) {
536+
backfill_state->post_event(
537+
BackfillState::ObjectPushed(std::move(obj)).intrusive_from_this());
538+
} else {
539+
backfill_state->process_event(
540+
BackfillState::ObjectPushed(std::move(obj)).intrusive_from_this());
541+
}
546542
});
547543
}
548544

@@ -643,8 +639,6 @@ void PGRecovery::backfilled()
643639

644640
void PGRecovery::backfill_suspended()
645641
{
646-
// We are not creating a new BackfillRecovery request here, as we
647-
// need to cancel the backfill synchronously (before this method returns).
648642
using BackfillState = crimson::osd::BackfillState;
649643
backfill_state->process_event(
650644
BackfillState::SuspendBackfill{}.intrusive_from_this());
@@ -673,13 +667,7 @@ void PGRecovery::on_activate_complete()
673667
void PGRecovery::on_backfill_reserved()
674668
{
675669
logger().debug("{}", __func__);
676-
// yes, it's **not** backfilling yet. The PG_STATE_BACKFILLING
677-
// will be set after on_backfill_reserved() returns.
678-
// Backfill needs to take this into consideration when scheduling
679-
// events -- they must be mutually exclusive with PeeringEvent
680-
// instances. Otherwise the execution might begin without having
681-
// the state updated.
682-
ceph_assert(!pg->get_peering_state().is_backfilling());
670+
ceph_assert(pg->get_peering_state().is_backfilling());
683671
// let's be lazy with creating the backfill stuff
684672
using BackfillState = crimson::osd::BackfillState;
685673
if (!backfill_state) {
@@ -694,5 +682,6 @@ void PGRecovery::on_backfill_reserved()
694682
// it may be we either start a completely new backfill (first
695683
// event since last on_activate_complete()) or to resume already
696684
// (but stopped one).
697-
start_backfill_recovery(BackfillState::Triggered{});
685+
backfill_state->process_event(
686+
BackfillState::Triggered{}.intrusive_from_this());
698687
}

src/crimson/osd/recovery_backend.cc

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -335,11 +335,10 @@ RecoveryBackend::handle_scan_digest(
335335
bi.clear_objects();
336336
::decode_noclear(bi.objects, p);
337337
}
338-
shard_services.start_operation<crimson::osd::BackfillRecovery>(
339-
static_cast<crimson::osd::PG*>(&pg),
340-
shard_services,
341-
pg.get_osdmap_epoch(),
342-
crimson::osd::BackfillState::ReplicaScanned{ m.from, std::move(bi) });
338+
auto recovery_handler = pg.get_recovery_handler();
339+
recovery_handler->dispatch_backfill_event(
340+
crimson::osd::BackfillState::ReplicaScanned{
341+
m.from, std::move(bi) }.intrusive_from_this());
343342
return seastar::now();
344343
}
345344

src/osd/PeeringState.cc

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5079,10 +5079,10 @@ PeeringState::Backfilling::Backfilling(my_context ctx)
50795079

50805080
DECLARE_LOCALS;
50815081
ps->backfill_reserved = true;
5082-
pl->on_backfill_reserved();
50835082
ps->state_clear(PG_STATE_BACKFILL_TOOFULL);
50845083
ps->state_clear(PG_STATE_BACKFILL_WAIT);
50855084
ps->state_set(PG_STATE_BACKFILLING);
5085+
pl->on_backfill_reserved();
50865086
pl->publish_stats_to_osd();
50875087
}
50885088

@@ -5128,13 +5128,18 @@ PeeringState::Backfilling::react(const DeferBackfill &c)
51285128
ps->state_clear(PG_STATE_BACKFILLING);
51295129
suspend_backfill();
51305130

5131-
pl->schedule_event_after(
5132-
std::make_shared<PGPeeringEvent>(
5133-
ps->get_osdmap_epoch(),
5134-
ps->get_osdmap_epoch(),
5135-
RequestBackfill()),
5136-
c.delay);
5137-
return transit<NotBackfilling>();
5131+
if (ps->needs_backfill()) {
5132+
pl->schedule_event_after(
5133+
std::make_shared<PGPeeringEvent>(
5134+
ps->get_osdmap_epoch(),
5135+
ps->get_osdmap_epoch(),
5136+
RequestBackfill()),
5137+
c.delay);
5138+
return transit<NotBackfilling>();
5139+
} else {
5140+
// raced with MOSDPGBackfill::OP_BACKFILL_FINISH, ignore
5141+
return discard_event();
5142+
}
51385143
}
51395144

51405145
boost::statechart::result

0 commit comments

Comments
 (0)