Skip to content

Commit c43542f

Browse files
committed
crimson/osd: cancel ongoing pglog-based recoveries on recovery defering
Previously, we rely on checking `PG::is_recovery()` in `PGRecevery::start_recovery_ops()` to determine whether it's still valid to proceed the recovery. This turns out to be inefficient, for example: 1. PG `P` is under recovery, and `PGRecovery::start_recovery_ops()` is called; 2. PG `P`'s recovery is deferred, and `PGRecovery::start_recovery_ops()` is blocked on waiting for external replies; 3. Before the arrivals of external replies, PG `P` resumes recovery and a new round of `PGRecovery::start_recovery_ops()` starts; 4. The external replies arrives and the old `PGRecovery::start_recovery_ops()` continues as `PG:is_recovering()` returns true. In the above case, we get two duplicated recovery ops, which is incorrect. Fixes: https://tracker.ceph.com/issues/67380 Signed-off-by: Xuehan Xu <[email protected]>
1 parent 601fcfa commit c43542f

File tree

10 files changed

+66
-7
lines changed

10 files changed

+66
-7
lines changed

src/crimson/osd/osd_operations/background_recovery.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,8 @@ PglogBasedRecovery::PglogBasedRecovery(
158158
PglogBasedRecovery::interruptible_future<bool>
159159
PglogBasedRecovery::do_recovery()
160160
{
161+
LOG_PREFIX(PglogBasedRecovery::do_recovery);
162+
DEBUGDPPI("{}: {}", *pg, __func__, *this);
161163
if (pg->has_reset_since(epoch_started)) {
162164
return seastar::make_ready_future<bool>(false);
163165
}
@@ -167,6 +169,7 @@ PglogBasedRecovery::do_recovery()
167169
interruptor>([this] (auto&& trigger) {
168170
return pg->get_recovery_handler()->start_recovery_ops(
169171
trigger,
172+
*this,
170173
crimson::common::local_conf()->osd_recovery_max_single_start);
171174
});
172175
});

src/crimson/osd/osd_operations/background_recovery.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,20 @@ class PglogBasedRecovery final : public BackgroundRecoveryT<PglogBasedRecovery>
9191
RecoveryBackend::RecoveryBlockingEvent
9292
> tracking_events;
9393

94+
void cancel() {
95+
cancelled = true;
96+
}
97+
98+
bool is_cancelled() const {
99+
return cancelled;
100+
}
101+
102+
epoch_t get_epoch_started() const {
103+
return epoch_started;
104+
}
94105
private:
95106
interruptible_future<bool> do_recovery() override;
107+
bool cancelled = false;
96108
};
97109

98110
class BackfillRecovery final : public BackgroundRecoveryT<BackfillRecovery> {

src/crimson/osd/pg.cc

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1659,6 +1659,7 @@ void PG::on_change(ceph::os::Transaction &t) {
16591659
peering_state.state_clear(PG_STATE_SNAPTRIM);
16601660
peering_state.state_clear(PG_STATE_SNAPTRIM_ERROR);
16611661
snap_mapper.reset_backend();
1662+
reset_pglog_based_recovery_op();
16621663
}
16631664

16641665
void PG::context_registry_on_change() {
@@ -1798,4 +1799,19 @@ void PG::PGLogEntryHandler::remove(const hobject_t &soid) {
17981799
DEBUGDPP("remove {} on pglog rollback", *pg, soid);
17991800
pg->remove_maybe_snapmapped_object(*t, soid);
18001801
}
1802+
1803+
void PG::set_pglog_based_recovery_op(PglogBasedRecovery *op) {
1804+
ceph_assert(!pglog_based_recovery_op);
1805+
pglog_based_recovery_op = op;
1806+
}
1807+
1808+
void PG::reset_pglog_based_recovery_op() {
1809+
pglog_based_recovery_op = nullptr;
1810+
}
1811+
1812+
void PG::cancel_pglog_based_recovery_op() {
1813+
ceph_assert(pglog_based_recovery_op);
1814+
pglog_based_recovery_op->cancel();
1815+
reset_pglog_based_recovery_op();
1816+
}
18011817
}

src/crimson/osd/pg.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ namespace crimson::osd {
6464
class OpsExecuter;
6565
class BackfillRecovery;
6666
class SnapTrimEvent;
67+
class PglogBasedRecovery;
6768

6869
class PG : public boost::intrusive_ref_counter<
6970
PG,
@@ -433,6 +434,10 @@ class PG : public boost::intrusive_ref_counter<
433434
recovery_handler->backfill_cancelled();
434435
}
435436

437+
void on_recovery_cancelled() final {
438+
cancel_pglog_based_recovery_op();
439+
}
440+
436441
void on_recovery_reserved() final {
437442
recovery_handler->start_pglogbased_recovery();
438443
}
@@ -838,13 +843,18 @@ class PG : public boost::intrusive_ref_counter<
838843
return can_discard_replica_op(m, m.get_map_epoch());
839844
}
840845

846+
void set_pglog_based_recovery_op(PglogBasedRecovery *op) final;
847+
void reset_pglog_based_recovery_op() final;
848+
void cancel_pglog_based_recovery_op();
849+
841850
private:
842851
// instead of seastar::gate, we use a boolean flag to indicate
843852
// whether the system is shutting down, as we don't need to track
844853
// continuations here.
845854
bool stopping = false;
846855

847856
PGActivationBlocker wait_for_active_blocker;
857+
PglogBasedRecovery* pglog_based_recovery_op = nullptr;
848858

849859
friend std::ostream& operator<<(std::ostream&, const PG& pg);
850860
friend class ClientRequest;

src/crimson/osd/pg_recovery.cc

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,29 +24,33 @@ namespace {
2424

2525
using std::map;
2626
using std::set;
27+
using PglogBasedRecovery = crimson::osd::PglogBasedRecovery;
2728

2829
void PGRecovery::start_pglogbased_recovery()
2930
{
30-
using PglogBasedRecovery = crimson::osd::PglogBasedRecovery;
31-
(void) pg->get_shard_services().start_operation<PglogBasedRecovery>(
31+
auto [op, fut] = pg->get_shard_services().start_operation<PglogBasedRecovery>(
3232
static_cast<crimson::osd::PG*>(pg),
3333
pg->get_shard_services(),
3434
pg->get_osdmap_epoch(),
3535
float(0.001));
36+
pg->set_pglog_based_recovery_op(op.get());
3637
}
3738

3839
PGRecovery::interruptible_future<bool>
3940
PGRecovery::start_recovery_ops(
4041
RecoveryBackend::RecoveryBlockingEvent::TriggerI& trigger,
42+
PglogBasedRecovery &recover_op,
4143
size_t max_to_start)
4244
{
4345
assert(pg->is_primary());
4446
assert(pg->is_peered());
4547

46-
if (!pg->is_recovering() && !pg->is_backfilling()) {
47-
logger().debug("recovery raced and were queued twice, ignoring!");
48+
if (pg->has_reset_since(recover_op.get_epoch_started()) ||
49+
recover_op.is_cancelled()) {
50+
logger().debug("recovery {} cancelled.", recover_op);
4851
return seastar::make_ready_future<bool>(false);
4952
}
53+
ceph_assert(pg->is_recovering());
5054

5155
// in ceph-osd the do_recovery() path handles both the pg log-based
5256
// recovery and the backfill, albeit they are separated at the layer
@@ -68,12 +72,15 @@ PGRecovery::start_recovery_ops(
6872
return interruptor::parallel_for_each(started,
6973
[] (auto&& ifut) {
7074
return std::move(ifut);
71-
}).then_interruptible([this] {
75+
}).then_interruptible([this, &recover_op] {
7276
//TODO: maybe we should implement a recovery race interruptor in the future
73-
if (!pg->is_recovering() && !pg->is_backfilling()) {
74-
logger().debug("recovery raced and were queued twice, ignoring!");
77+
if (pg->has_reset_since(recover_op.get_epoch_started()) ||
78+
recover_op.is_cancelled()) {
79+
logger().debug("recovery {} cancelled.", recover_op);
7580
return seastar::make_ready_future<bool>(false);
7681
}
82+
ceph_assert(pg->is_recovering());
83+
ceph_assert(!pg->is_backfilling());
7784

7885
bool done = !pg->get_peering_state().needs_recovery();
7986
if (done) {
@@ -101,6 +108,7 @@ PGRecovery::start_recovery_ops(
101108
pg->get_osdmap_epoch(),
102109
PeeringState::RequestBackfill{});
103110
}
111+
pg->reset_pglog_based_recovery_op();
104112
}
105113
return seastar::make_ready_future<bool>(!done);
106114
});

src/crimson/osd/pg_recovery.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
namespace crimson::osd {
1919
class UrgentRecovery;
20+
class PglogBasedRecovery;
2021
}
2122

2223
class MOSDPGBackfillRemove;
@@ -32,6 +33,7 @@ class PGRecovery : public crimson::osd::BackfillState::BackfillListener {
3233

3334
interruptible_future<bool> start_recovery_ops(
3435
RecoveryBackend::RecoveryBlockingEvent::TriggerI&,
36+
crimson::osd::PglogBasedRecovery &recover_op,
3537
size_t max_to_start);
3638
void on_backfill_reserved();
3739
void dispatch_backfill_event(

src/crimson/osd/pg_recovery_listener.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
namespace crimson::osd {
1313
class ShardServices;
14+
class PglogBasedRecovery;
1415
};
1516

1617
class RecoveryBackend;
@@ -38,4 +39,7 @@ class PGRecoveryListener {
3839
virtual void publish_stats_to_osd() = 0;
3940
virtual OSDriver &get_osdriver() = 0;
4041
virtual SnapMapper &get_snap_mapper() = 0;
42+
virtual void set_pglog_based_recovery_op(
43+
crimson::osd::PglogBasedRecovery *op) = 0;
44+
virtual void reset_pglog_based_recovery_op() = 0;
4145
};

src/osd/PG.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -612,6 +612,7 @@ class PG : public DoutPrefixProvider,
612612

613613
void on_backfill_reserved() override;
614614
void on_backfill_canceled() override;
615+
void on_recovery_cancelled() override {}
615616
void on_recovery_reserved() override;
616617

617618
bool is_forced_recovery_or_backfill() const {

src/osd/PeeringState.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5827,6 +5827,7 @@ PeeringState::Recovering::react(const DeferRecovery &evt)
58275827
ps->state_set(PG_STATE_RECOVERY_WAIT);
58285828
pl->cancel_local_background_io_reservation();
58295829
release_reservations(true);
5830+
pl->on_recovery_cancelled();
58305831
pl->schedule_event_after(
58315832
std::make_shared<PGPeeringEvent>(
58325833
ps->get_osdmap_epoch(),
@@ -5844,6 +5845,7 @@ PeeringState::Recovering::react(const UnfoundRecovery &evt)
58445845
ps->state_set(PG_STATE_RECOVERY_UNFOUND);
58455846
pl->cancel_local_background_io_reservation();
58465847
release_reservations(true);
5848+
pl->on_recovery_cancelled();
58475849
return transit<NotRecovering>();
58485850
}
58495851

src/osd/PeeringState.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,7 @@ class PeeringState : public MissingLoc::MappingInfo {
419419
virtual void on_backfill_reserved() = 0;
420420
virtual void on_backfill_canceled() = 0;
421421
virtual void on_recovery_reserved() = 0;
422+
virtual void on_recovery_cancelled() = 0;
422423

423424
// ================recovery space accounting ================
424425
virtual bool try_reserve_recovery_space(

0 commit comments

Comments
 (0)