Skip to content

Commit 1d6d750

Browse files
authored
Merge pull request ceph#60041 from xxhdx1985126/wip-68286
crimson/osd/pg_shard_manager: discard outdated operations when the corresponding pgs are already removed Reviewed-by: Matan Breizman <[email protected]>
2 parents 47292f6 + 1f1051d commit 1d6d750

File tree

12 files changed

+82
-14
lines changed

12 files changed

+82
-14
lines changed

src/crimson/osd/osd_operation.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,9 @@ class TrackableOperationT : public OperationT<T> {
217217

218218
public:
219219
static constexpr bool is_trackable = true;
220+
virtual bool requires_pg() const {
221+
return true;
222+
}
220223
};
221224

222225
template <class T>

src/crimson/osd/osd_operations/client_request.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ class ClientRequest final : public PhasedOperationT<ClientRequest>,
4242
unsigned instance_id = 0;
4343

4444
public:
45+
epoch_t get_epoch_sent_at() const {
46+
return m->get_map_epoch();
47+
}
48+
4549
/**
4650
* instance_handle_t
4751
*

src/crimson/osd/osd_operations/logmissing_request.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ class LogMissingRequest final : public PhasedOperationT<LogMissingRequest> {
3636
}
3737
PipelineHandle &get_handle() { return handle; }
3838
epoch_t get_epoch() const { return req->get_min_epoch(); }
39+
epoch_t get_epoch_sent_at() const {
40+
return req->get_map_epoch();
41+
}
3942

4043
ConnectionPipeline &get_connection_pipeline();
4144

src/crimson/osd/osd_operations/logmissing_request_reply.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ class LogMissingRequestReply final : public PhasedOperationT<LogMissingRequestRe
3636
}
3737
PipelineHandle &get_handle() { return handle; }
3838
epoch_t get_epoch() const { return req->get_min_epoch(); }
39+
epoch_t get_epoch_sent_at() const {
40+
return req->get_map_epoch();
41+
}
3942

4043
ConnectionPipeline &get_connection_pipeline();
4144

src/crimson/osd/osd_operations/peering_event.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ class PeeringEvent : public PhasedOperationT<T> {
4444
float delay = 0;
4545
PGPeeringEvent evt;
4646

47+
epoch_t get_epoch_sent_at() const {
48+
return evt.get_epoch_sent();
49+
}
50+
4751
const pg_shard_t get_from() const {
4852
return from;
4953
}
@@ -84,6 +88,10 @@ class PeeringEvent : public PhasedOperationT<T> {
8488
evt(std::forward<Args>(args)...)
8589
{}
8690

91+
bool requires_pg() const final {
92+
return evt.requires_pg;
93+
}
94+
8795
void print(std::ostream &) const final;
8896
void dump_detail(ceph::Formatter* f) const final;
8997
seastar::future<> with_pg(

src/crimson/osd/osd_operations/pg_advance_map.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ class PGAdvanceMap : public PhasedOperationT<PGAdvanceMap> {
5050
PGPeeringPipeline::Process::BlockingEvent
5151
> tracking_events;
5252

53+
epoch_t get_epoch_sent_at() const {
54+
return to;
55+
}
56+
5357
private:
5458
PGPeeringPipeline &peering_pp(PG &pg);
5559
};

src/crimson/osd/osd_operations/recovery_subrequest.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ class RecoverySubRequest final : public PhasedOperationT<RecoverySubRequest> {
3939
}
4040
PipelineHandle &get_handle() { return handle; }
4141
epoch_t get_epoch() const { return m->get_min_epoch(); }
42+
epoch_t get_epoch_sent_at() const {
43+
return m->get_map_epoch();
44+
}
4245

4346
ConnectionPipeline &get_connection_pipeline();
4447

src/crimson/osd/osd_operations/replicated_request.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ class RepRequest final : public PhasedOperationT<RepRequest> {
3636
}
3737
PipelineHandle &get_handle() { return handle; }
3838
epoch_t get_epoch() const { return req->get_min_epoch(); }
39+
epoch_t get_epoch_sent_at() const {
40+
return req->get_map_epoch();
41+
}
3942

4043
ConnectionPipeline &get_connection_pipeline();
4144

src/crimson/osd/osd_operations/scrub_events.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ class RemoteScrubEventBaseT : public PhasedOperationT<T> {
2727
crimson::net::ConnectionRef l_conn;
2828
crimson::net::ConnectionXcoreRef r_conn;
2929

30-
epoch_t epoch;
3130
spg_t pgid;
3231

3332
protected:
3433
using interruptor = InterruptibleOperation::interruptor;
34+
epoch_t epoch;
3535

3636
template <typename U=void>
3737
using ifut = InterruptibleOperation::interruptible_future<U>;
@@ -40,7 +40,7 @@ class RemoteScrubEventBaseT : public PhasedOperationT<T> {
4040
public:
4141
RemoteScrubEventBaseT(
4242
crimson::net::ConnectionRef conn, epoch_t epoch, spg_t pgid)
43-
: l_conn(std::move(conn)), epoch(epoch), pgid(pgid) {}
43+
: l_conn(std::move(conn)), pgid(pgid), epoch(epoch) {}
4444

4545
PGPeeringPipeline &get_peering_pipeline(PG &pg);
4646

@@ -117,6 +117,10 @@ class ScrubRequested final : public RemoteScrubEventBaseT<ScrubRequested> {
117117
: RemoteScrubEventBaseT<ScrubRequested>(std::forward<Args>(base_args)...),
118118
deep(deep) {}
119119

120+
epoch_t get_epoch_sent_at() const {
121+
return epoch;
122+
}
123+
120124
void print(std::ostream &out) const final {
121125
out << "(deep=" << deep << ")";
122126
}
@@ -141,6 +145,10 @@ class ScrubMessage final : public RemoteScrubEventBaseT<ScrubMessage> {
141145
ceph_assert(scrub::PGScrubber::is_scrub_message(*m));
142146
}
143147

148+
epoch_t get_epoch_sent_at() const {
149+
return epoch;
150+
}
151+
144152
void print(std::ostream &out) const final {
145153
out << "(m=" << *m << ")";
146154
}

src/crimson/osd/pg_shard_manager.h

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -256,18 +256,40 @@ class PGShardManager {
256256
auto &opref = *op;
257257
return opref.template with_blocking_event<
258258
PGMap::PGCreationBlockingEvent
259-
>([&target_shard_services, &opref](auto &&trigger) {
260-
return target_shard_services.wait_for_pg(
261-
std::move(trigger), opref.get_pgid());
262-
}).safe_then([&logger, &target_shard_services, &opref](Ref<PG> pgref) {
263-
logger.debug("{}: have_pg", opref);
264-
return opref.with_pg(target_shard_services, pgref);
265-
}).handle_error(
266-
crimson::ct_error::ecanceled::handle([&logger, &opref](auto) {
267-
logger.debug("{}: pg creation canceled, dropping", opref);
268-
return seastar::now();
269-
})
270-
).then([op=std::move(op)] {});
259+
>([&target_shard_services, &opref, &logger](auto &&trigger) mutable {
260+
auto pg = target_shard_services.get_pg(opref.get_pgid());
261+
auto fut = ShardServices::wait_for_pg_ertr::make_ready_future<Ref<PG>>(pg);
262+
if (!pg) {
263+
if (opref.requires_pg()) {
264+
auto osdmap = target_shard_services.get_map();
265+
if (!osdmap->is_up_acting_osd_shard(
266+
opref.get_pgid(), target_shard_services.local_state.whoami)) {
267+
logger.debug(
268+
"pg {} for {} is no longer here, discarding",
269+
opref.get_pgid(), opref);
270+
opref.get_handle().exit();
271+
auto _fut = seastar::now();
272+
if (osdmap->get_epoch() > opref.get_epoch_sent_at()) {
273+
_fut = target_shard_services.send_incremental_map(
274+
std::ref(opref.get_foreign_connection()),
275+
opref.get_epoch_sent_at() + 1);
276+
}
277+
return _fut;
278+
}
279+
}
280+
fut = target_shard_services.wait_for_pg(
281+
std::move(trigger), opref.get_pgid());
282+
}
283+
return fut.safe_then([&logger, &target_shard_services, &opref](Ref<PG> pgref) {
284+
logger.debug("{}: have_pg", opref);
285+
return opref.with_pg(target_shard_services, pgref);
286+
}).handle_error(
287+
crimson::ct_error::ecanceled::handle([&logger, &opref](auto) {
288+
logger.debug("{}: pg creation canceled, dropping", opref);
289+
return seastar::now();
290+
})
291+
);
292+
}).then([op=std::move(op)] {});
271293
}
272294

273295
seastar::future<> load_pgs(crimson::os::FuturizedStore& store);

0 commit comments

Comments
 (0)