Skip to content

Commit 40eadba

Browse files
authored
Merge pull request ceph#53151 from xxhdx1985126/wip-crimson-backfill-fixes
crimson/osd/backfill_state: fixes two corner cases in backfilling Reviewed-by: Radosław Zarzyński <[email protected]>
2 parents cb8d669 + 5b9c08e commit 40eadba

File tree

5 files changed

+40
-12
lines changed

5 files changed

+40
-12
lines changed

src/crimson/osd/backfill_state.cc

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ BackfillState::Initial::Initial(my_context ctx)
7373
}
7474
ceph_assert(peering_state().get_backfill_targets().size());
7575
ceph_assert(!backfill_state().last_backfill_started.is_max());
76+
backfill_state().replicas_in_backfill =
77+
peering_state().get_backfill_targets().size();
7678
}
7779

7880
boost::statechart::result
@@ -350,9 +352,14 @@ BackfillState::Enqueuing::Enqueuing(my_context ctx)
350352
logger().debug("{}: reached end for current local chunk",
351353
__func__);
352354
post_event(RequestPrimaryScanning{});
353-
} else if (backfill_state().progress_tracker->tracked_objects_completed()) {
354-
post_event(RequestDone{});
355355
} else {
356+
if (backfill_state().progress_tracker->tracked_objects_completed()
357+
&& Enqueuing::all_enqueued(peering_state(),
358+
backfill_state().backfill_info,
359+
backfill_state().peer_backfill_info)) {
360+
backfill_state().last_backfill_started = hobject_t::get_max();
361+
backfill_listener().update_peers_last_backfill(hobject_t::get_max());
362+
}
356363
logger().debug("{}: reached end for both local and all peers "
357364
"but still has in-flight operations", __func__);
358365
post_event(RequestWaiting{});
@@ -470,8 +477,6 @@ BackfillState::Waiting::react(ObjectPushed evt)
470477
backfill_state().backfill_info,
471478
backfill_state().peer_backfill_info)) {
472479
return transit<Enqueuing>();
473-
} else if (backfill_state().progress_tracker->tracked_objects_completed()) {
474-
return transit<Done>();
475480
} else {
476481
// we still have something to wait on
477482
logger().debug("Waiting::react() on ObjectPushed; still waiting");

src/crimson/osd/backfill_state.h

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ struct BackfillState {
5555
struct Triggered : sc::event<Triggered> {
5656
};
5757

58+
struct RequestDone : sc::event<RequestDone> {
59+
};
60+
5861
private:
5962
// internal events
6063
struct RequestPrimaryScanning : sc::event<RequestPrimaryScanning> {
@@ -66,9 +69,6 @@ struct BackfillState {
6669
struct RequestWaiting : sc::event<RequestWaiting> {
6770
};
6871

69-
struct RequestDone : sc::event<RequestDone> {
70-
};
71-
7272
class ProgressTracker;
7373

7474
public:
@@ -149,7 +149,6 @@ struct BackfillState {
149149
sc::transition<RequestPrimaryScanning, PrimaryScanning>,
150150
sc::transition<RequestReplicasScanning, ReplicasScanning>,
151151
sc::transition<RequestWaiting, Waiting>,
152-
sc::transition<RequestDone, Done>,
153152
sc::transition<sc::event_base, Crashed>>;
154153
explicit Enqueuing(my_context);
155154

@@ -206,6 +205,7 @@ struct BackfillState {
206205
using reactions = boost::mpl::list<
207206
sc::custom_reaction<ObjectPushed>,
208207
sc::custom_reaction<PrimaryScanned>,
208+
sc::transition<RequestDone, Done>,
209209
sc::transition<sc::event_base, Crashed>>;
210210
explicit PrimaryScanning(my_context);
211211
sc::result react(ObjectPushed);
@@ -218,6 +218,7 @@ struct BackfillState {
218218
using reactions = boost::mpl::list<
219219
sc::custom_reaction<ObjectPushed>,
220220
sc::custom_reaction<ReplicaScanned>,
221+
sc::transition<RequestDone, Done>,
221222
sc::transition<sc::event_base, Crashed>>;
222223
explicit ReplicasScanning(my_context);
223224
// collect scanning result; if all results are collected, transition
@@ -241,6 +242,7 @@ struct BackfillState {
241242
StateHelper<Waiting> {
242243
using reactions = boost::mpl::list<
243244
sc::custom_reaction<ObjectPushed>,
245+
sc::transition<RequestDone, Done>,
244246
sc::transition<sc::event_base, Crashed>>;
245247
explicit Waiting(my_context);
246248
sc::result react(ObjectPushed);
@@ -266,12 +268,21 @@ struct BackfillState {
266268
hobject_t get_last_backfill_started() const {
267269
return last_backfill_started;
268270
}
271+
272+
void backfill_target_done() {
273+
ceph_assert(replicas_in_backfill > 0);
274+
replicas_in_backfill--;
275+
if (!replicas_in_backfill) {
276+
backfill_machine.process_event(RequestDone{});
277+
}
278+
}
269279
private:
270280
hobject_t last_backfill_started;
271281
BackfillInterval backfill_info;
272282
std::map<pg_shard_t, BackfillInterval> peer_backfill_info;
273283
BackfillMachine backfill_machine;
274284
std::unique_ptr<ProgressTracker> progress_tracker;
285+
size_t replicas_in_backfill = 0;
275286
};
276287

277288
// BackfillListener -- an interface used by the backfill FSM to request

src/crimson/osd/pg_recovery.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ class PGRecovery : public crimson::osd::BackfillState::BackfillListener {
3636
void on_backfill_reserved();
3737
void dispatch_backfill_event(
3838
boost::intrusive_ptr<const boost::statechart::event_base> evt);
39+
void backfill_target_finished() {
40+
backfill_state->backfill_target_done();
41+
}
3942

4043
seastar::future<> stop() { return seastar::now(); }
4144
private:

src/crimson/osd/recovery_backend.cc

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,8 @@ RecoveryBackend::handle_backfill_finish_ack(
167167
logger().debug("{}", __func__);
168168
ceph_assert(pg.is_primary());
169169
ceph_assert(crimson::common::local_conf()->osd_kill_backfill_at != 3);
170-
// TODO:
171-
// finish_recovery_op(hobject_t::get_max());
170+
auto recovery_handler = pg.get_recovery_handler();
171+
recovery_handler->backfill_target_finished();
172172
return seastar::now();
173173
}
174174

@@ -264,8 +264,9 @@ RecoveryBackend::scan_for_backfill(
264264
bi.end = std::move(next);
265265
bi.version = pg.get_info().last_update;
266266
bi.objects = std::move(*version_map);
267-
logger().debug("{} BackfillInterval filled, leaving",
268-
"scan_for_backfill");
267+
logger().debug("{} BackfillInterval filled, leaving, {}",
268+
"scan_for_backfill",
269+
bi);
269270
return seastar::make_ready_future<BackfillInterval>(std::move(bi));
270271
});
271272
});

src/test/crimson/test_backfill.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2+
// vim: ts=8 sw=2 smarttab
3+
14
#include <algorithm>
25
#include <cstdlib>
36
#include <deque>
@@ -308,6 +311,9 @@ void BackfillFixture::maybe_flush()
308311
void BackfillFixture::update_peers_last_backfill(
309312
const hobject_t& new_last_backfill)
310313
{
314+
if (new_last_backfill.is_max()) {
315+
schedule_event(crimson::osd::BackfillState::RequestDone{});
316+
}
311317
}
312318

313319
bool BackfillFixture::budget_available() const
@@ -355,6 +361,7 @@ TEST(backfill, same_primary_same_replica)
355361
reference_store.objs
356362
).get_result();
357363

364+
cluster_fixture.next_round();
358365
cluster_fixture.next_round();
359366
EXPECT_CALL(cluster_fixture, backfilled);
360367
cluster_fixture.next_round();
@@ -377,6 +384,7 @@ TEST(backfill, one_empty_replica)
377384
cluster_fixture.next_round();
378385
cluster_fixture.next_round();
379386
cluster_fixture.next_round(2);
387+
cluster_fixture.next_round();
380388
EXPECT_CALL(cluster_fixture, backfilled);
381389
cluster_fixture.next_round();
382390
EXPECT_TRUE(cluster_fixture.all_stores_look_like(reference_store));

0 commit comments

Comments
 (0)