Skip to content

Commit 4fe79d9

Browse files
authored
Merge pull request ceph#62710 from bill-scales/ec_backfill
osd: EC Optimizations: Backfill changes for partial writes
2 parents 9336c9e + babb801 commit 4fe79d9

15 files changed

+564
-137
lines changed

src/crimson/osd/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ add_executable(crimson-osd
5656
${PROJECT_SOURCE_DIR}/src/osd/MissingLoc.cc
5757
${PROJECT_SOURCE_DIR}/src/osd/PGLog.cc
5858
${PROJECT_SOURCE_DIR}/src/osd/SnapMapper.cc
59-
${PROJECT_SOURCE_DIR}/src/osd/recovery_types.cc
6059
${PROJECT_SOURCE_DIR}/src/osd/osd_perf_counters.cc
6160
${PROJECT_SOURCE_DIR}/src/mgr/OSDPerfMetricTypes.cc
6261
watch.cc

src/crimson/osd/backfill_facades.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ struct PeeringFacade final : BackfillState::PeeringFacade {
6262
const std::vector<pg_shard_t> &peers) override {
6363
return peering_state.prepare_backfill_for_missing(soid, v, peers);
6464
}
65+
66+
const pg_pool_t& get_pool() const override {
67+
return peering_state.get_pgpool().info;
68+
}
69+
6570
PeeringFacade(PeeringState& peering_state)
6671
: peering_state(peering_state) {
6772
}

src/crimson/osd/backfill_state.cc

Lines changed: 76 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ BackfillState::Initial::react(const BackfillState::Triggered& evt)
8585
ceph_assert(backfill_state().last_backfill_started == \
8686
peering_state().earliest_backfill());
8787
ceph_assert(peering_state().is_backfilling());
88-
// initialize BackfillIntervals
88+
// initialize ReplicaBackfillIntervals
8989
for (const auto& bt : peering_state().get_backfill_targets()) {
9090
backfill_state().peer_backfill_info[bt].reset(
9191
peering_state().get_peer_last_backfill(bt));
@@ -134,13 +134,52 @@ void BackfillState::Enqueuing::maybe_update_range()
134134
if (e.is_update()) {
135135
DEBUGDPP("maybe_update_range(lambda): {} updated to ver {}",
136136
pg(), e.soid, e.version);
137-
primary_bi.objects.erase(e.soid);
138-
primary_bi.objects.insert(std::make_pair(e.soid,
139-
e.version));
137+
if (e.written_shards.empty()) {
138+
// Log entry updates all shards, replace all entries for e.soid
139+
primary_bi.objects.erase(e.soid);
140+
primary_bi.objects.insert(
141+
std::make_pair(e.soid,
142+
std::make_pair(shard_id_t::NO_SHARD,
143+
e.version)));
144+
} else {
145+
// Update backfill interval for shards modified by log entry
146+
std::map<shard_id_t,eversion_t> versions;
147+
// Create map from existing entries in backfill entry
148+
const auto & [begin, end] = primary_bi.objects.equal_range(e.soid);
149+
for (const auto & entry : std::ranges::subrange(begin, end)) {
150+
const auto & [shard, version] = entry.second;
151+
versions[shard] = version;
152+
}
153+
// Update entries in map that are modified by log entry
154+
bool uses_default = false;
155+
for (const auto & shard : peering_state().get_backfill_targets()) {
156+
if (e.is_written_shard(shard.shard)) {
157+
versions.erase(shard.shard);
158+
uses_default = true;
159+
} else {
160+
if (!versions.contains(shard.shard)) {
161+
versions[shard.shard] = e.prior_version;
162+
}
163+
//Else: keep existing version
164+
}
165+
}
166+
if (uses_default) {
167+
versions[shard_id_t::NO_SHARD] = e.version;
168+
} else {
169+
versions.erase(shard_id_t::NO_SHARD);
170+
}
171+
// Erase and recreate backfill interval for e.soid using map
172+
primary_bi.objects.erase(e.soid);
173+
for (auto & [shard, version] : versions) {
174+
primary_bi.objects.insert(
175+
std::make_pair(e.soid,
176+
std::make_pair(shard, version)));
177+
}
178+
}
140179
} else if (e.is_delete()) {
141180
DEBUGDPP("maybe_update_range(lambda): {} removed",
142181
pg(), e.soid);
143-
primary_bi.objects.erase(e.soid);
182+
primary_bi.objects.erase(e.soid); // Erase all entries for e.soid
144183
}
145184
}
146185
};
@@ -168,8 +207,8 @@ void BackfillState::Enqueuing::trim_backfill_infos()
168207

169208
/* static */ bool BackfillState::Enqueuing::all_enqueued(
170209
const PeeringFacade& peering_state,
171-
const BackfillInterval& backfill_info,
172-
const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info)
210+
const PrimaryBackfillInterval& backfill_info,
211+
const std::map<pg_shard_t, ReplicaBackfillInterval>& peer_backfill_info)
173212
{
174213
const bool all_local_enqueued = \
175214
backfill_info.extends_to_end() && backfill_info.empty();
@@ -184,7 +223,8 @@ void BackfillState::Enqueuing::trim_backfill_infos()
184223
}
185224

186225
hobject_t BackfillState::Enqueuing::earliest_peer_backfill(
187-
const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info) const
226+
const std::map<pg_shard_t,
227+
ReplicaBackfillInterval>& peer_backfill_info) const
188228
{
189229
hobject_t e = hobject_t::get_max();
190230
for (const pg_shard_t& bt : peering_state().get_backfill_targets()) {
@@ -196,8 +236,8 @@ hobject_t BackfillState::Enqueuing::earliest_peer_backfill(
196236
}
197237

198238
bool BackfillState::Enqueuing::should_rescan_replicas(
199-
const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info,
200-
const BackfillInterval& backfill_info) const
239+
const std::map<pg_shard_t, ReplicaBackfillInterval>& peer_backfill_info,
240+
const PrimaryBackfillInterval& backfill_info) const
201241
{
202242
const auto& targets = peering_state().get_backfill_targets();
203243
return std::any_of(std::begin(targets), std::end(targets),
@@ -208,8 +248,8 @@ bool BackfillState::Enqueuing::should_rescan_replicas(
208248
}
209249

210250
bool BackfillState::Enqueuing::should_rescan_primary(
211-
const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info,
212-
const BackfillInterval& backfill_info) const
251+
const std::map<pg_shard_t, ReplicaBackfillInterval>& peer_backfill_info,
252+
const PrimaryBackfillInterval& backfill_info) const
213253
{
214254
return backfill_info.begin <= earliest_peer_backfill(peer_backfill_info) &&
215255
!backfill_info.extends_to_end() && backfill_info.empty();
@@ -218,7 +258,7 @@ bool BackfillState::Enqueuing::should_rescan_primary(
218258
void BackfillState::Enqueuing::trim_backfilled_object_from_intervals(
219259
BackfillState::Enqueuing::result_t&& result,
220260
hobject_t& last_backfill_started,
221-
std::map<pg_shard_t, BackfillInterval>& peer_backfill_info)
261+
std::map<pg_shard_t, ReplicaBackfillInterval>& peer_backfill_info)
222262
{
223263
std::for_each(std::begin(result.pbi_targets), std::end(result.pbi_targets),
224264
[&peer_backfill_info] (const auto& bt) {
@@ -257,13 +297,28 @@ BackfillState::Enqueuing::update_on_peers(const hobject_t& check)
257297
result_t result { {}, primary_bi.begin };
258298
std::map<hobject_t, std::pair<eversion_t, std::vector<pg_shard_t>>> backfills;
259299

300+
std::map<shard_id_t,eversion_t> versions;
301+
auto it = primary_bi.objects.begin();
302+
const hobject_t& hoid = it->first;
303+
eversion_t obj_v;
304+
while (it != primary_bi.objects.end() && it->first == hoid) {
305+
obj_v = std::max(obj_v, it->second.second);
306+
versions[it->second.first] = it->second.second;
307+
++it;
308+
}
309+
260310
for (const auto& bt : peering_state().get_backfill_targets()) {
261311
const auto& peer_bi = backfill_state().peer_backfill_info.at(bt);
262312

263313
// Find all check peers that have the wrong version
264-
if (const eversion_t& obj_v = primary_bi.objects.begin()->second;
265-
check == primary_bi.begin && check == peer_bi.begin) {
266-
if (peer_bi.objects.begin()->second != obj_v) {
314+
if (check == primary_bi.begin && check == peer_bi.begin) {
315+
eversion_t replicaobj_v;
316+
if (versions.contains(bt.shard)) {
317+
replicaobj_v = versions.at(bt.shard);
318+
} else {
319+
replicaobj_v = versions.at(shard_id_t::NO_SHARD);
320+
}
321+
if (peer_bi.objects.begin()->second != replicaobj_v) {
267322
std::ignore = backfill_state().progress_tracker->enqueue_push(
268323
primary_bi.begin);
269324
auto &[v, peers] = backfills[primary_bi.begin];
@@ -298,8 +353,9 @@ BackfillState::Enqueuing::update_on_peers(const hobject_t& check)
298353
}
299354

300355
bool BackfillState::Enqueuing::Enqueuing::all_emptied(
301-
const BackfillInterval& local_backfill_info,
302-
const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info) const
356+
const PrimaryBackfillInterval& local_backfill_info,
357+
const std::map<pg_shard_t,
358+
ReplicaBackfillInterval>& peer_backfill_info) const
303359
{
304360
const auto& targets = peering_state().get_backfill_targets();
305361
const auto replicas_emptied =
@@ -459,8 +515,8 @@ BackfillState::PrimaryScanning::react(ObjectPushed evt)
459515

460516
// -- ReplicasScanning
461517
bool BackfillState::ReplicasScanning::replica_needs_scan(
462-
const BackfillInterval& replica_backfill_info,
463-
const BackfillInterval& local_backfill_info)
518+
const ReplicaBackfillInterval& replica_backfill_info,
519+
const PrimaryBackfillInterval& local_backfill_info)
464520
{
465521
return replica_backfill_info.empty() && \
466522
replica_backfill_info.begin <= local_backfill_info.begin && \

src/crimson/osd/backfill_state.h

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include "osd/recovery_types.h"
1717
#include "osd/PGLog.h"
18+
#include "osd/PeeringState.h"
1819

1920
namespace crimson::osd {
2021

@@ -27,16 +28,16 @@ struct BackfillState {
2728

2829
// events come first
2930
struct PrimaryScanned : sc::event<PrimaryScanned> {
30-
BackfillInterval result;
31-
PrimaryScanned(BackfillInterval&& result)
31+
PrimaryBackfillInterval result;
32+
PrimaryScanned(PrimaryBackfillInterval&& result)
3233
: result(std::move(result)) {
3334
}
3435
};
3536

3637
struct ReplicaScanned : sc::event<ReplicaScanned> {
3738
pg_shard_t from;
38-
BackfillInterval result;
39-
ReplicaScanned(pg_shard_t from, BackfillInterval&& result)
39+
ReplicaBackfillInterval result;
40+
ReplicaScanned(pg_shard_t from, ReplicaBackfillInterval&& result)
4041
: from(std::move(from)),
4142
result(std::move(result)) {
4243
}
@@ -166,8 +167,8 @@ struct BackfillState {
166167
// completed yet.
167168
static bool all_enqueued(
168169
const PeeringFacade& peering_state,
169-
const BackfillInterval& backfill_info,
170-
const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info);
170+
const PrimaryBackfillInterval& backfill_info,
171+
const std::map<pg_shard_t, ReplicaBackfillInterval>& peer_backfill_info);
171172

172173
private:
173174
void maybe_update_range();
@@ -176,33 +177,35 @@ struct BackfillState {
176177
// these methods take BackfillIntervals instead of extracting them from
177178
// the state to emphasize the relationships across the main loop.
178179
bool all_emptied(
179-
const BackfillInterval& local_backfill_info,
180-
const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info) const;
180+
const PrimaryBackfillInterval& local_backfill_info,
181+
const std::map<pg_shard_t,
182+
ReplicaBackfillInterval>& peer_backfill_info) const;
181183
hobject_t earliest_peer_backfill(
182-
const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info) const;
184+
const std::map<pg_shard_t,
185+
ReplicaBackfillInterval>& peer_backfill_info) const;
183186
bool should_rescan_replicas(
184-
const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info,
185-
const BackfillInterval& backfill_info) const;
187+
const std::map<pg_shard_t, ReplicaBackfillInterval>& peer_backfill_info,
188+
const PrimaryBackfillInterval& backfill_info) const;
186189
// indicate whether a particular acting primary needs to be scanned again
187190
// to process next piece of the hobject_t's namespace.
188191
// the logic is by analogy to replica_needs_scan(). See comments there.
189192
bool should_rescan_primary(
190-
const std::map<pg_shard_t, BackfillInterval>& peer_backfill_info,
191-
const BackfillInterval& backfill_info) const;
193+
const std::map<pg_shard_t, ReplicaBackfillInterval>& peer_backfill_info,
194+
const PrimaryBackfillInterval& backfill_info) const;
192195

193196
// the result_t is intermediary between {remove,update}_on_peers() and
194-
// updating BackfillIntervals in trim_backfilled_object_from_intervals.
195-
// This step is important because it affects the main loop's condition,
196-
// and thus deserves to be exposed instead of being called deeply from
197-
// {remove,update}_on_peers().
197+
// updating ReplicaBackfillIntervals in
198+
// trim_backfilled_object_from_intervals. This step is important
199+
// because it affects the main loop's condition, and thus deserves to be
200+
// exposed instead of being called deeply from {remove,update}_on_peers().
198201
struct [[nodiscard]] result_t {
199202
std::set<pg_shard_t> pbi_targets;
200203
hobject_t new_last_backfill_started;
201204
};
202205
void trim_backfilled_object_from_intervals(
203206
result_t&&,
204207
hobject_t& last_backfill_started,
205-
std::map<pg_shard_t, BackfillInterval>& peer_backfill_info);
208+
std::map<pg_shard_t, ReplicaBackfillInterval>& peer_backfill_info);
206209
result_t remove_on_peers(const hobject_t& check);
207210
result_t update_on_peers(const hobject_t& check);
208211
};
@@ -242,12 +245,12 @@ struct BackfillState {
242245
sc::result react(Triggered);
243246

244247
// indicate whether a particular peer should be scanned to retrieve
245-
// BackfillInterval for new range of hobject_t namespace.
248+
// ReplicaBackfillInterval for new range of hobject_t namespace.
246249
// true when bi.objects is exhausted, replica bi's end is not MAX,
247250
// and primary bi's begin is further than the replica's one.
248251
static bool replica_needs_scan(
249-
const BackfillInterval& replica_backfill_info,
250-
const BackfillInterval& local_backfill_info);
252+
const ReplicaBackfillInterval& replica_backfill_info,
253+
const PrimaryBackfillInterval& local_backfill_info);
251254

252255
private:
253256
std::set<pg_shard_t> waiting_on_backfill;
@@ -339,8 +342,8 @@ struct BackfillState {
339342
backfill_suspend_state.should_go_enqueuing = true;
340343
}
341344
hobject_t last_backfill_started;
342-
BackfillInterval backfill_info;
343-
std::map<pg_shard_t, BackfillInterval> peer_backfill_info;
345+
PrimaryBackfillInterval backfill_info;
346+
std::map<pg_shard_t, ReplicaBackfillInterval> peer_backfill_info;
344347
BackfillMachine backfill_machine;
345348
std::unique_ptr<ProgressTracker> progress_tracker;
346349
size_t replicas_in_backfill = 0;
@@ -408,6 +411,7 @@ struct BackfillState::PeeringFacade {
408411
const hobject_t &soid,
409412
const eversion_t &v,
410413
const std::vector<pg_shard_t> &peers) = 0;
414+
virtual const pg_pool_t& get_pool() const = 0;
411415
virtual ~PeeringFacade() {}
412416
};
413417

src/crimson/osd/pg_recovery.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -512,11 +512,12 @@ void PGRecovery::request_primary_scan(
512512
{
513513
logger().debug("{}", __func__);
514514
using crimson::common::local_conf;
515-
std::ignore = pg->get_recovery_backend()->scan_for_backfill(
515+
std::ignore = pg->get_recovery_backend()->scan_for_backfill_primary(
516516
begin,
517517
local_conf()->osd_backfill_scan_min,
518-
local_conf()->osd_backfill_scan_max
519-
).then_interruptible([this] (BackfillInterval bi) {
518+
local_conf()->osd_backfill_scan_max,
519+
pg->get_peering_state().get_backfill_targets()
520+
).then_interruptible([this] (PrimaryBackfillInterval bi) {
520521
logger().debug("request_primary_scan:{}", __func__);
521522
using BackfillState = crimson::osd::BackfillState;
522523
backfill_state->process_event(

0 commit comments

Comments
 (0)