Skip to content

Commit e837655

Browse files
authored
Merge pull request ceph#58868 from xxhdx1985126/wip-crimson-clean_region-based-clone-recovery
crimson/osd/pg: properly propagate snap mapper updates and do clean-region-based clone objects recovery Reviewed-by: Samuel Just <[email protected]> Reviewed-by: Matan Breizman <[email protected]>
2 parents 3e2426b + 0d37bc9 commit e837655

File tree

13 files changed

+238
-127
lines changed

13 files changed

+238
-127
lines changed

src/crimson/osd/ec_backend.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,6 @@ ECBackend::submit_transaction(const std::set<pg_shard_t> &pg_shards,
3232
std::vector<pg_log_entry_t>&& log_entries)
3333
{
3434
// todo
35-
return {seastar::now(),
36-
seastar::make_ready_future<crimson::osd::acked_peers_t>()};
35+
return make_ready_future<rep_op_ret_t>(seastar::now(),
36+
seastar::make_ready_future<crimson::osd::acked_peers_t>());
3737
}

src/crimson/osd/object_metadata_helper.cc

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
2+
// vim: ts=8 sw=2 smarttab expandtab
3+
14
#include "crimson/osd/object_metadata_helper.h"
25

36
namespace {
@@ -23,12 +26,26 @@ subsets_t calc_clone_subsets(
2326
subsets_t subsets;
2427
logger().debug("{}: {} clone_overlap {} ",
2528
__func__, soid, snapset.clone_overlap);
26-
29+
assert(missing.get_items().contains(soid));
30+
const pg_missing_item &missing_item = missing.get_items().at(soid);
31+
auto dirty_regions = missing_item.clean_regions.get_dirty_regions();
32+
if (dirty_regions.empty()) {
33+
logger().debug(
34+
"{} {} not touched, no need to recover, skipping",
35+
__func__,
36+
soid);
37+
return subsets;
38+
}
2739
uint64_t size = snapset.clone_size[soid.snap];
2840
if (size) {
2941
subsets.data_subset.insert(0, size);
3042
}
3143

44+
// let data_subset store only the modified content of the object.
45+
subsets.data_subset.intersection_of(dirty_regions);
46+
logger().debug("{} {} data_subset {}",
47+
__func__, soid, subsets.data_subset);
48+
3249
// TODO: make sure CEPH_FEATURE_OSD_CACHEPOOL is not supported in Crimson
3350
// Skips clone subsets if caching was enabled (allow_incomplete_clones).
3451

@@ -140,7 +157,7 @@ subsets_t calc_head_subsets(
140157
subsets.data_subset.insert(0, obj_size);
141158
}
142159
assert(missing.get_items().contains(head));
143-
const pg_missing_item missing_item = missing.get_items().at(head);
160+
const pg_missing_item &missing_item = missing.get_items().at(head);
144161
// let data_subset store only the modified content of the object.
145162
subsets.data_subset.intersection_of(missing_item.clean_regions.get_dirty_regions());
146163
logger().debug("{} {} data_subset {}",

src/crimson/osd/ops_executer.cc

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -871,6 +871,7 @@ OpsExecuter::interruptible_future<> OpsExecuter::snap_map_modify(
871871
ceph::os::Transaction& txn)
872872
{
873873
logger().debug("{}: soid {}, snaps {}", __func__, soid, snaps);
874+
// TODO: avoid seastar::async https://tracker.ceph.com/issues/67704
874875
return interruptor::async([soid, snaps, &snap_mapper,
875876
_t=osdriver.get_transaction(&txn)]() mutable {
876877
assert(std::size(snaps) > 0);
@@ -974,6 +975,7 @@ std::unique_ptr<OpsExecuter::CloningContext> OpsExecuter::execute_clone(
974975
0
975976
};
976977
encode(cloned_snaps, cloning_ctx->log_entry.snaps);
978+
cloning_ctx->log_entry.clean_regions.mark_data_region_dirty(0, initial_obs.oi.size);
977979

978980
return cloning_ctx;
979981
}
@@ -1019,14 +1021,6 @@ OpsExecuter::flush_clone_metadata(
10191021
update_clone_overlap();
10201022
if (cloning_ctx) {
10211023
std::move(*cloning_ctx).apply_to(log_entries, *obc);
1022-
const auto& coid = log_entries.front().soid;
1023-
const auto& cloned_snaps = obc->ssc->snapset.clone_snaps[coid.snap];
1024-
maybe_snap_mapped = snap_map_clone(
1025-
coid,
1026-
std::set<snapid_t>{std::begin(cloned_snaps), std::end(cloned_snaps)},
1027-
snap_mapper,
1028-
osdriver,
1029-
txn);
10301024
}
10311025
if (snapc.seq > obc->ssc->snapset.seq) {
10321026
// update snapset with latest snap context

src/crimson/osd/ops_executer.h

Lines changed: 41 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "os/Transaction.h"
2222
#include "osd/osd_types.h"
2323

24+
#include "crimson/common/coroutine.h"
2425
#include "crimson/common/errorator.h"
2526
#include "crimson/common/interruptible_future.h"
2627
#include "crimson/common/type_helpers.h"
@@ -272,6 +273,7 @@ class OpsExecuter : public seastar::enable_lw_shared_from_this<OpsExecuter> {
272273
OSDriver& osdriver,
273274
ceph::os::Transaction& txn);
274275

276+
public:
275277
static interruptible_future<> snap_map_remove(
276278
const hobject_t& soid,
277279
SnapMapper& snap_mapper,
@@ -290,6 +292,7 @@ class OpsExecuter : public seastar::enable_lw_shared_from_this<OpsExecuter> {
290292
OSDriver& osdriver,
291293
ceph::os::Transaction& txn);
292294

295+
private:
293296
// this gizmo could be wrapped in std::optional for the sake of lazy
294297
// initialization. we don't need it for ops that doesn't have effect
295298
// TODO: verify the init overhead of chunked_fifo
@@ -424,7 +427,7 @@ class OpsExecuter : public seastar::enable_lw_shared_from_this<OpsExecuter> {
424427
const std::vector<OSDOp>& ops,
425428
SnapMapper& snap_mapper,
426429
OSDriver& osdriver,
427-
MutFunc&& mut_func) &&;
430+
MutFunc mut_func) &&;
428431
std::vector<pg_log_entry_t> prepare_transaction(
429432
const std::vector<OSDOp>& ops);
430433
void fill_op_params(modified_by m);
@@ -510,60 +513,60 @@ OpsExecuter::flush_changes_n_do_ops_effects(
510513
const std::vector<OSDOp>& ops,
511514
SnapMapper& snap_mapper,
512515
OSDriver& osdriver,
513-
MutFunc&& mut_func) &&
516+
MutFunc mut_func) &&
514517
{
515518
const bool want_mutate = !txn.empty();
516519
// osd_op_params are instantiated by every wr-like operation.
517520
assert(osd_op_params || !want_mutate);
518521
assert(obc);
519-
rep_op_fut_t maybe_mutated =
520-
interruptor::make_ready_future<rep_op_fut_tuple>(
521-
seastar::now(),
522-
interruptor::make_interruptible(osd_op_errorator::now()));
522+
523+
auto submitted = interruptor::now();
524+
auto all_completed =
525+
interruptor::make_interruptible(osd_op_errorator::now());
526+
523527
if (cloning_ctx) {
524528
ceph_assert(want_mutate);
525529
}
530+
526531
if (want_mutate) {
527-
maybe_mutated = flush_clone_metadata(
532+
auto log_entries = co_await flush_clone_metadata(
528533
prepare_transaction(ops),
529534
snap_mapper,
530535
osdriver,
531-
txn
532-
).then_interruptible([mut_func=std::move(mut_func),
533-
this](auto&& log_entries) mutable {
534-
if (auto log_rit = log_entries.rbegin(); log_rit != log_entries.rend()) {
535-
ceph_assert(log_rit->version == osd_op_params->at_version);
536-
}
537-
auto [submitted, all_completed] =
538-
std::forward<MutFunc>(mut_func)(std::move(txn),
539-
std::move(obc),
540-
std::move(*osd_op_params),
541-
std::move(log_entries));
542-
return interruptor::make_ready_future<rep_op_fut_tuple>(
543-
std::move(submitted),
544-
osd_op_ierrorator::future<>(std::move(all_completed)));
545-
});
536+
txn);
537+
538+
if (auto log_rit = log_entries.rbegin(); log_rit != log_entries.rend()) {
539+
ceph_assert(log_rit->version == osd_op_params->at_version);
540+
}
541+
542+
auto [_submitted, _all_completed] = co_await mut_func(
543+
std::move(txn),
544+
std::move(obc),
545+
std::move(*osd_op_params),
546+
std::move(log_entries));
547+
548+
submitted = std::move(_submitted);
549+
all_completed = std::move(_all_completed);
546550
}
551+
547552
apply_stats();
548553

549-
if (__builtin_expect(op_effects.empty(), true)) {
550-
return maybe_mutated;
551-
} else {
552-
return maybe_mutated.then_unpack_interruptible(
553-
// need extra ref pg due to apply_stats() which can be executed after
554-
// informing snap mapper
555-
[this, pg=this->pg](auto&& submitted, auto&& all_completed) mutable {
556-
return interruptor::make_ready_future<rep_op_fut_tuple>(
557-
std::move(submitted),
558-
all_completed.safe_then_interruptible([this, pg=std::move(pg)] {
559-
// let's do the cleaning of `op_effects` in destructor
560-
return interruptor::do_for_each(op_effects,
561-
[pg=std::move(pg)](auto& op_effect) {
562-
return op_effect->execute(pg);
563-
});
564-
}));
554+
if (op_effects.size()) [[unlikely]] {
555+
// need extra ref pg due to apply_stats() which can be executed after
556+
// informing snap mapper
557+
all_completed =
558+
std::move(all_completed).safe_then_interruptible([this, pg=this->pg] {
559+
// let's do the cleaning of `op_effects` in destructor
560+
return interruptor::do_for_each(op_effects,
561+
[pg=std::move(pg)](auto& op_effect) {
562+
return op_effect->execute(pg);
563+
});
565564
});
566565
}
566+
567+
co_return std::make_tuple(
568+
std::move(submitted),
569+
std::move(all_completed));
567570
}
568571

569572
template <class Func>

src/crimson/osd/osd_operations/snaptrim_event.cc

Lines changed: 34 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -263,16 +263,18 @@ SnapTrimObjSubEvent::adjust_snaps(
263263
ghobject_t{coid, ghobject_t::NO_GEN, shard_id_t::NO_SHARD},
264264
OI_ATTR,
265265
bl);
266-
add_log_entry(
266+
auto &loge = add_log_entry(
267267
pg_log_entry_t::MODIFY,
268268
coid,
269269
obc->obs.oi.prior_version,
270270
0,
271271
osd_reqid_t(),
272272
obc->obs.oi.mtime,
273273
0);
274-
return OpsExecuter::snap_map_modify(
275-
coid, new_snaps, pg->snap_mapper, pg->osdriver, txn);
274+
bufferlist snapsbl;
275+
encode(new_snaps, snapsbl);
276+
loge.snaps.swap(snapsbl);
277+
return interruptor::now();
276278
}
277279

278280
void SnapTrimObjSubEvent::update_head(
@@ -400,32 +402,8 @@ SnapTrimObjSubEvent::start()
400402
// lock both clone's and head's obcs
401403
co_await pg->obc_loader.with_obc<RWState::RWWRITE>(
402404
coid,
403-
[this](auto head_obc, auto clone_obc) {
404-
logger().debug("{}: got clone_obc={}", *this, clone_obc->get_oid());
405-
return enter_stage<interruptor>(
406-
client_pp().process
407-
).then_interruptible(
408-
[this,clone_obc=std::move(clone_obc), head_obc=std::move(head_obc)]() mutable {
409-
logger().debug("{}: processing clone_obc={}", *this, clone_obc->get_oid());
410-
return remove_or_update(
411-
clone_obc, head_obc
412-
).safe_then_interruptible([clone_obc, this](auto&& txn) mutable {
413-
auto [submitted, all_completed] = pg->submit_transaction(
414-
std::move(clone_obc),
415-
std::move(txn),
416-
std::move(osd_op_p),
417-
std::move(log_entries));
418-
return submitted.then_interruptible(
419-
[this, all_completed=std::move(all_completed)]() mutable {
420-
return enter_stage<interruptor>(
421-
client_pp().wait_repop
422-
).then_interruptible([all_completed=std::move(all_completed)]() mutable{
423-
return std::move(all_completed);
424-
});
425-
});
426-
});
427-
});
428-
},
405+
std::bind(&SnapTrimObjSubEvent::process_and_submit,
406+
this, std::placeholders::_1, std::placeholders::_2),
429407
false
430408
).handle_error_interruptible(
431409
remove_or_update_iertr::pass_further{},
@@ -436,6 +414,33 @@ SnapTrimObjSubEvent::start()
436414
co_await interruptor::make_interruptible(handle.complete());
437415
}
438416

417+
ObjectContextLoader::load_obc_iertr::future<>
418+
SnapTrimObjSubEvent::process_and_submit(ObjectContextRef head_obc,
419+
ObjectContextRef clone_obc) {
420+
logger().debug("{}: got clone_obc={}", *this, clone_obc->get_oid());
421+
422+
co_await enter_stage<interruptor>(client_pp().process);
423+
424+
logger().debug("{}: processing clone_obc={}", *this, clone_obc->get_oid());
425+
426+
auto txn = co_await remove_or_update(clone_obc, head_obc);
427+
428+
auto [submitted, all_completed] = co_await pg->submit_transaction(
429+
std::move(clone_obc),
430+
std::move(txn),
431+
std::move(osd_op_p),
432+
std::move(log_entries)
433+
);
434+
435+
co_await std::move(submitted);
436+
437+
co_await enter_stage<interruptor>(client_pp().wait_repop);
438+
439+
co_await std::move(all_completed);
440+
441+
co_return;
442+
}
443+
439444
void SnapTrimObjSubEvent::print(std::ostream &lhs) const
440445
{
441446
lhs << "SnapTrimObjSubEvent("

src/crimson/osd/osd_operations/snaptrim_event.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,10 @@ class SnapTrimObjSubEvent : public PhasedOperationT<SnapTrimObjSubEvent> {
113113
private:
114114
object_stat_sum_t delta_stats;
115115

116+
ObjectContextLoader::load_obc_iertr::future<> process_and_submit(
117+
ObjectContextRef head_obc,
118+
ObjectContextRef clone_obc);
119+
116120
snap_trim_obj_subevent_ret_t remove_clone(
117121
ObjectContextRef obc,
118122
ObjectContextRef head_obc,
@@ -134,7 +138,7 @@ class SnapTrimObjSubEvent : public PhasedOperationT<SnapTrimObjSubEvent> {
134138
remove_or_update_iertr::future<ceph::os::Transaction>
135139
remove_or_update(ObjectContextRef obc, ObjectContextRef head_obc);
136140

137-
void add_log_entry(
141+
pg_log_entry_t& add_log_entry(
138142
int _op,
139143
const hobject_t& _soid,
140144
const eversion_t& pv,
@@ -152,6 +156,7 @@ class SnapTrimObjSubEvent : public PhasedOperationT<SnapTrimObjSubEvent> {
152156
mt,
153157
return_code);
154158
osd_op_p.at_version.version++;
159+
return log_entries.back();
155160
}
156161

157162
Ref<PG> pg;

0 commit comments

Comments
 (0)