Skip to content

Commit 4c7c472

Browse files
authored
Merge pull request ceph#59543 from xxhdx1985126/wip-63844
crimson/osd/pg: do_osd_ops_execute fixes Reviewed-by: Matan Breizman <[email protected]> Reviewed-by: Yingxin Cheng <[email protected]>
2 parents 82de5f0 + 1716224 commit 4c7c472

File tree

7 files changed

+73
-71
lines changed

7 files changed

+73
-71
lines changed

src/crimson/osd/object_context.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,11 @@ class ObjectContext : public ceph::common::intrusive_lru_base<
7676
ObjectContext(hobject_t hoid) : lock(hoid),
7777
obs(std::move(hoid)) {}
7878

79+
void update_from(const ObjectContext &obc) {
80+
obs = obc.obs;
81+
ssc = obc.ssc;
82+
}
83+
7984
const hobject_t &get_oid() const {
8085
return obs.oi.soid;
8186
}

src/crimson/osd/object_context_loader.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "crimson/osd/object_context_loader.h"
22
#include "osd/osd_types_fmt.h"
3+
#include "osd/object_state_fmt.h"
34

45
SET_SUBSYS(osd);
56

@@ -111,7 +112,7 @@ using crimson::common::local_conf;
111112
return std::invoke(std::move(func), obc);
112113
}
113114
).finally([FNAME, this, obc=ObjectContextRef(obc)] {
114-
DEBUGDPP("released object {}", dpp, obc->get_oid());
115+
DEBUGDPP("released object {}, {}", dpp, obc->get_oid(), obc->obs);
115116
if constexpr (track) {
116117
obc->remove_from(obc_set_accessing);
117118
}
@@ -125,7 +126,7 @@ using crimson::common::local_conf;
125126
return std::invoke(std::move(func), obc);
126127
}
127128
).finally([FNAME, this, obc=ObjectContextRef(obc)] {
128-
DEBUGDPP("released object {}", dpp, obc->get_oid());
129+
DEBUGDPP("released object {}, {}", dpp, obc->get_oid(), obc->obs);
129130
if constexpr (track) {
130131
obc->remove_from(obc_set_accessing);
131132
}

src/crimson/osd/ops_executer.h

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -552,11 +552,7 @@ OpsExecuter::flush_changes_n_do_ops_effects(
552552

553553
template <class Func>
554554
struct OpsExecuter::RollbackHelper {
555-
interruptible_future<> rollback_obc_if_modified(const std::error_code& e);
556-
ObjectContextRef get_obc() const {
557-
assert(ox);
558-
return ox->obc;
559-
}
555+
void rollback_obc_if_modified(const std::error_code& e);
560556
seastar::lw_shared_ptr<OpsExecuter> ox;
561557
Func func;
562558
};
@@ -569,8 +565,7 @@ OpsExecuter::create_rollbacker(Func&& func) {
569565

570566

571567
template <class Func>
572-
OpsExecuter::interruptible_future<>
573-
OpsExecuter::RollbackHelper<Func>::rollback_obc_if_modified(
568+
void OpsExecuter::RollbackHelper<Func>::rollback_obc_if_modified(
574569
const std::error_code& e)
575570
{
576571
// Oops, an operation had failed. do_osd_ops() altogether with
@@ -580,12 +575,6 @@ OpsExecuter::RollbackHelper<Func>::rollback_obc_if_modified(
580575
// we maintain and we did it for both reading and writing.
581576
// Now all modifications must be reverted.
582577
//
583-
// Let's just reload from the store. Evicting from the shared
584-
// LRU would be tricky as next MOSDOp (the one at `get_obc`
585-
// phase) could actually already finished the lookup. Fortunately,
586-
// this is supposed to live on cold paths, so performance is not
587-
// a concern -- simplicity wins.
588-
//
589578
// The conditional's purpose is to efficiently handle hot errors
590579
// which may appear as a result of e.g. CEPH_OSD_OP_CMPXATTR or
591580
// CEPH_OSD_OP_OMAP_CMP. These are read-like ops and clients
@@ -600,7 +589,9 @@ OpsExecuter::RollbackHelper<Func>::rollback_obc_if_modified(
600589
ox->obc->get_oid(),
601590
e,
602591
need_rollback);
603-
return need_rollback ? func(*ox->obc) : interruptor::now();
592+
if (need_rollback) {
593+
func(ox->obc);
594+
}
604595
}
605596

606597
// PgOpsExecuter -- a class for executing ops targeting a certain PG.

src/crimson/osd/pg.cc

Lines changed: 55 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -964,6 +964,15 @@ PG::BackgroundProcessLock::lock() noexcept
964964
return interruptor::make_interruptible(mutex.lock());
965965
}
966966

967+
// We may need to rollback the ObjectContext on failed op execution.
968+
// Copy the current obc before mutating it in order to recover on failures.
969+
ObjectContextRef duplicate_obc(const ObjectContextRef &obc) {
970+
ObjectContextRef object_context = new ObjectContext(obc->obs.oi.soid);
971+
object_context->obs = obc->obs;
972+
object_context->ssc = new SnapSetContext(*obc->ssc);
973+
return object_context;
974+
}
975+
967976
template <class Ret, class SuccessFunc, class FailureFunc>
968977
PG::do_osd_ops_iertr::future<PG::pg_rep_op_fut_t<Ret>>
969978
PG::do_osd_ops_execute(
@@ -976,9 +985,9 @@ PG::do_osd_ops_execute(
976985
FailureFunc&& failure_func)
977986
{
978987
assert(ox);
979-
auto rollbacker = ox->create_rollbacker([this] (auto& obc) {
980-
return obc_loader.reload_obc(obc).handle_error_interruptible(
981-
load_obc_ertr::assert_all{"can't live with object state messed up"});
988+
auto rollbacker = ox->create_rollbacker(
989+
[object_context=duplicate_obc(obc)] (auto& obc) mutable {
990+
obc->update_from(*object_context);
982991
});
983992
auto failure_func_ptr = seastar::make_lw_shared(std::move(failure_func));
984993
return interruptor::do_for_each(ops, [ox](OSDOp& osd_op) {
@@ -1040,23 +1049,21 @@ PG::do_osd_ops_execute(
10401049
std::move(log_entries));
10411050
});
10421051
}).safe_then_unpack_interruptible(
1043-
[success_func=std::move(success_func), rollbacker, this, failure_func_ptr]
1052+
[success_func=std::move(success_func), rollbacker, this, failure_func_ptr, obc]
10441053
(auto submitted_fut, auto _all_completed_fut) mutable {
10451054

10461055
auto all_completed_fut = _all_completed_fut.safe_then_interruptible_tuple(
10471056
std::move(success_func),
10481057
crimson::ct_error::object_corrupted::handle(
1049-
[rollbacker, this] (const std::error_code& e) mutable {
1058+
[rollbacker, this, obc] (const std::error_code& e) mutable {
10501059
// this is a path for EIO. it's special because we want to fix the obejct
10511060
// and try again. that is, the layer above `PG::do_osd_ops` is supposed to
10521061
// restart the execution.
1053-
return rollbacker.rollback_obc_if_modified(e).then_interruptible(
1054-
[obc=rollbacker.get_obc(), this] {
1055-
return repair_object(obc->obs.oi.soid,
1056-
obc->obs.oi.version
1057-
).then_interruptible([] {
1058-
return do_osd_ops_iertr::future<Ret>{crimson::ct_error::eagain::make()};
1059-
});
1062+
rollbacker.rollback_obc_if_modified(e);
1063+
return repair_object(obc->obs.oi.soid,
1064+
obc->obs.oi.version
1065+
).then_interruptible([] {
1066+
return do_osd_ops_iertr::future<Ret>{crimson::ct_error::eagain::make()};
10601067
});
10611068
}), OpsExecuter::osd_op_errorator::all_same_way(
10621069
[rollbacker, failure_func_ptr]
@@ -1065,11 +1072,8 @@ PG::do_osd_ops_execute(
10651072
ceph_assert(e.value() == EDQUOT ||
10661073
e.value() == ENOSPC ||
10671074
e.value() == EAGAIN);
1068-
return rollbacker.rollback_obc_if_modified(e).then_interruptible(
1069-
[e, failure_func_ptr] {
1070-
// no need to record error log
1071-
return (*failure_func_ptr)(e);
1072-
});
1075+
rollbacker.rollback_obc_if_modified(e);
1076+
return (*failure_func_ptr)(e);
10731077
}));
10741078

10751079
return PG::do_osd_ops_iertr::make_ready_future<pg_rep_op_fut_t<Ret>>(
@@ -1081,45 +1085,42 @@ PG::do_osd_ops_execute(
10811085
rollbacker, failure_func_ptr]
10821086
(const std::error_code& e) mutable {
10831087
ceph_tid_t rep_tid = shard_services.get_tid();
1084-
return rollbacker.rollback_obc_if_modified(e).then_interruptible(
1085-
[&, op_info, m, obc,
1086-
this, e, rep_tid, failure_func_ptr] {
1087-
// record error log
1088-
auto maybe_submit_error_log =
1089-
seastar::make_ready_future<std::optional<eversion_t>>(std::nullopt);
1090-
// call submit_error_log only for non-internal clients
1091-
if constexpr (!std::is_same_v<Ret, void>) {
1092-
if(op_info.may_write()) {
1093-
maybe_submit_error_log =
1094-
submit_error_log(m, op_info, obc, e, rep_tid);
1095-
}
1088+
rollbacker.rollback_obc_if_modified(e);
1089+
// record error log
1090+
auto maybe_submit_error_log =
1091+
interruptor::make_ready_future<std::optional<eversion_t>>(std::nullopt);
1092+
// call submit_error_log only for non-internal clients
1093+
if constexpr (!std::is_same_v<Ret, void>) {
1094+
if(op_info.may_write()) {
1095+
maybe_submit_error_log =
1096+
submit_error_log(m, op_info, obc, e, rep_tid);
10961097
}
1097-
return maybe_submit_error_log.then(
1098-
[this, failure_func_ptr, e, rep_tid] (auto version) {
1099-
auto all_completed =
1100-
[this, failure_func_ptr, e, rep_tid, version] {
1101-
if (version.has_value()) {
1102-
return complete_error_log(rep_tid, version.value()).then(
1103-
[failure_func_ptr, e] {
1104-
return (*failure_func_ptr)(e);
1105-
});
1106-
} else {
1098+
}
1099+
return maybe_submit_error_log.then_interruptible(
1100+
[this, failure_func_ptr, e, rep_tid] (auto version) {
1101+
auto all_completed =
1102+
[this, failure_func_ptr, e, rep_tid, version] {
1103+
if (version.has_value()) {
1104+
return complete_error_log(rep_tid, version.value()
1105+
).then_interruptible([failure_func_ptr, e] {
11071106
return (*failure_func_ptr)(e);
1108-
}
1109-
};
1110-
return PG::do_osd_ops_iertr::make_ready_future<pg_rep_op_fut_t<Ret>>(
1111-
std::move(seastar::now()),
1112-
std::move(all_completed())
1113-
);
1114-
});
1107+
});
1108+
} else {
1109+
return (*failure_func_ptr)(e);
1110+
}
1111+
};
1112+
return PG::do_osd_ops_iertr::make_ready_future<pg_rep_op_fut_t<Ret>>(
1113+
std::move(seastar::now()),
1114+
std::move(all_completed())
1115+
);
11151116
});
11161117
}));
11171118
}
11181119

1119-
seastar::future<> PG::complete_error_log(const ceph_tid_t& rep_tid,
1120+
PG::interruptible_future<> PG::complete_error_log(const ceph_tid_t& rep_tid,
11201121
const eversion_t& version)
11211122
{
1122-
auto result = seastar::now();
1123+
auto result = interruptor::now();
11231124
auto last_complete = peering_state.get_info().last_complete;
11241125
ceph_assert(log_entry_update_waiting_on.contains(rep_tid));
11251126
auto& log_update = log_entry_update_waiting_on[rep_tid];
@@ -1133,8 +1134,9 @@ seastar::future<> PG::complete_error_log(const ceph_tid_t& rep_tid,
11331134
} else {
11341135
logger().debug("complete_error_log: rep_tid {} awaiting update from {}",
11351136
rep_tid, log_update.waiting_on);
1136-
result = log_update.all_committed.get_shared_future().then(
1137-
[this, last_complete, rep_tid, version] {
1137+
result = interruptor::make_interruptible(
1138+
log_update.all_committed.get_shared_future()
1139+
).then_interruptible([this, last_complete, rep_tid, version] {
11381140
logger().debug("complete_error_log: rep_tid {} awaited ", rep_tid);
11391141
peering_state.complete_write(version, last_complete);
11401142
ceph_assert(!log_entry_update_waiting_on.contains(rep_tid));
@@ -1144,7 +1146,7 @@ seastar::future<> PG::complete_error_log(const ceph_tid_t& rep_tid,
11441146
return result;
11451147
}
11461148

1147-
seastar::future<std::optional<eversion_t>> PG::submit_error_log(
1149+
PG::interruptible_future<std::optional<eversion_t>> PG::submit_error_log(
11481150
Ref<MOSDOp> m,
11491151
const OpInfo &op_info,
11501152
ObjectContextRef obc,
@@ -1175,7 +1177,7 @@ seastar::future<std::optional<eversion_t>> PG::submit_error_log(
11751177

11761178
return seastar::do_with(log_entries, set<pg_shard_t>{},
11771179
[this, t=std::move(t), rep_tid](auto& log_entries, auto& waiting_on) mutable {
1178-
return seastar::do_for_each(get_acting_recovery_backfill(),
1180+
return interruptor::do_for_each(get_acting_recovery_backfill(),
11791181
[this, log_entries, waiting_on, rep_tid]
11801182
(auto& i) mutable {
11811183
pg_shard_t peer(i);
@@ -1200,7 +1202,7 @@ seastar::future<std::optional<eversion_t>> PG::submit_error_log(
12001202
return shard_services.send_to_osd(peer.osd,
12011203
std::move(log_m),
12021204
get_osdmap_epoch());
1203-
}).then([this, waiting_on, t=std::move(t), rep_tid] () mutable {
1205+
}).then_interruptible([this, waiting_on, t=std::move(t), rep_tid] () mutable {
12041206
waiting_on.insert(pg_whoami);
12051207
logger().debug("submit_error_log: inserting rep_tid {}", rep_tid);
12061208
log_entry_update_waiting_on.insert(

src/crimson/osd/pg.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -619,9 +619,9 @@ class PG : public boost::intrusive_ref_counter<
619619

620620
void print(std::ostream& os) const;
621621
void dump_primary(Formatter*);
622-
seastar::future<> complete_error_log(const ceph_tid_t& rep_tid,
622+
interruptible_future<> complete_error_log(const ceph_tid_t& rep_tid,
623623
const eversion_t& version);
624-
seastar::future<std::optional<eversion_t>> submit_error_log(
624+
interruptible_future<std::optional<eversion_t>> submit_error_log(
625625
Ref<MOSDOp> m,
626626
const OpInfo &op_info,
627627
ObjectContextRef obc,

src/crimson/osd/pg_backend.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "replicated_recovery_backend.h"
3131
#include "ec_backend.h"
3232
#include "exceptions.h"
33+
#include "osd/object_state_fmt.h"
3334

3435
namespace {
3536
seastar::logger& logger() {
@@ -928,6 +929,7 @@ PGBackend::create_iertr::future<> PGBackend::create(
928929
ceph::os::Transaction& txn,
929930
object_stat_sum_t& delta_stats)
930931
{
932+
logger().debug("{} obc existed: {}, osd_op {}", __func__, os, osd_op);
931933
if (os.exists && !os.oi.is_whiteout() &&
932934
(osd_op.op.flags & CEPH_OSD_OP_FLAG_EXCL)) {
933935
// this is an exclusive create

src/osd/osd_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4268,6 +4268,7 @@ struct OSDOp {
42684268
}
42694269
};
42704270
std::ostream& operator<<(std::ostream& out, const OSDOp& op);
4271+
template <> struct fmt::formatter<OSDOp> : fmt::ostream_formatter {};
42714272

42724273
struct pg_log_op_return_item_t {
42734274
int32_t rval;

0 commit comments

Comments
 (0)