Skip to content

Commit b6d2724

Browse files
committed
crimson/os/seastore/omap_manager: only mutate the parent when
merge/balance can proceed Fixes: https://tracker.ceph.com/issues/71448 Signed-off-by: Xuehan Xu <[email protected]>
1 parent 2530126 commit b6d2724

File tree

3 files changed

+46
-34
lines changed

3 files changed

+46
-34
lines changed

src/crimson/os/seastore/omap_manager/btree/omap_btree_node.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,10 +102,11 @@ struct OMapNode : LogicalChildNode {
102102

103103
using make_balanced_iertr = base_iertr;
104104
using make_balanced_ret = make_balanced_iertr::future
105-
<std::tuple<OMapNodeRef, OMapNodeRef, std::optional<std::string>>>;
105+
<std::tuple<OMapNodeRef, OMapNodeRef, std::string>>;
106106
virtual make_balanced_ret make_balanced(
107107
omap_context_t oc,
108-
OMapNodeRef _right) = 0;
108+
OMapNodeRef _right,
109+
uint32_t pivot_idx) = 0;
109110

110111
virtual omap_node_meta_t get_node_meta() const = 0;
111112
virtual bool extent_will_overflow(

src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc

Lines changed: 40 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -396,29 +396,24 @@ OMapInnerNode::make_full_merge(omap_context_t oc, OMapNodeRef right)
396396
}
397397

398398
OMapInnerNode::make_balanced_ret
399-
OMapInnerNode::make_balanced(omap_context_t oc, OMapNodeRef _right)
399+
OMapInnerNode::make_balanced(
400+
omap_context_t oc, OMapNodeRef _right, uint32_t pivot_idx)
400401
{
401402
LOG_PREFIX(OMapInnerNode::make_balanced);
402403
DEBUGT("l: {}, r: {}", oc.t, *this, *_right);
403404
ceph_assert(_right->get_type() == TYPE);
404405
auto &right = *_right->cast<OMapInnerNode>();
405-
auto pivot_idx = get_balance_pivot_idx(*this, right);
406-
if (!pivot_idx) {
407-
return make_balanced_ret(
408-
interruptible::ready_future_marker{},
409-
std::make_tuple(OMapNodeRef{}, OMapNodeRef{}, std::nullopt));
410-
}
411406
return oc.tm.alloc_extents<OMapInnerNode>(oc.t, oc.hint,
412407
OMAP_INNER_BLOCK_SIZE, 2)
413408
.si_then([this, &right, pivot_idx, oc] (auto &&replacement_pair){
414409
auto replacement_left = replacement_pair.front();
415410
auto replacement_right = replacement_pair.back();
416-
this->balance_child_ptrs(oc.t, *this, right, *pivot_idx,
411+
this->balance_child_ptrs(oc.t, *this, right, pivot_idx,
417412
*replacement_left, *replacement_right);
418413
return make_balanced_ret(
419414
interruptible::ready_future_marker{},
420415
std::make_tuple(replacement_left, replacement_right,
421-
balance_into_new_nodes(*this, right, *pivot_idx,
416+
balance_into_new_nodes(*this, right, pivot_idx,
422417
*replacement_left, *replacement_right)));
423418
}).handle_error_interruptible(
424419
crimson::ct_error::enospc::assert_failure{"unexpected enospc"},
@@ -435,6 +430,12 @@ OMapInnerNode::do_merge(
435430
OMapNodeRef r)
436431
{
437432
LOG_PREFIX(OMapInnerNode::do_merge);
433+
if (!is_mutable()) {
434+
auto mut = oc.tm.get_mutable_extent(oc.t, this)->cast<OMapInnerNode>();
435+
auto mut_liter = mut->iter_idx(liter->get_offset());
436+
auto mut_riter = mut->iter_idx(riter->get_offset());
437+
return mut->do_merge(oc, mut_liter, mut_riter, l, r);
438+
}
438439
DEBUGT("make_full_merge l {} r {} liter {} riter {}",
439440
oc.t, *l, *r, liter->get_key(), riter->get_key());
440441
return l->make_full_merge(oc, r
@@ -490,19 +491,33 @@ OMapInnerNode::do_balance(
490491
OMapNodeRef r)
491492
{
492493
LOG_PREFIX(OMapInnerNode::do_balance);
494+
std::optional<uint32_t> pivot_idx = 0;
495+
if (get_meta().depth > 2) {
496+
pivot_idx = OMapInnerNode::get_balance_pivot_idx(
497+
static_cast<OMapInnerNode&>(*l), static_cast<OMapInnerNode&>(*r));
498+
} else {
499+
pivot_idx = OMapLeafNode::get_balance_pivot_idx(
500+
static_cast<OMapLeafNode&>(*l), static_cast<OMapLeafNode&>(*r));
501+
}
502+
if (!pivot_idx) {
503+
return merge_entry_ret(
504+
interruptible::ready_future_marker{},
505+
mutation_result_t(mutation_status_t::SUCCESS,
506+
std::nullopt, std::nullopt));
507+
}
508+
if (!is_mutable()) {
509+
auto mut = oc.tm.get_mutable_extent(oc.t, this)->cast<OMapInnerNode>();
510+
auto mut_liter = mut->iter_idx(liter->get_offset());
511+
auto mut_riter = mut->iter_idx(riter->get_offset());
512+
return mut->do_balance(oc, mut_liter, mut_riter, l, r);
513+
}
493514
DEBUGT("balanced l {} r {} liter {} riter {}",
494515
oc.t, *l, *r, liter->get_key(), riter->get_key());
495-
return l->make_balanced(oc, r
516+
return l->make_balanced(oc, r, *pivot_idx
496517
).si_then([FNAME, liter=liter, riter=riter, l=l, r=r, oc, this](auto tuple) {
497518
auto [replacement_l, replacement_r, replacement_pivot] = tuple;
498-
if (!replacement_pivot) {
499-
return merge_entry_ret(
500-
interruptible::ready_future_marker{},
501-
mutation_result_t(mutation_status_t::SUCCESS,
502-
std::nullopt, std::nullopt));
503-
}
504-
replacement_l->init_range(l->get_begin(), *replacement_pivot);
505-
replacement_r->init_range(*replacement_pivot, r->get_end());
519+
replacement_l->init_range(l->get_begin(), replacement_pivot);
520+
replacement_r->init_range(replacement_pivot, r->get_end());
506521
DEBUGT("to update parent: {} {} {}",
507522
oc.t, *this, *replacement_l, *replacement_r);
508523
if (get_meta().depth > 2) { // l and r are inner nodes
@@ -522,7 +537,7 @@ OMapInnerNode::do_balance(
522537
liter,
523538
replacement_l->get_laddr(),
524539
maybe_get_delta_buffer());
525-
bool overflow = extent_will_overflow(replacement_pivot->size(),
540+
bool overflow = extent_will_overflow(replacement_pivot.size(),
526541
std::nullopt);
527542
if (!overflow) {
528543
this->update_child_ptr(
@@ -532,7 +547,7 @@ OMapInnerNode::do_balance(
532547
journal_inner_insert(
533548
riter,
534549
replacement_r->get_laddr(),
535-
*replacement_pivot,
550+
replacement_pivot,
536551
maybe_get_delta_buffer());
537552
std::vector<laddr_t> dec_laddrs{l->get_laddr(), r->get_laddr()};
538553
return dec_ref(oc, dec_laddrs
@@ -550,7 +565,7 @@ OMapInnerNode::do_balance(
550565
this->remove_child_ptr(riter.get_offset());
551566
journal_inner_remove(riter, maybe_get_delta_buffer());
552567
return make_split_insert(
553-
oc, riter, *replacement_pivot, replacement_r
568+
oc, riter, replacement_pivot, replacement_r
554569
).si_then([this, oc, l = l, r = r](auto mresult) {
555570
std::vector<laddr_t> dec_laddrs{
556571
l->get_laddr(),
@@ -574,17 +589,11 @@ OMapInnerNode::merge_entry(
574589
{
575590
LOG_PREFIX(OMapInnerNode::merge_entry);
576591
DEBUGT("{}, parent: {}", oc.t, *entry, *this);
577-
if (!is_mutable()) {
578-
auto mut = oc.tm.get_mutable_extent(oc.t, this)->cast<OMapInnerNode>();
579-
auto mut_iter = mut->iter_idx(iter->get_offset());
580-
return mut->merge_entry(oc, mut_iter, entry);
581-
}
582592
auto is_left = (iter + 1) == iter_cend();
583593
auto donor_iter = is_left ? iter - 1 : iter + 1;
584594
return get_child_node(oc, donor_iter
585595
).si_then([=, this](auto &&donor) mutable {
586596
ceph_assert(!donor->is_btree_root());
587-
LOG_PREFIX(OMapInnerNode::merge_entry);
588597
auto [l, r] = is_left ?
589598
std::make_pair(donor, entry) : std::make_pair(entry, donor);
590599
auto [liter, riter] = is_left ?
@@ -826,13 +835,14 @@ OMapLeafNode::make_full_merge(omap_context_t oc, OMapNodeRef right)
826835
}
827836

828837
OMapLeafNode::make_balanced_ret
829-
OMapLeafNode::make_balanced(omap_context_t oc, OMapNodeRef _right)
838+
OMapLeafNode::make_balanced(
839+
omap_context_t oc, OMapNodeRef _right, uint32_t pivot_idx)
830840
{
831841
ceph_assert(_right->get_type() == TYPE);
832842
LOG_PREFIX(OMapLeafNode::make_balanced);
833843
DEBUGT("this: {}", oc.t, *this);
834844
return oc.tm.alloc_extents<OMapLeafNode>(oc.t, oc.hint, get_len(), 2)
835-
.si_then([this, _right] (auto &&replacement_pair) {
845+
.si_then([this, _right, pivot_idx] (auto &&replacement_pair) {
836846
auto replacement_left = replacement_pair.front();
837847
auto replacement_right = replacement_pair.back();
838848
auto &right = *_right->cast<OMapLeafNode>();
@@ -841,7 +851,7 @@ OMapLeafNode::make_balanced(omap_context_t oc, OMapNodeRef _right)
841851
std::make_tuple(
842852
replacement_left, replacement_right,
843853
balance_into_new_nodes(
844-
*this, right,
854+
*this, right, pivot_idx,
845855
*replacement_left, *replacement_right)));
846856
}).handle_error_interruptible(
847857
crimson::ct_error::enospc::assert_failure{"unexpected enospc"},

src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ struct OMapInnerNode
189189
omap_context_t oc, OMapNodeRef right) final;
190190

191191
make_balanced_ret make_balanced(
192-
omap_context_t oc, OMapNodeRef right) final;
192+
omap_context_t oc, OMapNodeRef right, uint32_t pivot_idx) final;
193193

194194
using make_split_insert_iertr = base_iertr;
195195
using make_split_insert_ret = make_split_insert_iertr::future<mutation_result_t>;
@@ -436,7 +436,8 @@ struct OMapLeafNode
436436

437437
make_balanced_ret make_balanced(
438438
omap_context_t oc,
439-
OMapNodeRef _right) final;
439+
OMapNodeRef _right,
440+
uint32_t pivot_idx) final;
440441

441442
static constexpr extent_types_t TYPE = extent_types_t::OMAP_LEAF;
442443
extent_types_t get_type() const final {

0 commit comments

Comments
 (0)