Skip to content

Commit 681c4c5

Browse files
authored
Merge pull request ceph#55831 from xxhdx1985126/wip-seastore-lba-pointer-consolidation
crimson/os/seastore: lba pointer consolidation Reviewed-by: Yingxin Cheng <[email protected]>
2 parents 75dca86 + bfc2aba commit 681c4c5

File tree

10 files changed

+139
-112
lines changed

10 files changed

+139
-112
lines changed

src/crimson/os/seastore/btree/fixed_kv_btree.h

Lines changed: 23 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -505,6 +505,7 @@ class FixedKVBtree {
505505
Transaction::get_extent_ret ret;
506506

507507
if constexpr (std::is_base_of_v<typename internal_node_t::base_t, child_node_t>) {
508+
assert(i->get_val() != P_ADDR_ZERO);
508509
ret = c.trans.get_extent(
509510
i->get_val().maybe_relative_to(node->get_paddr()),
510511
&child_node);
@@ -515,6 +516,9 @@ class FixedKVBtree {
515516
ret = c.trans.get_extent(
516517
i->get_val().pladdr.get_paddr().maybe_relative_to(node->get_paddr()),
517518
&child_node);
519+
if (i->get_val().pladdr.get_paddr() == P_ADDR_ZERO) {
520+
assert(ret == Transaction::get_extent_ret::ABSENT);
521+
}
518522
}
519523
if (ret == Transaction::get_extent_ret::PRESENT) {
520524
if (child_node->is_stable_written()) {
@@ -533,7 +537,7 @@ class FixedKVBtree {
533537
}
534538
} else if (child_node->is_pending()) {
535539
if (child_node->is_mutation_pending()) {
536-
auto &prior = (child_node_t &)*child_node->prior_instance;
540+
auto &prior = (child_node_t &)*child_node->get_prior_instance();
537541
assert(prior.is_valid());
538542
assert(prior.is_parent_valid());
539543
if (node->is_mutation_pending()) {
@@ -565,18 +569,8 @@ class FixedKVBtree {
565569
auto pos = n.lower_bound_offset(i->get_key());
566570
assert(pos < n.get_node_size());
567571
child = n.children[pos];
568-
if (is_valid_child_ptr(child)) {
569-
auto c = (child_node_t*)child;
570-
assert(c->has_parent_tracker());
571-
assert(c->get_parent_node().get() == &n);
572-
}
573572
} else {
574573
child = node->children[i->get_offset()];
575-
if (is_valid_child_ptr(child)) {
576-
auto c = (child_node_t*)child;
577-
assert(c->has_parent_tracker());
578-
assert(c->get_parent_node().get() == node.get());
579-
}
580574
}
581575

582576
if (!is_valid_child_ptr(child)) {
@@ -592,6 +586,23 @@ class FixedKVBtree {
592586
: true);
593587
}
594588
}
589+
if (child == get_reserved_ptr()) {
590+
if constexpr(
591+
!std::is_base_of_v<typename internal_node_t::base_t,
592+
child_node_t>) {
593+
assert(i->get_val().pladdr.is_paddr());
594+
assert(i->get_val().pladdr.get_paddr() == P_ADDR_ZERO);
595+
} else {
596+
ceph_abort();
597+
}
598+
}
599+
} else {
600+
auto c = (child_node_t*)child;
601+
assert(c->has_parent_tracker());
602+
assert(c->get_parent_node().get() == node.get()
603+
|| (node->is_pending() && c->is_stable()
604+
&& c->get_parent_node().get() == &node->get_stable_for_key(
605+
i->get_key())));
595606
}
596607
} else {
597608
ceph_abort("impossible");
@@ -1034,46 +1045,7 @@ class FixedKVBtree {
10341045
fixed_kv_extent.get_user_hint(),
10351046
// get target rewrite generation
10361047
fixed_kv_extent.get_rewrite_generation());
1037-
fixed_kv_extent.get_bptr().copy_out(
1038-
0,
1039-
fixed_kv_extent.get_length(),
1040-
n_fixed_kv_extent->get_bptr().c_str());
1041-
n_fixed_kv_extent->set_modify_time(fixed_kv_extent.get_modify_time());
1042-
n_fixed_kv_extent->range = n_fixed_kv_extent->get_node_meta();
1043-
n_fixed_kv_extent->set_last_committed_crc(fixed_kv_extent.get_last_committed_crc());
1044-
1045-
if (fixed_kv_extent.get_type() == internal_node_t::TYPE ||
1046-
leaf_node_t::do_has_children) {
1047-
if (!fixed_kv_extent.is_pending()) {
1048-
n_fixed_kv_extent->copy_sources.emplace(&fixed_kv_extent);
1049-
n_fixed_kv_extent->prior_instance = &fixed_kv_extent;
1050-
} else {
1051-
ceph_assert(fixed_kv_extent.is_mutation_pending());
1052-
n_fixed_kv_extent->copy_sources.emplace(
1053-
(typename internal_node_t::base_t*
1054-
)fixed_kv_extent.get_prior_instance().get());
1055-
n_fixed_kv_extent->children = std::move(fixed_kv_extent.children);
1056-
n_fixed_kv_extent->prior_instance = fixed_kv_extent.get_prior_instance();
1057-
n_fixed_kv_extent->adjust_ptracker_for_children();
1058-
}
1059-
}
1060-
1061-
/* This is a bit underhanded. Any relative addrs here must necessarily
1062-
* be record relative as we are rewriting a dirty extent. Thus, we
1063-
* are using resolve_relative_addrs with a (likely negative) block
1064-
* relative offset to correct them to block-relative offsets adjusted
1065-
* for our new transaction location.
1066-
*
1067-
* Upon commit, these now block relative addresses will be interpreted
1068-
* against the real final address.
1069-
*/
1070-
if (!n_fixed_kv_extent->get_paddr().is_absolute()) {
1071-
// backend_type_t::SEGMENTED
1072-
assert(n_fixed_kv_extent->get_paddr().is_record_relative());
1073-
n_fixed_kv_extent->resolve_relative_addrs(
1074-
make_record_relative_paddr(0).block_relative_to(
1075-
n_fixed_kv_extent->get_paddr()));
1076-
} // else: backend_type_t::RANDOM_BLOCK
1048+
n_fixed_kv_extent->rewrite(fixed_kv_extent, 0);
10771049

10781050
SUBTRACET(
10791051
seastore_fixedkv_tree,

src/crimson/os/seastore/btree/fixed_kv_node.h

Lines changed: 53 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -59,15 +59,15 @@ struct FixedKVNode : ChildableCachedExtent {
5959
* b. prior_instance is empty
6060
* c. child pointers point at stable children. Child resolution is done
6161
* directly via this array.
62-
* c. copy_sources is empty
62+
* d. copy_sources is empty
6363
* 2. if nodes are mutation_pending:
6464
* a. parent is empty and needs to be fixed upon commit
6565
* b. prior_instance points to its stable version
6666
* c. child pointers are null except for initial_pending() children of
6767
* this transaction. Child resolution is done by first checking this
6868
* array, and then recursively resolving via the parent. We copy child
6969
* pointers from parent on commit.
70-
* c. copy_sources is empty
70+
* d. copy_sources is empty
7171
* 3. if nodes are initial_pending
7272
* a. parent points at its pending parent on this transaction (must exist)
7373
* b. prior_instance is empty or, if it's the result of rewrite, points to
@@ -80,6 +80,8 @@ struct FixedKVNode : ChildableCachedExtent {
8080
* d. copy_sources contains the set of stable nodes at the same tree-level (only
8181
* its "prior_instance" if the node is the result of a rewrite), with which
8282
* the lba range of this node overlaps.
83+
* 4. EXIST_CLEAN and EXIST_MUTATION_PENDING belong to 3 above (except that they
84+
* cannot be rewritten) because their parents must be mutated upon remapping.
8385
*/
8486
std::vector<ChildableCachedExtent*> children;
8587
std::set<FixedKVNodeRef, copy_source_cmp_t> copy_sources;
@@ -157,6 +159,43 @@ struct FixedKVNode : ChildableCachedExtent {
157159
(get_node_size() - offset - 1) * sizeof(ChildableCachedExtent*));
158160
}
159161

162+
virtual bool have_children() const = 0;
163+
164+
void on_rewrite(CachedExtent &extent, extent_len_t off) final {
165+
assert(get_type() == extent.get_type());
166+
assert(off == 0);
167+
auto &foreign_extent = (FixedKVNode&)extent;
168+
range = get_node_meta();
169+
170+
if (have_children()) {
171+
if (!foreign_extent.is_pending()) {
172+
copy_sources.emplace(&foreign_extent);
173+
} else {
174+
ceph_assert(foreign_extent.is_mutation_pending());
175+
copy_sources.emplace(
176+
foreign_extent.get_prior_instance()->template cast<FixedKVNode>());
177+
children = std::move(foreign_extent.children);
178+
adjust_ptracker_for_children();
179+
}
180+
}
181+
182+
/* This is a bit underhanded. Any relative addrs here must necessarily
183+
* be record relative as we are rewriting a dirty extent. Thus, we
184+
* are using resolve_relative_addrs with a (likely negative) block
185+
* relative offset to correct them to block-relative offsets adjusted
186+
* for our new transaction location.
187+
*
188+
* Upon commit, these now block relative addresses will be interpreted
189+
* against the real final address.
190+
*/
191+
if (!get_paddr().is_absolute()) {
192+
// backend_type_t::SEGMENTED
193+
assert(get_paddr().is_record_relative());
194+
resolve_relative_addrs(
195+
make_record_relative_paddr(0).block_relative_to(get_paddr()));
196+
} // else: backend_type_t::RANDOM_BLOCK
197+
}
198+
160199
FixedKVNode& get_stable_for_key(node_key_t key) const {
161200
ceph_assert(is_pending());
162201
if (is_mutation_pending()) {
@@ -242,7 +281,7 @@ struct FixedKVNode : ChildableCachedExtent {
242281
return c.cache.template get_extent_viewable_by_trans<T>(c.trans, (T*)child);
243282
} else if (is_pending()) {
244283
auto &sparent = get_stable_for_key(key);
245-
auto spos = sparent.child_pos_for_key(key);
284+
auto spos = sparent.lower_bound_offset(key);
246285
auto child = sparent.children[spos];
247286
if (is_valid_child_ptr(child)) {
248287
return c.cache.template get_extent_viewable_by_trans<T>(c.trans, (T*)child);
@@ -415,7 +454,6 @@ struct FixedKVNode : ChildableCachedExtent {
415454

416455
virtual uint16_t lower_bound_offset(node_key_t) const = 0;
417456
virtual uint16_t upper_bound_offset(node_key_t) const = 0;
418-
virtual uint16_t child_pos_for_key(node_key_t) const = 0;
419457

420458
virtual bool validate_stable_children() = 0;
421459

@@ -488,10 +526,6 @@ struct FixedKVNode : ChildableCachedExtent {
488526
reset_parent_tracker();
489527
}
490528

491-
bool is_rewrite() {
492-
return is_initial_pending() && get_prior_instance();
493-
}
494-
495529
void on_initial_write() final {
496530
// All in-memory relative addrs are necessarily block-relative
497531
resolve_relative_addrs(get_paddr());
@@ -564,6 +598,10 @@ struct FixedKVInternalNode
564598
: FixedKVNode<NODE_KEY>(rhs),
565599
node_layout_t(this->get_bptr().c_str()) {}
566600

601+
bool have_children() const final {
602+
return true;
603+
}
604+
567605
bool is_leaf_and_has_children() const final {
568606
return false;
569607
}
@@ -648,13 +686,6 @@ struct FixedKVInternalNode
648686
return this->upper_bound(key).get_offset();
649687
}
650688

651-
uint16_t child_pos_for_key(NODE_KEY key) const final {
652-
auto it = this->upper_bound(key);
653-
assert(it != this->begin());
654-
--it;
655-
return it.get_offset();
656-
}
657-
658689
NODE_KEY get_key_from_idx(uint16_t idx) const final {
659690
return this->iter_idx(idx).get_key();
660691
}
@@ -690,7 +721,7 @@ struct FixedKVInternalNode
690721
return CachedExtentRef(new node_type_t(*this));
691722
};
692723

693-
void on_replace_prior(Transaction&) final {
724+
void on_replace_prior() final {
694725
ceph_assert(!this->is_rewrite());
695726
this->set_children_from_prior_instance();
696727
auto &prior = (this_type_t&)(*this->get_prior_instance());
@@ -977,6 +1008,10 @@ struct FixedKVLeafNode
9771008

9781009
static constexpr bool do_has_children = has_children;
9791010

1011+
bool have_children() const final {
1012+
return do_has_children;
1013+
}
1014+
9801015
bool is_leaf_and_has_children() const final {
9811016
return has_children;
9821017
}
@@ -1014,7 +1049,7 @@ struct FixedKVLeafNode
10141049
} else if (this->is_pending()) {
10151050
auto key = this->iter_idx(pos).get_key();
10161051
auto &sparent = this->get_stable_for_key(key);
1017-
auto spos = sparent.child_pos_for_key(key);
1052+
auto spos = sparent.lower_bound_offset(key);
10181053
auto child = sparent.children[spos];
10191054
if (is_valid_child_ptr(child)) {
10201055
ceph_assert(child->is_logical());
@@ -1078,7 +1113,7 @@ struct FixedKVLeafNode
10781113
true);
10791114
}
10801115

1081-
void on_replace_prior(Transaction&) final {
1116+
void on_replace_prior() final {
10821117
ceph_assert(!this->is_rewrite());
10831118
if constexpr (has_children) {
10841119
this->set_children_from_prior_instance();
@@ -1104,10 +1139,6 @@ struct FixedKVLeafNode
11041139
return this->upper_bound(key).get_offset();
11051140
}
11061141

1107-
uint16_t child_pos_for_key(NODE_KEY key) const final {
1108-
return lower_bound_offset(key);
1109-
}
1110-
11111142
NODE_KEY get_key_from_idx(uint16_t idx) const final {
11121143
return this->iter_idx(idx).get_key();
11131144
}

src/crimson/os/seastore/cache.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -829,7 +829,7 @@ void Cache::commit_replace_extent(
829829
add_to_dirty(next);
830830
}
831831

832-
next->on_replace_prior(t);
832+
next->on_replace_prior();
833833
invalidate_extent(t, *prev);
834834
}
835835

@@ -1566,6 +1566,7 @@ void Cache::complete_commit(
15661566
i->on_initial_write();
15671567

15681568
i->state = CachedExtent::extent_state_t::CLEAN;
1569+
i->prior_instance.reset();
15691570
DEBUGT("add extent as fresh, inline={} -- {}",
15701571
t, is_inline, *i);
15711572
const auto t_src = t.get_src();

src/crimson/os/seastore/cached_extent.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ LogicalCachedExtent::~LogicalCachedExtent() {
137137
}
138138
}
139139

140-
void LogicalCachedExtent::on_replace_prior(Transaction &t) {
140+
void LogicalCachedExtent::on_replace_prior() {
141141
assert(is_mutation_pending());
142142
take_prior_parent_tracker();
143143
assert(get_parent_node());

0 commit comments

Comments
 (0)