Skip to content

Commit 063e429

Browse files
committed
crimson/os/seastore/transaction_manager: fix lba mappings before getting
logical extents through them Signed-off-by: Xuehan Xu <[email protected]>
1 parent 10c0f2a commit 063e429

File tree

7 files changed

+128
-26
lines changed

7 files changed

+128
-26
lines changed

src/crimson/os/seastore/btree/btree_range_pin.cc

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,10 @@ bool BtreeNodeMapping<key_t, val_t>::is_stable() const
3131
assert(!this->parent_modified());
3232
assert(pos != std::numeric_limits<uint16_t>::max());
3333
auto &p = (FixedKVNode<key_t>&)*parent;
34-
return p.is_child_stable(ctx, pos);
34+
auto k = this->is_indirect()
35+
? this->get_intermediate_base()
36+
: get_key();
37+
return p.is_child_stable(ctx, pos, k);
3538
}
3639

3740
template <typename key_t, typename val_t>
@@ -40,7 +43,10 @@ bool BtreeNodeMapping<key_t, val_t>::is_data_stable() const
4043
assert(!this->parent_modified());
4144
assert(pos != std::numeric_limits<uint16_t>::max());
4245
auto &p = (FixedKVNode<key_t>&)*parent;
43-
return p.is_child_data_stable(ctx, pos);
46+
auto k = this->is_indirect()
47+
? this->get_intermediate_base()
48+
: get_key();
49+
return p.is_child_data_stable(ctx, pos, k);
4450
}
4551

4652
template class BtreeNodeMapping<laddr_t, paddr_t>;

src/crimson/os/seastore/btree/fixed_kv_node.h

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -265,8 +265,14 @@ struct FixedKVNode : ChildableCachedExtent {
265265
set_child_ptracker(child);
266266
}
267267

268-
virtual bool is_child_stable(op_context_t<node_key_t>, uint16_t pos) const = 0;
269-
virtual bool is_child_data_stable(op_context_t<node_key_t>, uint16_t pos) const = 0;
268+
virtual bool is_child_stable(
269+
op_context_t<node_key_t>,
270+
uint16_t pos,
271+
node_key_t key) const = 0;
272+
virtual bool is_child_data_stable(
273+
op_context_t<node_key_t>,
274+
uint16_t pos,
275+
node_key_t key) const = 0;
270276

271277
template <typename T>
272278
get_child_ret_t<T> get_child(
@@ -275,6 +281,7 @@ struct FixedKVNode : ChildableCachedExtent {
275281
node_key_t key)
276282
{
277283
assert(children.capacity());
284+
assert(key == get_key_from_idx(pos));
278285
auto child = children[pos];
279286
ceph_assert(!is_reserved_ptr(child));
280287
if (is_valid_child_ptr(child)) {
@@ -632,11 +639,17 @@ struct FixedKVInternalNode
632639
}
633640
}
634641

635-
bool is_child_stable(op_context_t<NODE_KEY>, uint16_t pos) const final {
642+
bool is_child_stable(
643+
op_context_t<NODE_KEY>,
644+
uint16_t pos,
645+
NODE_KEY key) const final {
636646
ceph_abort("impossible");
637647
return false;
638648
}
639-
bool is_child_data_stable(op_context_t<NODE_KEY>, uint16_t pos) const final {
649+
bool is_child_data_stable(
650+
op_context_t<NODE_KEY>,
651+
uint16_t pos,
652+
NODE_KEY key) const final {
640653
ceph_abort("impossible");
641654
return false;
642655
}
@@ -1040,14 +1053,25 @@ struct FixedKVLeafNode
10401053
// 2. The child extent is stable
10411054
//
10421055
// For reserved mappings, the return values are undefined.
1043-
bool is_child_stable(op_context_t<NODE_KEY> c, uint16_t pos) const final {
1044-
return _is_child_stable(c, pos);
1056+
bool is_child_stable(
1057+
op_context_t<NODE_KEY> c,
1058+
uint16_t pos,
1059+
NODE_KEY key) const final {
1060+
return _is_child_stable(c, pos, key);
10451061
}
1046-
bool is_child_data_stable(op_context_t<NODE_KEY> c, uint16_t pos) const final {
1047-
return _is_child_stable(c, pos, true);
1062+
bool is_child_data_stable(
1063+
op_context_t<NODE_KEY> c,
1064+
uint16_t pos,
1065+
NODE_KEY key) const final {
1066+
return _is_child_stable(c, pos, key, true);
10481067
}
10491068

1050-
bool _is_child_stable(op_context_t<NODE_KEY> c, uint16_t pos, bool data_only = false) const {
1069+
bool _is_child_stable(
1070+
op_context_t<NODE_KEY> c,
1071+
uint16_t pos,
1072+
NODE_KEY key,
1073+
bool data_only = false) const {
1074+
assert(key == get_key_from_idx(pos));
10511075
auto child = this->children[pos];
10521076
if (is_reserved_ptr(child)) {
10531077
return true;

src/crimson/os/seastore/cached_extent.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1157,6 +1157,10 @@ class PhysicalNodeMapping {
11571157
return false;
11581158
};
11591159

1160+
virtual void maybe_fix_pos() {
1161+
ceph_abort("impossible");
1162+
}
1163+
11601164
virtual ~PhysicalNodeMapping() {}
11611165
protected:
11621166
std::optional<child_pos_t> child_pos = std::nullopt;

src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525

2626
namespace crimson::os::seastore::lba_manager::btree {
2727

28+
struct LBALeafNode;
29+
2830
class BtreeLBAMapping : public BtreeNodeMapping<laddr_t, paddr_t> {
2931
// To support cloning, there are two kinds of lba mappings:
3032
// 1. physical lba mapping: the pladdr in the value of which is the paddr of
@@ -166,6 +168,14 @@ class BtreeLBAMapping : public BtreeNodeMapping<laddr_t, paddr_t> {
166168
return p.modified_since(parent_modifications);
167169
}
168170

171+
void maybe_fix_pos() final {
172+
assert(is_parent_valid());
173+
if (!parent_modified()) {
174+
return;
175+
}
176+
auto &p = static_cast<LBALeafNode&>(*parent);
177+
p.maybe_fix_mapping_pos(*this);
178+
}
169179
protected:
170180
std::unique_ptr<BtreeNodeMapping<laddr_t, paddr_t>> _duplicate(
171181
op_context_t<laddr_t> ctx) const final {
@@ -181,6 +191,10 @@ class BtreeLBAMapping : public BtreeNodeMapping<laddr_t, paddr_t> {
181191
return pin;
182192
}
183193
private:
194+
void _new_pos(uint16_t pos) {
195+
this->pos = pos;
196+
}
197+
184198
laddr_t key = L_ADDR_NULL;
185199
bool indirect = false;
186200
laddr_t intermediate_key = L_ADDR_NULL;
@@ -189,6 +203,7 @@ class BtreeLBAMapping : public BtreeNodeMapping<laddr_t, paddr_t> {
189203
pladdr_t raw_val;
190204
lba_map_val_t map_val;
191205
uint64_t parent_modifications = 0;
206+
friend struct LBALeafNode;
192207
};
193208

194209
using BtreeLBAMappingRef = std::unique_ptr<BtreeLBAMapping>;

src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#include "include/buffer.h"
1111
#include "include/byteorder.h"
1212

13-
#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
13+
#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h"
1414
#include "crimson/os/seastore/logging.h"
1515

1616
SET_SUBSYS(seastore_lba);
@@ -53,4 +53,23 @@ void LBALeafNode::resolve_relative_addrs(paddr_t base)
5353
}
5454
}
5555

56+
void LBALeafNode::maybe_fix_mapping_pos(BtreeLBAMapping &mapping)
57+
{
58+
assert(mapping.get_parent() == this);
59+
auto key = mapping.is_indirect()
60+
? mapping.get_intermediate_base()
61+
: mapping.get_key();
62+
if (key != iter_idx(mapping.get_pos()).get_key()) {
63+
auto iter = lower_bound(key);
64+
{
65+
// a mapping that no longer exist or has its value
66+
// modified is considered an outdated one, and
67+
// shouldn't be used anymore
68+
ceph_assert(iter != end());
69+
assert(iter.get_val() == mapping.get_map_val());
70+
}
71+
mapping._new_pos(iter.get_offset());
72+
}
73+
}
74+
5675
}

src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ namespace crimson::os::seastore::lba_manager::btree {
2626
using base_iertr = LBAManager::base_iertr;
2727
using LBANode = FixedKVNode<laddr_t>;
2828

29+
class BtreeLBAMapping;
30+
2931
/**
3032
* lba_map_val_t
3133
*
@@ -290,6 +292,8 @@ struct LBALeafNode
290292
}
291293

292294
std::ostream &_print_detail(std::ostream &out) const final;
295+
296+
void maybe_fix_mapping_pos(BtreeLBAMapping &mapping);
293297
};
294298
using LBALeafNodeRef = TCachedExtentRef<LBALeafNode>;
295299

src/crimson/os/seastore/transaction_manager.h

Lines changed: 44 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -181,14 +181,27 @@ class TransactionManager : public ExtentCallbackInterface {
181181
Transaction &t,
182182
LBAMappingRef pin)
183183
{
184-
// checking the lba child must be atomic with creating
185-
// and linking the absent child
186-
auto ret = get_extent_if_linked<T>(t, std::move(pin));
187-
if (ret.index() == 1) {
188-
return std::move(std::get<1>(ret));
184+
auto fut = base_iertr::make_ready_future<LBAMappingRef>();
185+
if (!pin->is_parent_valid()) {
186+
fut = get_pin(t, pin->get_key()
187+
).handle_error_interruptible(
188+
crimson::ct_error::enoent::assert_failure{"unexpected enoent"},
189+
crimson::ct_error::input_output_error::pass_further{}
190+
);
189191
} else {
190-
return this->pin_to_extent<T>(t, std::move(std::get<0>(ret)));
192+
pin->maybe_fix_pos();
193+
fut = base_iertr::make_ready_future<LBAMappingRef>(std::move(pin));
191194
}
195+
return fut.si_then([&t, this](auto npin) mutable {
196+
// checking the lba child must be atomic with creating
197+
// and linking the absent child
198+
auto ret = get_extent_if_linked<T>(t, std::move(npin));
199+
if (ret.index() == 1) {
200+
return std::move(std::get<1>(ret));
201+
} else {
202+
return this->pin_to_extent<T>(t, std::move(std::get<0>(ret)));
203+
}
204+
});
192205
}
193206

194207
template <typename T>
@@ -198,6 +211,8 @@ class TransactionManager : public ExtentCallbackInterface {
198211
LBAMappingRef pin)
199212
{
200213
ceph_assert(pin->is_parent_valid());
214+
// checking the lba child must be atomic with creating
215+
// and linking the absent child
201216
auto v = pin->get_logical_extent(t);
202217
if (v.has_child()) {
203218
return v.get_child_fut().safe_then([pin=std::move(pin)](auto extent) {
@@ -459,16 +474,31 @@ class TransactionManager : public ExtentCallbackInterface {
459474
// The according extent might be stable or pending.
460475
auto fut = base_iertr::now();
461476
if (!pin->is_indirect()) {
462-
auto fut2 = base_iertr::make_ready_future<TCachedExtentRef<T>>();
463-
if (full_extent_integrity_check) {
464-
fut2 = read_pin<T>(t, pin->duplicate());
477+
if (!pin->is_parent_valid()) {
478+
fut = get_pin(t, pin->get_key()
479+
).si_then([&pin](auto npin) {
480+
assert(npin);
481+
pin = std::move(npin);
482+
return seastar::now();
483+
}).handle_error_interruptible(
484+
crimson::ct_error::enoent::assert_failure{"unexpected enoent"},
485+
crimson::ct_error::input_output_error::pass_further{}
486+
);
465487
} else {
466-
auto ret = get_extent_if_linked<T>(t, pin->duplicate());
467-
if (ret.index() == 1) {
468-
fut2 = std::move(std::get<1>(ret));
469-
}
488+
pin->maybe_fix_pos();
470489
}
471-
fut = fut2.si_then([this, &t, &remaps, original_paddr,
490+
491+
fut = fut.si_then([this, &t, &pin] {
492+
if (full_extent_integrity_check) {
493+
return read_pin<T>(t, pin->duplicate());
494+
} else {
495+
auto ret = get_extent_if_linked<T>(t, pin->duplicate());
496+
if (ret.index() == 1) {
497+
return std::move(std::get<1>(ret));
498+
}
499+
}
500+
return base_iertr::make_ready_future<TCachedExtentRef<T>>();
501+
}).si_then([this, &t, &remaps, original_paddr,
472502
original_laddr, original_len,
473503
&extents, FNAME](auto ext) mutable {
474504
ceph_assert(full_extent_integrity_check

0 commit comments

Comments
 (0)