Skip to content

Commit e21fa0d

Browse files
authored
Merge pull request ceph#63218 from xxhdx1985126/wip-seastore-fix-retire-absent
crimson/os/seastore/transaction_manager: Cache::retire_absent_extent_addr should be called immediately after TransactionManager::get_extent_if_linked if necessary Reviewed-by: Yingxin Cheng <[email protected]>
2 parents 6f0c606 + a38ff7c commit e21fa0d

File tree

9 files changed

+188
-51
lines changed

9 files changed

+188
-51
lines changed

src/crimson/os/seastore/btree/fixed_kv_btree.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -496,12 +496,12 @@ class FixedKVBtree {
496496
assert(cnode->has_parent_tracker());
497497
if (node->is_pending()) {
498498
auto &n = node->get_stable_for_key(i->get_key());
499-
assert(cnode->get_parent_node().get() == &n);
499+
assert(cnode->peek_parent_node().get() == &n);
500500
auto pos = n.lower_bound(i->get_key()).get_offset();
501501
assert(pos < n.get_size());
502502
assert(n.children[pos] == cnode.get());
503503
} else {
504-
assert(cnode->get_parent_node().get() == node.get());
504+
assert(cnode->peek_parent_node().get() == node.get());
505505
assert(node->children[i->get_offset()] == cnode.get());
506506
}
507507
} else if (child_node->is_pending()) {
@@ -511,12 +511,12 @@ class FixedKVBtree {
511511
assert(prior.is_parent_valid());
512512
if (node->is_mutation_pending()) {
513513
auto &n = node->get_stable_for_key(i->get_key());
514-
assert(prior.get_parent_node().get() == &n);
514+
assert(prior.peek_parent_node().get() == &n);
515515
auto pos = n.lower_bound(i->get_key()).get_offset();
516516
assert(pos < n.get_size());
517517
assert(n.children[pos] == &prior);
518518
} else {
519-
assert(prior.get_parent_node().get() == node.get());
519+
assert(prior.peek_parent_node().get() == node.get());
520520
assert(node->children[i->get_offset()] == &prior);
521521
}
522522
} else {
@@ -567,9 +567,9 @@ class FixedKVBtree {
567567
} else {
568568
auto c = static_cast<child_node_t*>(child);
569569
assert(c->has_parent_tracker());
570-
assert(c->get_parent_node().get() == node.get()
570+
assert(c->peek_parent_node().get() == node.get()
571571
|| (node->is_pending() && c->is_stable()
572-
&& c->get_parent_node().get() == &node->get_stable_for_key(
572+
&& c->peek_parent_node().get() == &node->get_stable_for_key(
573573
i->get_key())));
574574
}
575575
} else {

src/crimson/os/seastore/cache.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1231,6 +1231,7 @@ record_t Cache::prepare_record(
12311231
auto trans_src = t.get_src();
12321232
assert(!t.is_weak());
12331233
assert(trans_src != Transaction::src_t::READ);
1234+
assert(t.read_set.size() + t.num_replace_placeholder == t.read_items.size());
12341235

12351236
auto& efforts = get_by_src(stats.committed_efforts_by_src,
12361237
trans_src);
@@ -1247,7 +1248,7 @@ record_t Cache::prepare_record(
12471248
i.ref->get_type()).increment(i.ref->get_length());
12481249
read_stat.increment(i.ref->get_length());
12491250
}
1250-
t.read_set.clear();
1251+
t.clear_read_set();
12511252
t.write_set.clear();
12521253

12531254
record_t record(record_type_t::JOURNAL, trans_src);
@@ -1365,6 +1366,7 @@ record_t Cache::prepare_record(
13651366
// retiering extents, this is because logical linked tree
13661367
// nodes needs to access their prior instances in this
13671368
// phase if they are rewritten.
1369+
e->set_io_wait();
13681370
e->prepare_commit();
13691371
});
13701372

@@ -1796,6 +1798,7 @@ void Cache::complete_commit(
17961798
assert(!i->is_dirty());
17971799
const auto t_src = t.get_src();
17981800
touch_extent(*i, &t_src, t.get_cache_hint());
1801+
i->complete_io();
17991802
epm.commit_space_used(i->get_paddr(), i->get_length());
18001803

18011804
// Note: commit extents and backref allocations in the same place

src/crimson/os/seastore/cache.h

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -471,11 +471,19 @@ class Cache : public ExtentTransViewRetriever {
471471

472472
const auto t_src = t.get_src();
473473
auto ext_type = extent->get_type();
474+
// FIXME: retired-placeholder isn't linked in the lba tree yet.
475+
// We think it's still working because:
476+
// 1. A retired-placeholder must be an ObjectDataBlock
477+
// 2. Per rados object, no read is possible during write,
478+
// and write cannot be parallel
479+
assert(!is_retired_placeholder_type(ext_type));
474480
cache_access_stats_t& access_stats = get_by_ext(
475481
get_by_src(stats.access_by_src_ext, t_src),
476482
ext_type);
477483

478484
CachedExtent* p_extent;
485+
bool needs_step_2 = false;
486+
bool needs_touch = false;
479487
if (extent->is_stable()) {
480488
p_extent = extent->get_transactional_view(t);
481489
if (p_extent != extent.get()) {
@@ -495,16 +503,22 @@ class Cache : public ExtentTransViewRetriever {
495503
} else {
496504
// stable from trans-view
497505
assert(!p_extent->is_pending_in_trans(t.get_trans_id()));
498-
if (t.maybe_add_to_read_set(p_extent)) {
506+
auto ret = t.maybe_add_to_read_set(p_extent);
507+
if (ret.added) {
499508
if (p_extent->is_dirty()) {
500509
++access_stats.cache_dirty;
501510
++stats.access.cache_dirty;
502511
} else {
503512
++access_stats.cache_lru;
504513
++stats.access.cache_lru;
505514
}
506-
touch_extent(*p_extent, &t_src, t.get_cache_hint());
515+
if (ret.is_paddr_known) {
516+
touch_extent(*p_extent, &t_src, t.get_cache_hint());
517+
} else {
518+
needs_touch = true;
519+
}
507520
} else {
521+
// already exists
508522
if (p_extent->is_dirty()) {
509523
++access_stats.trans_dirty;
510524
++stats.access.trans_dirty;
@@ -513,6 +527,9 @@ class Cache : public ExtentTransViewRetriever {
513527
++stats.access.trans_lru;
514528
}
515529
}
530+
// step 2 maybe reordered after wait_io(),
531+
// always try step 2 if paddr unknown
532+
needs_step_2 = !ret.is_paddr_known;
516533
}
517534
} else {
518535
assert(!extent->is_stable_writting());
@@ -538,7 +555,13 @@ class Cache : public ExtentTransViewRetriever {
538555

539556
return trans_intr::make_interruptible(
540557
p_extent->wait_io()
541-
).then_interruptible([p_extent] {
558+
).then_interruptible([p_extent, needs_touch, needs_step_2, &t, this, &t_src] {
559+
if (needs_step_2) {
560+
t.maybe_add_to_read_set_step_2(p_extent);
561+
}
562+
if (needs_touch) {
563+
touch_extent(*p_extent, &t_src, t.get_cache_hint());
564+
}
542565
return get_extent_iertr::make_ready_future<CachedExtentRef>(
543566
CachedExtentRef(p_extent));
544567
});

src/crimson/os/seastore/cached_extent.h

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,16 @@ class read_set_item_t {
6767
using set_hook_t = boost::intrusive::set_member_hook<
6868
boost::intrusive::link_mode<
6969
boost::intrusive::auto_unlink>>;
70-
set_hook_t trans_hook;
71-
using set_hook_options = boost::intrusive::member_hook<
70+
set_hook_t trans_hook; // used to attach transactions to extents
71+
set_hook_t extent_hook; // used to attach extents to transactions
72+
using trans_hook_options = boost::intrusive::member_hook<
7273
read_set_item_t,
7374
set_hook_t,
7475
&read_set_item_t::trans_hook>;
76+
using extent_hook_options = boost::intrusive::member_hook<
77+
read_set_item_t,
78+
set_hook_t,
79+
&read_set_item_t::extent_hook>;
7580

7681
public:
7782
struct extent_cmp_t {
@@ -101,23 +106,34 @@ class read_set_item_t {
101106

102107
using trans_set_t = boost::intrusive::set<
103108
read_set_item_t,
104-
set_hook_options,
109+
trans_hook_options,
105110
boost::intrusive::constant_time_size<false>,
106111
boost::intrusive::compare<trans_cmp_t>>;
112+
using extent_set_t = boost::intrusive::set<
113+
read_set_item_t,
114+
extent_hook_options,
115+
boost::intrusive::constant_time_size<false>,
116+
boost::intrusive::compare<extent_cmp_t>>;
107117

108118
T *t = nullptr;
109119
CachedExtentRef ref;
110120

121+
bool is_extent_attached_to_trans() const {
122+
return extent_hook.is_linked();
123+
}
124+
125+
bool is_trans_attached_to_extent() const {
126+
return trans_hook.is_linked();
127+
}
128+
111129
read_set_item_t(T *t, CachedExtentRef ref);
112130
read_set_item_t(const read_set_item_t &) = delete;
113131
read_set_item_t(read_set_item_t &&) = default;
114132
~read_set_item_t() = default;
115133
};
116134

117135
template <typename T>
118-
using read_extent_set_t = std::set<
119-
read_set_item_t<T>,
120-
typename read_set_item_t<T>::extent_cmp_t>;
136+
using read_extent_set_t = typename read_set_item_t<T>::extent_set_t;
121137

122138
template <typename T>
123139
using read_trans_set_t = typename read_set_item_t<T>::trans_set_t;
@@ -549,12 +565,12 @@ class CachedExtent
549565
}
550566

551567
bool is_stable_writting() const {
552-
// MUTATION_PENDING and under-io extents are already stable and visible,
553-
// see prepare_record().
568+
// MUTATION_PENDING/INITIAL_WRITE_PENDING and under-io extents are already
569+
// stable and visible, see prepare_record().
554570
//
555-
// XXX: It might be good to mark this case as DIRTY from the definition,
571+
// XXX: It might be good to mark this case as DIRTY/CLEAN from the definition,
556572
// which probably can make things simpler.
557-
return is_mutation_pending() && is_pending_io();
573+
return (is_mutation_pending() || is_initial_pending()) && is_pending_io();
558574
}
559575

560576
/// Returns true if extent is stable and shared among transactions

src/crimson/os/seastore/lba/btree_lba_manager.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1091,7 +1091,7 @@ BtreeLBAManager::_update_mapping(
10911091
}
10921092
assert(!nextent ||
10931093
(nextent->has_parent_tracker() &&
1094-
nextent->get_parent_node().get() == iter.get_leaf_node().get()));
1094+
nextent->peek_parent_node().get() == iter.get_leaf_node().get()));
10951095
return update_mapping_ret_bare_t(iter.get_cursor(c));
10961096
});
10971097
}

src/crimson/os/seastore/linked_tree_node.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,10 @@ class BaseChildNode {
196196
bool is_parent_valid() const {
197197
return parent_tracker && parent_tracker->is_valid();
198198
}
199-
TCachedExtentRef<ParentT> get_parent_node() const {
199+
// this method should only be used for asserts and logs, because
200+
// the parent node might be stable writing and should "wait_io"
201+
// before further access
202+
TCachedExtentRef<ParentT> peek_parent_node() const {
200203
assert(parent_tracker);
201204
return parent_tracker->get_parent();
202205
}
@@ -999,7 +1002,7 @@ class ChildNode : public BaseChildNode<ParentT, key_t> {
9991002
assert(!down_cast().is_btree_root());
10001003
assert(this->has_parent_tracker());
10011004
auto off = get_parent_pos();
1002-
auto parent = this->get_parent_node();
1005+
auto parent = this->peek_parent_node();
10031006
assert(parent->children[off] == &down_cast());
10041007
parent->children[off] = nullptr;
10051008
}
@@ -1018,15 +1021,15 @@ class ChildNode : public BaseChildNode<ParentT, key_t> {
10181021
this->parent_tracker = prior.BaseChildNode<ParentT, key_t>::parent_tracker;
10191022
assert(this->has_parent_tracker());
10201023
auto off = get_parent_pos();
1021-
auto parent = this->get_parent_node();
1024+
auto parent = this->peek_parent_node();
10221025
assert(me.get_prior_instance().get() ==
10231026
dynamic_cast<CachedExtent*>(parent->children[off]));
10241027
parent->children[off] = &me;
10251028
}
10261029

10271030
btreenode_pos_t get_parent_pos() const {
10281031
auto &me = down_cast();
1029-
auto parent = this->get_parent_node();
1032+
auto parent = this->peek_parent_node();
10301033
assert(parent);
10311034
//TODO: can this search be avoided?
10321035
auto key = me.get_begin();

src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,7 @@ std::ostream &OMapLeafNode::print_detail_l(std::ostream &out) const
626626
<< ", end=" << get_end();
627627
}
628628
if (this->child_node_t::is_parent_valid())
629-
return out << ", parent=" << (void*)this->child_node_t::get_parent_node().get();
629+
return out << ", parent=" << (void*)this->child_node_t::peek_parent_node().get();
630630
else
631631
return out;
632632
}

0 commit comments

Comments
 (0)