Skip to content

Commit b3a6c35

Browse files
authored
Merge pull request ceph#62895 from cyx1231st/wip-seastore-omap-link-init
crimson/os/seastore/omap_manager: simplify maybe_init from tolerating duplicated calls Reviewed-by: Xuehan Xu <[email protected]>
2 parents d4f58c9 + 9362812 commit b3a6c35

File tree

9 files changed

+110
-27
lines changed

9 files changed

+110
-27
lines changed

src/crimson/os/seastore/cache.cc

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,13 +1164,19 @@ CachedExtentRef Cache::duplicate_for_write(
11641164
LOG_PREFIX(Cache::duplicate_for_write);
11651165
assert(i->is_fully_loaded());
11661166

1167+
#ifndef NDEBUG
1168+
if (i->is_logical()) {
1169+
assert(static_cast<LogicalCachedExtent&>(*i).has_laddr());
1170+
assert(static_cast<LogicalCachedExtent&>(*i).is_seen_by_users());
1171+
}
1172+
#endif
1173+
11671174
if (i->is_mutable()) {
11681175
return i;
11691176
}
11701177

11711178
if (i->is_exist_clean()) {
11721179
assert(i->is_logical());
1173-
assert(static_cast<LogicalCachedExtent&>(*i).has_laddr());
11741180
i->version++;
11751181
i->state = CachedExtent::extent_state_t::EXIST_MUTATION_PENDING;
11761182
i->last_committed_crc = i->calc_crc32c();
@@ -1201,10 +1207,11 @@ CachedExtentRef Cache::duplicate_for_write(
12011207
ret->version++;
12021208
ret->state = CachedExtent::extent_state_t::MUTATION_PENDING;
12031209
if (i->is_logical()) {
1204-
auto& lextent = static_cast<LogicalCachedExtent&>(*i);
1205-
assert(lextent.has_laddr());
1210+
auto& stable_extent = static_cast<LogicalCachedExtent&>(*i);
12061211
assert(ret->is_logical());
1207-
static_cast<LogicalCachedExtent&>(*ret).set_laddr(lextent.get_laddr());
1212+
auto& mutate_extent = static_cast<LogicalCachedExtent&>(*ret);
1213+
mutate_extent.set_laddr(stable_extent.get_laddr());
1214+
assert(mutate_extent.is_seen_by_users());
12081215
}
12091216

12101217
t.add_mutated_extent(ret);

src/crimson/os/seastore/cached_extent.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,8 @@ CachedExtent* CachedExtent::get_transactional_view(transaction_id_t tid) {
9191

9292
std::ostream &LogicalCachedExtent::print_detail(std::ostream &out) const
9393
{
94-
out << ", laddr=" << laddr;
94+
out << ", laddr=" << laddr
95+
<< ", seen=" << seen_by_users;
9596
return print_detail_l(out);
9697
}
9798

src/crimson/os/seastore/cached_extent.h

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -857,10 +857,12 @@ class CachedExtent
857857

858858
/// used to wait while in-progress commit completes
859859
std::optional<seastar::shared_promise<>> io_wait_promise;
860+
860861
void set_io_wait() {
861862
ceph_assert(!io_wait_promise);
862863
io_wait_promise = seastar::shared_promise<>();
863864
}
865+
864866
void complete_io() {
865867
ceph_assert(io_wait_promise);
866868
io_wait_promise->set_value();
@@ -1385,18 +1387,37 @@ class LBAMapping;
13851387
*/
13861388
class LogicalCachedExtent : public CachedExtent {
13871389
public:
1388-
template <typename... T>
1389-
LogicalCachedExtent(T&&... t) : CachedExtent(std::forward<T>(t)...) {}
1390+
using CachedExtent::CachedExtent;
1391+
1392+
LogicalCachedExtent(const LogicalCachedExtent& other)
1393+
: CachedExtent(other),
1394+
seen_by_users(other.seen_by_users) {}
1395+
1396+
LogicalCachedExtent(const LogicalCachedExtent& other, share_buffer_t s)
1397+
: CachedExtent(other, s),
1398+
seen_by_users(other.seen_by_users) {}
13901399

13911400
void on_rewrite(Transaction &t, CachedExtent &extent, extent_len_t off) final {
13921401
assert(get_type() == extent.get_type());
13931402
auto &lextent = (LogicalCachedExtent&)extent;
13941403
set_laddr((lextent.get_laddr() + off).checked_to_laddr());
1404+
seen_by_users = lextent.seen_by_users;
13951405
do_on_rewrite(t, lextent);
13961406
}
13971407

13981408
virtual void do_on_rewrite(Transaction &t, LogicalCachedExtent &extent) {}
13991409

1410+
bool is_seen_by_users() const {
1411+
return seen_by_users;
1412+
}
1413+
1414+
// handled under TransactionManager,
1415+
// user should not set this by themselves.
1416+
void set_seen_by_users() {
1417+
assert(!seen_by_users);
1418+
seen_by_users = true;
1419+
}
1420+
14001421
bool has_laddr() const {
14011422
return laddr != L_ADDR_NULL;
14021423
}
@@ -1461,6 +1482,12 @@ class LogicalCachedExtent : public CachedExtent {
14611482
// the logical address of the extent, and if shared,
14621483
// it is the intermediate_base, see BtreeLBAMapping comments.
14631484
laddr_t laddr = L_ADDR_NULL;
1485+
1486+
// whether the extent has been seen by users since OSD startup,
1487+
// otherwise, an extent can still be alive if it's dirty or pinned by lru.
1488+
//
1489+
// user must initialize the logical extent upon the first access.
1490+
bool seen_by_users = false;
14641491
};
14651492

14661493
using LogicalCachedExtentRef = TCachedExtentRef<LogicalCachedExtent>;

src/crimson/os/seastore/logical_child_node.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ class LogicalChildNode : public LogicalCachedExtent,
4040
}
4141
protected:
4242
void on_replace_prior() final {
43+
assert(is_seen_by_users());
4344
lba_child_node_t::on_replace_prior();
4445
do_on_replace_prior();
4546
}

src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.cc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@ BtreeOMapManager::handle_root_split(
7676
omap_node_meta_t meta{omap_root.depth + 1};
7777
nroot->init_range(BEGIN_KEY, END_KEY);
7878
nroot->set_meta(meta);
79-
left->init_range(BEGIN_KEY, pivot);
8079
nroot->insert_child_ptr(
8180
nroot->iter_begin().get_offset(),
8281
dynamic_cast<BaseChildNode<OMapInnerNode, std::string>*>(left.get()));
@@ -85,7 +84,6 @@ BtreeOMapManager::handle_root_split(
8584
nroot->insert_child_ptr(
8685
(nroot->iter_begin() + 1).get_offset(),
8786
dynamic_cast<BaseChildNode<OMapInnerNode, std::string>*>(right.get()));
88-
right->init_range(pivot, END_KEY);
8987
nroot->journal_inner_insert(nroot->iter_begin() + 1, right->get_laddr(),
9088
pivot, nroot->maybe_get_delta_buffer());
9189
omap_root.update(nroot->get_laddr(), omap_root.get_depth() + 1, omap_root.hint,

src/crimson/os/seastore/omap_manager/btree/omap_btree_node.h

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -116,17 +116,14 @@ struct OMapNode : LogicalChildNode {
116116

117117
virtual ~OMapNode() = default;
118118

119-
void init_range(std::string begin, std::string end) {
120-
if (!this->end.empty()) {
121-
// this is a duplicated init.
122-
assert(end == this->end);
123-
return;
124-
}
125-
if (begin == BEGIN_KEY && end == END_KEY) {
119+
void init_range(std::string _begin, std::string _end) {
120+
assert(begin.empty());
121+
assert(end.empty());
122+
if (_begin == BEGIN_KEY && _end == END_KEY) {
126123
root = true;
127124
}
128-
this->begin = std::move(begin);
129-
this->end = std::move(end);
125+
begin = std::move(_begin);
126+
end = std::move(_end);
130127
}
131128
const std::string &get_begin() const {
132129
return begin;

src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,15 @@ struct OMapInnerNode
8080
if (is_rewrite() && !is_btree_root()) {
8181
auto &prior = *get_prior_instance()->template cast<OMapInnerNode>();
8282
if (prior.base_child_t::has_parent_tracker()) {
83+
assert(prior.is_seen_by_users());
8384
// unlike fixed-kv nodes, rewriting child nodes of the omap tree
8485
// won't affect parent nodes, so we have to manually take prior
8586
// instances' parent trackers here.
8687
this->child_node_t::take_parent_from_prior();
88+
} else {
89+
// dirty omap extent may not be accessed yet during rewrite,
90+
// this means the extent may not be initalized yet as linked.
91+
assert(!prior.is_seen_by_users());
8792
}
8893
}
8994
}
@@ -231,6 +236,7 @@ struct OMapInnerNode
231236
if (this->is_valid()
232237
&& !this->is_pending()
233238
&& !this->is_btree_root()
239+
// dirty omap extent may not be accessed/linked yet
234240
&& this->base_child_t::has_parent_tracker()) {
235241
this->child_node_t::destroy();
236242
}
@@ -282,10 +288,15 @@ struct OMapLeafNode
282288
if (is_rewrite() && !is_btree_root()) {
283289
auto &prior = *get_prior_instance()->template cast<OMapLeafNode>();
284290
if (prior.base_child_t::has_parent_tracker()) {
291+
assert(prior.is_seen_by_users());
285292
// unlike fixed-kv nodes, rewriting child nodes of the omap tree
286293
// won't affect parent nodes, so we have to manually take prior
287294
// instances' parent trackers here.
288295
this->child_node_t::take_parent_from_prior();
296+
} else {
297+
// dirty omap extent may not be accessed yet during rewrite,
298+
// this means the extent may not be initalized yet as linked.
299+
assert(!prior.is_seen_by_users());
289300
}
290301
}
291302
}
@@ -303,6 +314,7 @@ struct OMapLeafNode
303314
if (this->is_valid()
304315
&& !this->is_pending()
305316
&& !this->is_btree_root()
317+
// dirty omap extent may not be accessed/linked yet
306318
&& this->base_child_t::has_parent_tracker()) {
307319
this->child_node_t::destroy();
308320
}
@@ -434,14 +446,15 @@ omap_load_extent(
434446
oc.t, laddr, size,
435447
[begin=std::move(begin), end=std::move(end), FNAME,
436448
oc, chp=std::move(chp)](T &extent) mutable {
449+
assert(!extent.is_seen_by_users());
437450
extent.init_range(std::move(begin), std::move(end));
438-
if (extent.T::base_child_t::is_parent_valid()
439-
|| extent.is_btree_root()) {
440-
return;
451+
if (extent.is_btree_root()) {
452+
return;
441453
}
442-
assert(chp);
443454
SUBDEBUGT(seastore_omap, "linking {} to {}",
444-
oc.t, extent, *chp->get_parent());
455+
oc.t, extent, *chp->get_parent());
456+
assert(chp);
457+
assert(!extent.T::base_child_t::is_parent_valid());
445458
chp->link_child(&extent);
446459
}
447460
).handle_error_interruptible(

src/crimson/os/seastore/seastore_types.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2214,6 +2214,10 @@ constexpr bool is_valid_transaction(transaction_type_t type) {
22142214
return type < transaction_type_t::MAX;
22152215
}
22162216

2217+
constexpr bool is_user_transaction(transaction_type_t type) {
2218+
return type <= transaction_type_t::READ;
2219+
}
2220+
22172221
constexpr bool is_background_transaction(transaction_type_t type) {
22182222
return (type >= transaction_type_t::TRIM_DIRTY &&
22192223
type < transaction_type_t::MAX);

src/crimson/os/seastore/transaction_manager.h

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,8 @@ class TransactionManager : public ExtentCallbackInterface {
260260
static_assert(is_logical_type(T::TYPE));
261261
assert(is_aligned(partial_off, get_block_size()));
262262
assert(is_aligned(partial_len, get_block_size()));
263+
// must be user-oriented required by maybe_init
264+
assert(is_user_transaction(t.get_src()));
263265

264266
extent_len_t direct_partial_off = partial_off;
265267
bool is_clone = pin->is_clone();
@@ -302,9 +304,12 @@ class TransactionManager : public ExtentCallbackInterface {
302304
return cache->read_extent_maybe_partial(
303305
t, std::move(extent), direct_partial_off, partial_len);
304306
}).si_then([maybe_init=std::move(maybe_init)](auto extent) {
305-
maybe_init(*extent);
306-
return extent;
307-
});
307+
if (!extent->is_seen_by_users()) {
308+
maybe_init(*extent);
309+
extent->set_seen_by_users();
310+
}
311+
return std::move(extent);
312+
});
308313
} else {
309314
return this->pin_to_extent<T>(
310315
t, std::move(std::get<0>(ret)),
@@ -381,6 +386,7 @@ class TransactionManager : public ExtentCallbackInterface {
381386
laddr_t laddr_hint,
382387
extent_len_t len,
383388
placement_hint_t placement_hint = placement_hint_t::HOT) {
389+
static_assert(is_logical_metadata_type(T::TYPE));
384390
LOG_PREFIX(TransactionManager::alloc_non_data_extent);
385391
SUBDEBUGT(seastore_tm, "{} hint {}~0x{:x} phint={} ...",
386392
t, T::TYPE, laddr_hint, len, placement_hint);
@@ -389,6 +395,9 @@ class TransactionManager : public ExtentCallbackInterface {
389395
len,
390396
placement_hint,
391397
INIT_GENERATION);
398+
// user must initialize the logical extent themselves.
399+
assert(is_user_transaction(t.get_src()));
400+
ext->set_seen_by_users();
392401
return lba_manager->alloc_extent(
393402
t,
394403
laddr_hint,
@@ -416,6 +425,7 @@ class TransactionManager : public ExtentCallbackInterface {
416425
laddr_t laddr_hint,
417426
extent_len_t len,
418427
placement_hint_t placement_hint = placement_hint_t::HOT) {
428+
static_assert(is_data_type(T::TYPE));
419429
LOG_PREFIX(TransactionManager::alloc_data_extents);
420430
SUBDEBUGT(seastore_tm, "{} hint {}~0x{:x} phint={} ...",
421431
t, T::TYPE, laddr_hint, len, placement_hint);
@@ -424,6 +434,11 @@ class TransactionManager : public ExtentCallbackInterface {
424434
len,
425435
placement_hint,
426436
INIT_GENERATION);
437+
// user must initialize the logical extent themselves
438+
assert(is_user_transaction(t.get_src()));
439+
for (auto& ext : exts) {
440+
ext->set_seen_by_users();
441+
}
427442
return lba_manager->alloc_extents(
428443
t,
429444
laddr_hint,
@@ -479,6 +494,10 @@ class TransactionManager : public ExtentCallbackInterface {
479494
LBAMappingRef &&pin,
480495
std::array<remap_entry, N> remaps) {
481496
static_assert(std::is_base_of_v<LogicalChildNode, T>);
497+
// data extents don't need maybe_init yet, currently,
498+
static_assert(is_data_type(T::TYPE));
499+
// must be user-oriented required by (the potential) maybe_init
500+
assert(is_user_transaction(t.get_src()));
482501

483502
#ifndef NDEBUG
484503
std::sort(remaps.begin(), remaps.end(),
@@ -550,7 +569,14 @@ class TransactionManager : public ExtentCallbackInterface {
550569
} else {
551570
auto ret = get_extent_if_linked<T>(t, pin->duplicate());
552571
if (ret.index() == 1) {
553-
return std::move(std::get<1>(ret));
572+
return std::get<1>(ret
573+
).si_then([](auto extent) {
574+
if (!extent->is_seen_by_users()) {
575+
// Note, no maybe_init available for data extents
576+
extent->set_seen_by_users();
577+
}
578+
return std::move(extent);
579+
});
554580
} else {
555581
// absent
556582
return base_iertr::make_ready_future<TCachedExtentRef<T>>();
@@ -571,6 +597,7 @@ class TransactionManager : public ExtentCallbackInterface {
571597
original_bptr = ext->get_bptr();
572598
}
573599
if (ext) {
600+
assert(ext->is_seen_by_users());
574601
cache->retire_extent(t, ext);
575602
} else {
576603
cache->retire_absent_extent_addr(t, original_paddr, original_len);
@@ -594,6 +621,8 @@ class TransactionManager : public ExtentCallbackInterface {
594621
remap_len,
595622
original_laddr,
596623
original_bptr);
624+
// user must initialize the logical extent themselves.
625+
extent->set_seen_by_users();
597626
extents.emplace_back(std::move(extent));
598627
}
599628
});
@@ -1034,6 +1063,8 @@ class TransactionManager : public ExtentCallbackInterface {
10341063
extent_len_t partial_len,
10351064
lextent_init_func_t<T> &&maybe_init) {
10361065
static_assert(is_logical_type(T::TYPE));
1066+
// must be user-oriented required by maybe_init
1067+
assert(is_user_transaction(t.get_src()));
10371068
using ret = pin_to_extent_ret<T>;
10381069
auto &pref = *pin;
10391070
auto direct_length = pref.is_indirect() ?
@@ -1062,6 +1093,7 @@ class TransactionManager : public ExtentCallbackInterface {
10621093
pref.link_child(&extent);
10631094
extent.maybe_set_intermediate_laddr(pref);
10641095
maybe_init(extent);
1096+
extent.set_seen_by_users();
10651097
}
10661098
).si_then([FNAME, &t, pin=std::move(pin), this](auto ref) mutable -> ret {
10671099
if (ref->is_fully_loaded()) {
@@ -1114,6 +1146,7 @@ class TransactionManager : public ExtentCallbackInterface {
11141146
SUBTRACET(seastore_tm, "getting absent extent from pin {} type {} ...",
11151147
t, *pin, type);
11161148
assert(is_logical_type(type));
1149+
assert(is_background_transaction(t.get_src()));
11171150
auto &pref = *pin;
11181151
laddr_t direct_key;
11191152
extent_len_t direct_length;
@@ -1140,6 +1173,8 @@ class TransactionManager : public ExtentCallbackInterface {
11401173
assert(!pref.get_parent()->is_pending());
11411174
pref.link_child(&lextent);
11421175
lextent.maybe_set_intermediate_laddr(pref);
1176+
// No change to extent::seen_by_user because this path is only
1177+
// for background cleaning.
11431178
}
11441179
).si_then([FNAME, &t, pin=std::move(pin), this](auto ref) {
11451180
auto crc = ref->calc_crc32c();

0 commit comments

Comments
 (0)