Skip to content

Commit 3a35aeb

Browse files
authored
Merge pull request ceph#60655 from xxhdx1985126/wip-seastore-move-out-root-meta
crimson/os/seastore: move the root meta out of the root block Reviewed-by: Yingxin Cheng <[email protected]>
2 parents 5853cb7 + 1627e38 commit 3a35aeb

File tree

10 files changed

+234
-119
lines changed

10 files changed

+234
-119
lines changed

src/crimson/os/seastore/cache.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ void Cache::register_metrics()
172172
{extent_types_t::LADDR_INTERNAL, sm::label_instance("ext", "LADDR_INTERNAL")},
173173
{extent_types_t::LADDR_LEAF, sm::label_instance("ext", "LADDR_LEAF")},
174174
{extent_types_t::DINK_LADDR_LEAF, sm::label_instance("ext", "DINK_LADDR_LEAF")},
175+
{extent_types_t::ROOT_META, sm::label_instance("ext", "ROOT_META")},
175176
{extent_types_t::OMAP_INNER, sm::label_instance("ext", "OMAP_INNER")},
176177
{extent_types_t::OMAP_LEAF, sm::label_instance("ext", "OMAP_LEAF")},
177178
{extent_types_t::ONODE_BLOCK_STAGED, sm::label_instance("ext", "ONODE_BLOCK_STAGED")},
@@ -1093,6 +1094,9 @@ CachedExtentRef Cache::alloc_new_extent_by_type(
10931094
case extent_types_t::LADDR_LEAF:
10941095
return alloc_new_non_data_extent<lba_manager::btree::LBALeafNode>(
10951096
t, length, hint, gen);
1097+
case extent_types_t::ROOT_META:
1098+
return alloc_new_non_data_extent<RootMetaBlock>(
1099+
t, length, hint, gen);
10961100
case extent_types_t::ONODE_BLOCK_STAGED:
10971101
return alloc_new_non_data_extent<onode::SeastoreNodeExtent>(
10981102
t, length, hint, gen);
@@ -2193,6 +2197,12 @@ Cache::do_get_caching_extent_by_type(
21932197
).safe_then([](auto extent) {
21942198
return CachedExtentRef(extent.detach(), false /* add_ref */);
21952199
});
2200+
case extent_types_t::ROOT_META:
2201+
return do_get_caching_extent<RootMetaBlock>(
2202+
offset, length, std::move(extent_init_func), std::move(on_cache)
2203+
).safe_then([](auto extent) {
2204+
return CachedExtentRef(extent.detach(), false /* add_ref */);
2205+
});
21962206
case extent_types_t::OMAP_INNER:
21972207
return do_get_caching_extent<omap_manager::OMapInnerNode>(
21982208
offset, length, std::move(extent_init_func), std::move(on_cache)

src/crimson/os/seastore/cache.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -978,7 +978,8 @@ class Cache {
978978
auto result = epm.alloc_new_non_data_extent(t, T::TYPE, length, hint, gen);
979979
#endif
980980
if (!result) {
981-
return nullptr;
981+
SUBERRORT(seastore_cache, "insufficient space", t);
982+
std::rethrow_exception(crimson::ct_error::enospc::exception_ptr());
982983
}
983984
auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result->bp));
984985
ret->init(CachedExtent::extent_state_t::INITIAL_WRITE_PENDING,
@@ -1019,6 +1020,10 @@ class Cache {
10191020
#else
10201021
auto results = epm.alloc_new_data_extents(t, T::TYPE, length, hint, gen);
10211022
#endif
1023+
if (results.empty()) {
1024+
SUBERRORT(seastore_cache, "insufficient space", t);
1025+
std::rethrow_exception(crimson::ct_error::enospc::exception_ptr());
1026+
}
10221027
std::vector<TCachedExtentRef<T>> extents;
10231028
for (auto &result : results) {
10241029
auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result.bp));

src/crimson/os/seastore/random_block_manager/block_rb_manager.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,10 +188,10 @@ BlockRBManager::write_ertr::future<> BlockRBManager::write(
188188
void BlockRBManager::prefill_fragmented_device()
189189
{
190190
LOG_PREFIX(BlockRBManager::prefill_fragmented_device);
191-
// the first 2 blocks must be allocated to lba root
191+
// the first 3 blocks must be allocated to lba root, root meta
192192
// and backref root during mkfs
193-
for (size_t block = get_block_size() * 2;
194-
block <= get_size() - get_block_size() * 2;
193+
for (size_t block = get_block_size() * 3;
194+
block <= get_size() - get_block_size() * 3;
195195
block += get_block_size() * 2) {
196196
DEBUG("marking {}~{} used",
197197
get_start_rbm_addr() + block,
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2+
// vim: ts=8 sw=2 smarttab
3+
4+
#pragma once
5+
6+
#include "crimson/os/seastore/cached_extent.h"
7+
8+
namespace crimson::os::seastore {
9+
10+
/// Logical cached extent holding the root metadata map (string -> string).
/// The map is stored in encoded form at the start of the extent's buffer;
/// only the leading MAX_META_LENGTH bytes are written by set_meta() and
/// carried by get_delta()/apply_delta().
struct RootMetaBlock : LogicalCachedExtent {
11+
using meta_t = std::map<std::string, std::string>;
12+
using Ref = TCachedExtentRef<RootMetaBlock>;
13+
// extent length requested when the block is allocated
// (see TransactionManager::init_root_meta)
static constexpr size_t SIZE = 4096;
14+
// upper bound on the encoded size of meta_t, and the fixed size of a delta
static constexpr int MAX_META_LENGTH = 1024;
15+
16+
explicit RootMetaBlock(ceph::bufferptr &&ptr)
17+
: LogicalCachedExtent(std::move(ptr)) {}
18+
explicit RootMetaBlock(extent_len_t length)
19+
: LogicalCachedExtent(length) {}
20+
RootMetaBlock(const RootMetaBlock &rhs)
21+
: LogicalCachedExtent(rhs) {}
22+
23+
/// returns a copy-constructed RootMetaBlock to be mutated by the transaction
CachedExtentRef duplicate_for_write(Transaction&) final {
24+
return CachedExtentRef(new RootMetaBlock(*this));
25+
}
26+
27+
static constexpr extent_types_t TYPE = extent_types_t::ROOT_META;
28+
extent_types_t get_type() const final {
29+
return extent_types_t::ROOT_META;
30+
}
31+
32+
/// dumps root meta as delta: the delta is always the first
/// MAX_META_LENGTH bytes of this extent's buffer
ceph::bufferlist get_delta() final {
34+
ceph::bufferlist bl;
35+
// bufferptr over bytes [0, MAX_META_LENGTH) of the extent's buffer
ceph::buffer::ptr bptr(get_bptr(), 0, MAX_META_LENGTH);
36+
bl.append(bptr);
37+
return bl;
38+
}
39+
40+
/// overwrites the leading MAX_META_LENGTH bytes of this block with the
/// contents of the delta; the delta must be exactly MAX_META_LENGTH long
void apply_delta(const ceph::bufferlist &_bl) final
41+
{
42+
// NOTE(review): plain assert here vs. ceph_assert in set_meta();
// presumably compiled out in NDEBUG builds -- confirm this is intended
assert(_bl.length() == MAX_META_LENGTH);
44+
// work on a copy so it can be rebuilt into one contiguous buffer,
// making bl.front() cover the whole delta
ceph::bufferlist bl = _bl;
45+
bl.rebuild();
46+
get_bptr().copy_in(0, MAX_META_LENGTH, bl.front().c_str());
47+
}
48+
49+
/// decodes and returns the meta map stored at the start of the buffer
meta_t get_meta() const {
50+
bufferlist bl;
51+
bl.append(get_bptr());
52+
meta_t ret;
53+
auto iter = bl.cbegin();
54+
decode(ret, iter);
55+
return ret;
56+
}
57+
58+
/// encodes m and stores it at the start of the buffer; asserts that the
/// encoded form fits in MAX_META_LENGTH bytes
void set_meta(const meta_t &m) {
59+
ceph::bufferlist bl;
60+
encode(m, bl);
61+
ceph_assert(bl.length() <= MAX_META_LENGTH);
62+
bl.rebuild();
63+
// clear the whole meta region first so bytes past the new encoding
// do not keep stale data from a previous, longer encoding
get_bptr().zero(0, MAX_META_LENGTH);
64+
get_bptr().copy_in(0, bl.length(), bl.front().c_str());
65+
}
66+
67+
};
68+
using RootMetaBlockRef = RootMetaBlock::Ref;
69+
70+
} // crimson::os::seastore
71+
72+
73+
#if FMT_VERSION >= 90000
74+
template <> struct fmt::formatter<crimson::os::seastore::RootMetaBlock>
75+
: fmt::ostream_formatter {};
76+
#endif

src/crimson/os/seastore/seastore_types.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,8 @@ std::ostream &operator<<(std::ostream &out, extent_types_t t)
246246
return out << "LADDR_LEAF";
247247
case extent_types_t::ONODE_BLOCK_STAGED:
248248
return out << "ONODE_BLOCK_STAGED";
249+
case extent_types_t::ROOT_META:
250+
return out << "ROOT_META";
249251
case extent_types_t::OMAP_INNER:
250252
return out << "OMAP_INNER";
251253
case extent_types_t::OMAP_LEAF:

src/crimson/os/seastore/seastore_types.h

Lines changed: 18 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1378,23 +1378,24 @@ enum class extent_types_t : uint8_t {
13781378
LADDR_INTERNAL = 1,
13791379
LADDR_LEAF = 2,
13801380
DINK_LADDR_LEAF = 3, // should only be used for unittests
1381-
OMAP_INNER = 4,
1382-
OMAP_LEAF = 5,
1383-
ONODE_BLOCK_STAGED = 6,
1384-
COLL_BLOCK = 7,
1385-
OBJECT_DATA_BLOCK = 8,
1386-
RETIRED_PLACEHOLDER = 9,
1381+
ROOT_META = 4,
1382+
OMAP_INNER = 5,
1383+
OMAP_LEAF = 6,
1384+
ONODE_BLOCK_STAGED = 7,
1385+
COLL_BLOCK = 8,
1386+
OBJECT_DATA_BLOCK = 9,
1387+
RETIRED_PLACEHOLDER = 10,
13871388
// the following two types are not extent types,
13881389
// they are just used to indicate paddr allocation deltas
1389-
ALLOC_INFO = 10,
1390-
JOURNAL_TAIL = 11,
1390+
ALLOC_INFO = 11,
1391+
JOURNAL_TAIL = 12,
13911392
// Test Block Types
1392-
TEST_BLOCK = 12,
1393-
TEST_BLOCK_PHYSICAL = 13,
1394-
BACKREF_INTERNAL = 14,
1395-
BACKREF_LEAF = 15,
1393+
TEST_BLOCK = 13,
1394+
TEST_BLOCK_PHYSICAL = 14,
1395+
BACKREF_INTERNAL = 15,
1396+
BACKREF_LEAF = 16,
13961397
// None and the number of valid extent_types_t
1397-
NONE = 16,
1398+
NONE = 17,
13981399
};
13991400
using extent_types_le_t = uint8_t;
14001401
constexpr auto EXTENT_TYPES_MAX = static_cast<uint8_t>(extent_types_t::NONE);
@@ -1409,12 +1410,12 @@ constexpr bool is_data_type(extent_types_t type) {
14091410
}
14101411

14111412
constexpr bool is_logical_metadata_type(extent_types_t type) {
1412-
return type >= extent_types_t::OMAP_INNER &&
1413+
return type >= extent_types_t::ROOT_META &&
14131414
type <= extent_types_t::COLL_BLOCK;
14141415
}
14151416

14161417
constexpr bool is_logical_type(extent_types_t type) {
1417-
if ((type >= extent_types_t::OMAP_INNER &&
1418+
if ((type >= extent_types_t::ROOT_META &&
14181419
type <= extent_types_t::OBJECT_DATA_BLOCK) ||
14191420
type == extent_types_t::TEST_BLOCK) {
14201421
assert(is_logical_metadata_type(type) ||
@@ -1926,44 +1927,18 @@ using backref_root_t = phy_tree_root_t;
19261927
* TODO: generalize this to permit more than one lba_manager implementation
19271928
*/
19281929
struct __attribute__((packed)) root_t {
1929-
using meta_t = std::map<std::string, std::string>;
1930-
1931-
static constexpr int MAX_META_LENGTH = 1024;
1932-
19331930
backref_root_t backref_root;
19341931
lba_root_t lba_root;
19351932
laddr_le_t onode_root;
19361933
coll_root_le_t collection_root;
1934+
laddr_le_t meta;
19371935

1938-
char meta[MAX_META_LENGTH];
1939-
1940-
root_t() {
1941-
set_meta(meta_t{});
1942-
}
1936+
root_t() = default;
19431937

19441938
void adjust_addrs_from_base(paddr_t base) {
19451939
lba_root.adjust_addrs_from_base(base);
19461940
backref_root.adjust_addrs_from_base(base);
19471941
}
1948-
1949-
meta_t get_meta() {
1950-
bufferlist bl;
1951-
bl.append(ceph::buffer::create_static(MAX_META_LENGTH, meta));
1952-
meta_t ret;
1953-
auto iter = bl.cbegin();
1954-
decode(ret, iter);
1955-
return ret;
1956-
}
1957-
1958-
void set_meta(const meta_t &m) {
1959-
ceph::bufferlist bl;
1960-
encode(m, bl);
1961-
ceph_assert(bl.length() < MAX_META_LENGTH);
1962-
bl.rebuild();
1963-
auto &bptr = bl.front();
1964-
::memset(meta, 0, MAX_META_LENGTH);
1965-
::memcpy(meta, bptr.c_str(), bl.length());
1966-
}
19671942
};
19681943

19691944
struct alloc_blk_t {

src/crimson/os/seastore/transaction_manager.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ TransactionManager::mkfs_ertr::future<> TransactionManager::mkfs()
7474
return lba_manager->mkfs(t);
7575
}).si_then([this, &t] {
7676
return backref_manager->mkfs(t);
77+
}).si_then([this, &t] {
78+
return init_root_meta(t);
7779
}).si_then([this, FNAME, &t] {
7880
INFOT("submitting mkfs transaction", t);
7981
return submit_transaction_direct(t);

src/crimson/os/seastore/transaction_manager.h

Lines changed: 45 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "crimson/os/seastore/logging.h"
2424
#include "crimson/os/seastore/seastore_types.h"
2525
#include "crimson/os/seastore/cache.h"
26+
#include "crimson/os/seastore/root_meta.h"
2627
#include "crimson/os/seastore/lba_manager.h"
2728
#include "crimson/os/seastore/backref_manager.h"
2829
#include "crimson/os/seastore/journal.h"
@@ -303,10 +304,6 @@ class TransactionManager : public ExtentCallbackInterface {
303304
len,
304305
placement_hint,
305306
INIT_GENERATION);
306-
if (!ext) {
307-
SUBERRORT(seastore_tm, "insufficient space!", t);
308-
return crimson::ct_error::enospc::make();
309-
}
310307
return lba_manager->alloc_extent(
311308
t,
312309
laddr_hint,
@@ -342,10 +339,6 @@ class TransactionManager : public ExtentCallbackInterface {
342339
len,
343340
placement_hint,
344341
INIT_GENERATION);
345-
if (exts.empty()) {
346-
SUBERRORT(seastore_tm, "insufficient space!", t);
347-
return crimson::ct_error::enospc::make();
348-
}
349342
return lba_manager->alloc_extents(
350343
t,
351344
laddr_hint,
@@ -690,9 +683,11 @@ class TransactionManager : public ExtentCallbackInterface {
690683
const std::string &key) {
691684
return cache->get_root(
692685
t
693-
).si_then([&key, &t](auto root) {
686+
).si_then([&t, this](auto root) {
687+
return read_extent<RootMetaBlock>(t, root->root.meta);
688+
}).si_then([key, &t](auto mblock) {
694689
LOG_PREFIX(TransactionManager::read_root_meta);
695-
auto meta = root->root.get_meta();
690+
auto meta = mblock->get_meta();
696691
auto iter = meta.find(key);
697692
if (iter == meta.end()) {
698693
SUBDEBUGT(seastore_tm, "{} -> nullopt", t, key);
@@ -701,7 +696,35 @@ class TransactionManager : public ExtentCallbackInterface {
701696
SUBDEBUGT(seastore_tm, "{} -> {}", t, key, iter->second);
702697
return seastar::make_ready_future<read_root_meta_bare>(iter->second);
703698
}
704-
});
699+
}).handle_error_interruptible(
700+
crimson::ct_error::input_output_error::pass_further{},
701+
crimson::ct_error::assert_all{"unexpected error!"}
702+
);
703+
}
704+
705+
/**
706+
* init_root_meta
707+
*
708+
* create the root meta block
709+
*/
710+
using init_root_meta_iertr = base_iertr;
711+
using init_root_meta_ret = init_root_meta_iertr::future<>;
712+
// Allocates a RootMetaBlock in transaction t, initializes it with an empty
// meta map and records its logical address in the root block's `meta` field.
// input_output_error is passed on to the caller; any other error asserts.
init_root_meta_ret init_root_meta(Transaction &t) {
713+
// allocate the meta block with L_ADDR_MIN as the laddr hint
return alloc_non_data_extent<RootMetaBlock>(
714+
t, L_ADDR_MIN, RootMetaBlock::SIZE
715+
).si_then([this, &t](auto meta) {
716+
// start out with an empty key/value map
meta->set_meta(RootMetaBlock::meta_t{});
717+
return cache->get_root(t
718+
).si_then([this, &t, meta](auto root) {
719+
// obtain a writable copy of the root block for this transaction
auto mroot = cache->duplicate_for_write(
720+
t, root)->template cast<RootBlock>();
721+
// point the root at the newly allocated meta block
mroot->root.meta = meta->get_laddr();
722+
return seastar::now();
723+
});
724+
}).handle_error_interruptible(
725+
crimson::ct_error::input_output_error::pass_further{},
726+
crimson::ct_error::assert_all{"unexpected error!"}
727+
);
705728
}
706729

707730
/**
@@ -719,15 +742,21 @@ class TransactionManager : public ExtentCallbackInterface {
719742
SUBDEBUGT(seastore_tm, "seastore_tm, {} -> {} ...", t, key, value);
720743
return cache->get_root(
721744
t
722-
).si_then([this, &t, &key, &value](RootBlockRef root) {
723-
root = cache->duplicate_for_write(t, root)->cast<RootBlock>();
745+
).si_then([this, &t](RootBlockRef root) {
746+
return read_extent<RootMetaBlock>(t, root->root.meta);
747+
}).si_then([this, key, value, &t](auto mblock) {
748+
mblock = get_mutable_extent(t, mblock
749+
)->template cast<RootMetaBlock>();
724750

725-
auto meta = root->root.get_meta();
751+
auto meta = mblock->get_meta();
726752
meta[key] = value;
727753

728-
root->root.set_meta(meta);
754+
mblock->set_meta(meta);
729755
return seastar::now();
730-
});
756+
}).handle_error_interruptible(
757+
crimson::ct_error::input_output_error::pass_further{},
758+
crimson::ct_error::assert_all{"unexpected error!"}
759+
);
731760
}
732761

733762
/**

0 commit comments

Comments
 (0)