Skip to content

Commit 326f877

Browse files
authored
Merge pull request ceph#59213 from myoungwon/wip-dec-omap-leaf-size
crimson/os/seastore: redirect log operations to 16K-leaf-size omap

Reviewed-by: Yingxin Cheng <[email protected]>
Reviewed-by: Matan Breizman <[email protected]>
Reviewed-by: Xuehan Xu <[email protected]>
2 parents 54f7b27 + 5026c1d commit 326f877

22 files changed

+346
-163
lines changed

src/common/ceph_strings.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,8 @@ const char *ceph_osd_alloc_hint_flag_name(int f)
227227
return "compressible";
228228
case CEPH_OSD_ALLOC_HINT_FLAG_INCOMPRESSIBLE:
229229
return "incompressible";
230+
case CEPH_OSD_ALLOC_HINT_FLAG_LOG:
231+
return "log";
230232
default:
231233
return "???";
232234
}

src/crimson/os/seastore/omap_manager.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
// and return errors during insert if the max is exceeded.
1919
#define OMAP_INNER_BLOCK_SIZE 8192
2020
#define OMAP_LEAF_BLOCK_SIZE 65536
21+
#define LOG_LEAF_BLOCK_SIZE 16384
2122

2223
namespace crimson::os::seastore {
2324

@@ -40,7 +41,8 @@ class OMapManager {
4041
*/
4142
using initialize_omap_iertr = base_iertr;
4243
using initialize_omap_ret = initialize_omap_iertr::future<omap_root_t>;
43-
virtual initialize_omap_ret initialize_omap(Transaction &t, laddr_t hint) = 0;
44+
virtual initialize_omap_ret initialize_omap(Transaction &t, laddr_t hint,
45+
omap_type_t type) = 0;
4446

4547
/**
4648
* get value(string) by key(string)

src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.cc

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,18 @@ BtreeOMapManager::BtreeOMapManager(
1818
: tm(tm) {}
1919

2020
BtreeOMapManager::initialize_omap_ret
21-
BtreeOMapManager::initialize_omap(Transaction &t, laddr_t hint)
21+
BtreeOMapManager::initialize_omap(Transaction &t, laddr_t hint,
22+
omap_type_t type)
2223
{
2324
LOG_PREFIX(BtreeOMapManager::initialize_omap);
2425
DEBUGT("hint: {}", t, hint);
25-
return tm.alloc_non_data_extent<OMapLeafNode>(t, hint, OMAP_LEAF_BLOCK_SIZE)
26-
.si_then([hint, &t](auto&& root_extent) {
26+
return tm.alloc_non_data_extent<OMapLeafNode>(t, hint, get_leaf_size(type))
27+
.si_then([hint, &t, type](auto&& root_extent) {
2728
root_extent->set_size(0);
2829
omap_node_meta_t meta{1};
2930
root_extent->set_meta(meta);
3031
omap_root_t omap_root;
31-
omap_root.update(root_extent->get_laddr(), 1, hint);
32+
omap_root.update(root_extent->get_laddr(), 1, hint, type);
3233
t.get_omap_tree_stats().depth = 1u;
3334
t.get_omap_tree_stats().extents_num_delta++;
3435
return initialize_omap_iertr::make_ready_future<omap_root_t>(omap_root);
@@ -64,7 +65,8 @@ BtreeOMapManager::handle_root_split(
6465
"", nroot->maybe_get_delta_buffer());
6566
nroot->journal_inner_insert(nroot->iter_begin() + 1, right->get_laddr(),
6667
pivot, nroot->maybe_get_delta_buffer());
67-
omap_root.update(nroot->get_laddr(), omap_root.get_depth() + 1, omap_root.hint);
68+
omap_root.update(nroot->get_laddr(), omap_root.get_depth() + 1, omap_root.hint,
69+
omap_root.get_type());
6870
oc.t.get_omap_tree_stats().depth = omap_root.depth;
6971
++(oc.t.get_omap_tree_stats().extents_num_delta);
7072
return seastar::now();
@@ -87,7 +89,8 @@ BtreeOMapManager::handle_root_merge(
8789
omap_root.update(
8890
iter->get_val(),
8991
omap_root.depth -= 1,
90-
omap_root.hint);
92+
omap_root.hint,
93+
omap_root.get_type());
9194
oc.t.get_omap_tree_stats().depth = omap_root.depth;
9295
oc.t.get_omap_tree_stats().extents_num_delta--;
9396
return oc.tm.remove(oc.t, root->get_laddr()
@@ -110,10 +113,11 @@ BtreeOMapManager::omap_get_value(
110113
LOG_PREFIX(BtreeOMapManager::omap_get_value);
111114
DEBUGT("key={}", t, key);
112115
return get_omap_root(
113-
get_omap_context(t, omap_root.hint),
116+
get_omap_context(t, omap_root),
114117
omap_root
115118
).si_then([this, &t, &key, &omap_root](auto&& extent) {
116-
return extent->get_value(get_omap_context(t, omap_root.hint), key);
119+
return extent->get_value(
120+
get_omap_context(t, omap_root), key);
117121
}).si_then([](auto &&e) {
118122
return omap_get_value_ret(
119123
interruptible::ready_future_marker{},
@@ -147,15 +151,17 @@ BtreeOMapManager::omap_set_key(
147151
LOG_PREFIX(BtreeOMapManager::omap_set_key);
148152
DEBUGT("{} -> {}", t, key, value);
149153
return get_omap_root(
150-
get_omap_context(t, omap_root.hint),
154+
get_omap_context(t, omap_root),
151155
omap_root
152156
).si_then([this, &t, &key, &value, &omap_root](auto root) {
153-
return root->insert(get_omap_context(t, omap_root.hint), key, value);
157+
return root->insert(get_omap_context(
158+
t, omap_root), key, value);
154159
}).si_then([this, &omap_root, &t](auto mresult) -> omap_set_key_ret {
155160
if (mresult.status == mutation_status_t::SUCCESS)
156161
return seastar::now();
157162
else if (mresult.status == mutation_status_t::WAS_SPLIT)
158-
return handle_root_split(get_omap_context(t, omap_root.hint), omap_root, mresult);
163+
return handle_root_split(
164+
get_omap_context(t, omap_root), omap_root, mresult);
159165
else
160166
return seastar::now();
161167
});
@@ -170,19 +176,21 @@ BtreeOMapManager::omap_rm_key(
170176
LOG_PREFIX(BtreeOMapManager::omap_rm_key);
171177
DEBUGT("{}", t, key);
172178
return get_omap_root(
173-
get_omap_context(t, omap_root.hint),
179+
get_omap_context(t, omap_root),
174180
omap_root
175181
).si_then([this, &t, &key, &omap_root](auto root) {
176-
return root->rm_key(get_omap_context(t, omap_root.hint), key);
182+
return root->rm_key(get_omap_context(t, omap_root), key);
177183
}).si_then([this, &omap_root, &t](auto mresult) -> omap_rm_key_ret {
178184
if (mresult.status == mutation_status_t::SUCCESS) {
179185
return seastar::now();
180186
} else if (mresult.status == mutation_status_t::WAS_SPLIT) {
181-
return handle_root_split(get_omap_context(t, omap_root.hint), omap_root, mresult);
187+
return handle_root_split(
188+
get_omap_context(t, omap_root), omap_root, mresult);
182189
} else if (mresult.status == mutation_status_t::NEED_MERGE) {
183190
auto root = *(mresult.need_merge);
184191
if (root->get_node_size() == 1 && omap_root.depth != 1) {
185-
return handle_root_merge(get_omap_context(t, omap_root.hint), omap_root, mresult);
192+
return handle_root_merge(
193+
get_omap_context(t, omap_root), omap_root, mresult);
186194
} else {
187195
return seastar::now();
188196
}
@@ -256,11 +264,11 @@ BtreeOMapManager::omap_list(
256264
}
257265

258266
return get_omap_root(
259-
get_omap_context(t, omap_root.hint),
267+
get_omap_context(t, omap_root),
260268
omap_root
261269
).si_then([this, config, &t, &first, &last, &omap_root](auto extent) {
262270
return extent->list(
263-
get_omap_context(t, omap_root.hint),
271+
get_omap_context(t, omap_root),
264272
first,
265273
last,
266274
config);
@@ -275,17 +283,17 @@ BtreeOMapManager::omap_clear(
275283
LOG_PREFIX(BtreeOMapManager::omap_clear);
276284
DEBUGT("{}", t, omap_root);
277285
return get_omap_root(
278-
get_omap_context(t, omap_root.hint),
286+
get_omap_context(t, omap_root),
279287
omap_root
280288
).si_then([this, &t, &omap_root](auto extent) {
281-
return extent->clear(get_omap_context(t, omap_root.hint));
289+
return extent->clear(get_omap_context(t, omap_root));
282290
}).si_then([this, &omap_root, &t] {
283291
return tm.remove(
284292
t, omap_root.get_location()
285293
).si_then([&omap_root] (auto ret) {
286294
omap_root.update(
287295
L_ADDR_NULL,
288-
0, L_ADDR_MIN);
296+
0, L_ADDR_MIN, omap_root.get_type());
289297
return omap_clear_iertr::now();
290298
});
291299
}).handle_error_interruptible(

src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,9 @@ class BtreeOMapManager : public OMapManager {
2626
TransactionManager &tm;
2727

2828
omap_context_t get_omap_context(
29-
Transaction &t, laddr_t addr_min) {
30-
return omap_context_t{tm, t, addr_min};
29+
Transaction &t, const omap_root_t &omap_root) {
30+
ceph_assert(omap_root.type < omap_type_t::NUM_TYPES);
31+
return omap_context_t{tm, t, omap_root.hint, omap_root.type};
3132
}
3233

3334
/* get_omap_root
@@ -65,7 +66,8 @@ class BtreeOMapManager : public OMapManager {
6566
public:
6667
explicit BtreeOMapManager(TransactionManager &tm);
6768

68-
initialize_omap_ret initialize_omap(Transaction &t, laddr_t hint) final;
69+
initialize_omap_ret initialize_omap(Transaction &t, laddr_t hint,
70+
omap_type_t type) final;
6971

7072
omap_get_value_ret omap_get_value(
7173
const omap_root_t &omap_root,
@@ -105,6 +107,14 @@ class BtreeOMapManager : public OMapManager {
105107
omap_root_t &omap_root,
106108
Transaction &t) final;
107109

110+
static extent_len_t get_leaf_size(omap_type_t type) {
111+
if (type == omap_type_t::LOG) {
112+
return LOG_LEAF_BLOCK_SIZE;
113+
}
114+
ceph_assert(type == omap_type_t::OMAP ||
115+
type == omap_type_t::XATTR);
116+
return OMAP_LEAF_BLOCK_SIZE;
117+
}
108118
};
109119
using BtreeOMapManagerRef = std::unique_ptr<BtreeOMapManager>;
110120

src/crimson/os/seastore/omap_manager/btree/omap_btree_node.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ struct omap_context_t {
1919
TransactionManager &tm;
2020
Transaction &t;
2121
laddr_t hint;
22+
omap_type_t type;
2223
};
2324

2425
enum class mutation_status_t : uint8_t {

src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "crimson/os/seastore/omap_manager/btree/omap_btree_node.h"
1010
#include "crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h"
1111
#include "seastar/core/thread.hh"
12+
#include "crimson/os/seastore/omap_manager/btree/btree_omap_manager.h"
1213

1314
SET_SUBSYS(seastore_omap);
1415

@@ -673,7 +674,7 @@ OMapLeafNode::make_split_children(omap_context_t oc)
673674
{
674675
LOG_PREFIX(OMapLeafNode::make_split_children);
675676
DEBUGT("this: {}", oc.t, *this);
676-
return oc.tm.alloc_extents<OMapLeafNode>(oc.t, oc.hint, OMAP_LEAF_BLOCK_SIZE, 2)
677+
return oc.tm.alloc_extents<OMapLeafNode>(oc.t, oc.hint, get_len(), 2)
677678
.si_then([this] (auto &&ext_pair) {
678679
auto left = ext_pair.front();
679680
auto right = ext_pair.back();
@@ -692,7 +693,7 @@ OMapLeafNode::make_full_merge(omap_context_t oc, OMapNodeRef right)
692693
ceph_assert(right->get_type() == TYPE);
693694
LOG_PREFIX(OMapLeafNode::make_full_merge);
694695
DEBUGT("this: {}", oc.t, *this);
695-
return oc.tm.alloc_non_data_extent<OMapLeafNode>(oc.t, oc.hint, OMAP_LEAF_BLOCK_SIZE)
696+
return oc.tm.alloc_non_data_extent<OMapLeafNode>(oc.t, oc.hint, get_len())
696697
.si_then([this, right] (auto &&replacement) {
697698
replacement->merge_from(*this, *right->cast<OMapLeafNode>());
698699
return full_merge_ret(
@@ -710,7 +711,7 @@ OMapLeafNode::make_balanced(omap_context_t oc, OMapNodeRef _right)
710711
ceph_assert(_right->get_type() == TYPE);
711712
LOG_PREFIX(OMapLeafNode::make_balanced);
712713
DEBUGT("this: {}", oc.t, *this);
713-
return oc.tm.alloc_extents<OMapLeafNode>(oc.t, oc.hint, OMAP_LEAF_BLOCK_SIZE, 2)
714+
return oc.tm.alloc_extents<OMapLeafNode>(oc.t, oc.hint, get_len(), 2)
714715
.si_then([this, _right] (auto &&replacement_pair) {
715716
auto replacement_left = replacement_pair.front();
716717
auto replacement_right = replacement_pair.back();
@@ -747,7 +748,8 @@ omap_load_extent(omap_context_t oc, laddr_t laddr, depth_t depth)
747748
});
748749
} else {
749750
return oc.tm.read_extent<OMapLeafNode>(
750-
oc.t, laddr, OMAP_LEAF_BLOCK_SIZE
751+
oc.t, laddr,
752+
BtreeOMapManager::get_leaf_size(oc.type)
751753
).handle_error_interruptible(
752754
omap_load_extent_iertr::pass_further{},
753755
crimson::ct_error::assert_all{ "Invalid error in omap_load_extent" }

src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -163,14 +163,14 @@ struct OMapLeafNode
163163

164164
explicit OMapLeafNode(ceph::bufferptr &&ptr)
165165
: OMapNode(std::move(ptr)) {
166-
this->set_layout_buf(this->get_bptr().c_str());
166+
this->set_layout_buf(this->get_bptr().c_str(), this->get_bptr().length());
167167
}
168168
// Must be identical with OMapLeafNode(ptr) after on_fully_loaded()
169169
explicit OMapLeafNode(extent_len_t length)
170170
: OMapNode(length) {}
171171
OMapLeafNode(const OMapLeafNode &rhs)
172172
: OMapNode(rhs) {
173-
this->set_layout_buf(this->get_bptr().c_str());
173+
this->set_layout_buf(this->get_bptr().c_str(), this->get_bptr().length());
174174
}
175175

176176
omap_node_meta_t get_node_meta() const final { return get_meta(); }
@@ -185,7 +185,7 @@ struct OMapLeafNode
185185
uint32_t get_node_size() { return get_size(); }
186186

187187
void on_fully_loaded() final {
188-
this->set_layout_buf(this->get_bptr().c_str());
188+
this->set_layout_buf(this->get_bptr().c_str(), this->get_bptr().length());
189189
}
190190

191191
CachedExtentRef duplicate_for_write(Transaction&) final {

src/crimson/os/seastore/omap_manager/btree/string_kv_node_layout.h

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -915,6 +915,7 @@ class StringKVInnerNodeLayout {
915915
*/
916916
class StringKVLeafNodeLayout {
917917
char *buf = nullptr;
918+
extent_len_t len = 0;
918919

919920
using L = absl::container_internal::Layout<ceph_le32, omap_node_meta_le_t, omap_leaf_key_le_t>;
920921
static constexpr L layout{1, 1, 1}; // = L::Partial(1, 1, 1);
@@ -1014,7 +1015,7 @@ class StringKVLeafNodeLayout {
10141015
return get_node_key().key_off;
10151016
}
10161017
auto get_node_val_ptr() const {
1017-
auto tail = node->buf + OMAP_LEAF_BLOCK_SIZE;
1018+
auto tail = node->buf + node->len;
10181019
if (*this == node->iter_end())
10191020
return tail;
10201021
else {
@@ -1029,7 +1030,7 @@ class StringKVLeafNodeLayout {
10291030
return (*this - 1)->get_node_val_offset();
10301031
}
10311032
auto get_right_ptr_end() const {
1032-
return node->buf + OMAP_LEAF_BLOCK_SIZE - get_right_offset_end();
1033+
return node->buf + node->len - get_right_offset_end();
10331034
}
10341035

10351036
void update_offset(int offset) {
@@ -1127,10 +1128,12 @@ class StringKVLeafNodeLayout {
11271128

11281129
StringKVLeafNodeLayout() : buf(nullptr) {}
11291130

1130-
void set_layout_buf(char *_buf) {
1131+
void set_layout_buf(char *_buf, extent_len_t _len) {
1132+
assert(_len > 0);
11311133
assert(buf == nullptr);
11321134
assert(_buf != nullptr);
11331135
buf = _buf;
1136+
len = _len;
11341137
}
11351138

11361139
const_iterator iter_begin() const {
@@ -1274,11 +1277,16 @@ class StringKVLeafNodeLayout {
12741277
}
12751278

12761279
uint32_t capacity() const {
1277-
return OMAP_LEAF_BLOCK_SIZE
1280+
return len
12781281
- (reinterpret_cast<char*>(layout.template Pointer<2>(buf))
12791282
- reinterpret_cast<char*>(layout.template Pointer<0>(buf)));
12801283
}
12811284

1285+
auto get_len() const {
1286+
assert(len > 0);
1287+
return len;
1288+
}
1289+
12821290
bool is_overflow(size_t ksize, size_t vsize) const {
12831291
return free_space() < (sizeof(omap_leaf_key_le_t) + ksize + vsize);
12841292
}

src/crimson/os/seastore/onode.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,27 @@ struct onode_layout_t {
3232
ceph_le32 oi_size{0};
3333
ceph_le32 ss_size{0};
3434
omap_root_le_t omap_root;
35+
omap_root_le_t log_root;
3536
omap_root_le_t xattr_root;
3637

3738
object_data_le_t object_data;
3839

3940
char oi[MAX_OI_LENGTH] = {0};
4041
char ss[MAX_SS_LENGTH] = {0};
42+
43+
onode_layout_t() : omap_root(omap_type_t::OMAP), log_root(omap_type_t::LOG),
44+
xattr_root(omap_type_t::XATTR) {}
45+
46+
const omap_root_le_t& get_root(omap_type_t type) const {
47+
if (type == omap_type_t::XATTR) {
48+
return xattr_root;
49+
} else if (type == omap_type_t::OMAP) {
50+
return omap_root;
51+
} else {
52+
ceph_assert(type == omap_type_t::LOG);
53+
return log_root;
54+
}
55+
}
4156
} __attribute__((packed));
4257

4358
class Transaction;
@@ -71,6 +86,7 @@ class Onode : public boost::intrusive_ref_counter<
7186

7287
virtual void update_onode_size(Transaction&, uint32_t) = 0;
7388
virtual void update_omap_root(Transaction&, omap_root_t&) = 0;
89+
virtual void update_log_root(Transaction&, omap_root_t&) = 0;
7490
virtual void update_xattr_root(Transaction&, omap_root_t&) = 0;
7591
virtual void update_object_data(Transaction&, object_data_t&) = 0;
7692
virtual void update_object_info(Transaction&, ceph::bufferlist&) = 0;

src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ void FLTreeOnode::Recorder::apply_value_delta(
2828
DEBUG("update omap root");
2929
bliter.copy(sizeof(mlayout.omap_root), (char *)&mlayout.omap_root);
3030
break;
31+
case delta_op_t::UPDATE_LOG_ROOT:
32+
DEBUG("update log root");
33+
bliter.copy(sizeof(mlayout.log_root), (char *)&mlayout.log_root);
34+
break;
3135
case delta_op_t::UPDATE_XATTR_ROOT:
3236
DEBUG("update xattr root");
3337
bliter.copy(sizeof(mlayout.xattr_root), (char *)&mlayout.xattr_root);
@@ -88,6 +92,12 @@ void FLTreeOnode::Recorder::encode_update(
8892
(const char *)&layout.omap_root,
8993
sizeof(layout.omap_root));
9094
break;
95+
case delta_op_t::UPDATE_LOG_ROOT:
96+
DEBUG("update log root");
97+
encoded.append(
98+
(const char *)&layout.log_root,
99+
sizeof(layout.log_root));
100+
break;
91101
case delta_op_t::UPDATE_XATTR_ROOT:
92102
DEBUG("update xattr root");
93103
encoded.append(

0 commit comments

Comments
 (0)