Skip to content

Commit fffbab1

Browse files
authored
Merge pull request ceph#56353 from myoungwon/wip-apply-shallow-copy-rbm-overwrite
crimson/os/seastore: avoid new allocation when overwriting data in RBM for performance Reviewed-by: Yingxin Cheng <[email protected]>
2 parents 274926b + 5df6ffc commit fffbab1

File tree

5 files changed

+243
-42
lines changed

5 files changed

+243
-42
lines changed

src/common/options/crimson.yaml.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,5 +126,5 @@ options:
126126
- name: seastore_data_delta_based_overwrite
127127
type: size
128128
level: dev
129-
desc: overwrite the existing data block based on delta if the original size is smaller than the value, otherwise do overwrite based on remapping, set to 0 to enforce the remap-based overwrite.
129+
desc: overwrite the existing data block based on delta if the overwrite size is equal to or less than the value, otherwise do overwrite based on remapping, set to 0 to enforce the remap-based overwrite.
130130
default: 0

src/crimson/os/seastore/cached_extent.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -547,11 +547,11 @@ class CachedExtent
547547
}
548548

549549
/// Get ref to raw buffer
550-
bufferptr &get_bptr() {
550+
virtual bufferptr &get_bptr() {
551551
assert(ptr.has_value());
552552
return *ptr;
553553
}
554-
const bufferptr &get_bptr() const {
554+
virtual const bufferptr &get_bptr() const {
555555
assert(ptr.has_value());
556556
return *ptr;
557557
}
@@ -648,11 +648,6 @@ class CachedExtent
648648
return extent_index_hook.is_linked();
649649
}
650650

651-
/// set bufferptr
652-
void set_bptr(ceph::bufferptr &&nptr) {
653-
ptr = nptr;
654-
}
655-
656651
/// hook for intrusive ref list (mainly dirty or lru list)
657652
boost::intrusive::list_member_hook<> primary_ref_list_hook;
658653
using primary_ref_list_member_options = boost::intrusive::member_hook<
@@ -799,6 +794,11 @@ class CachedExtent
799794
poffset = offset;
800795
}
801796

797+
/// set bufferptr
798+
void set_bptr(ceph::bufferptr &&nptr) {
799+
ptr = nptr;
800+
}
801+
802802
/**
803803
* maybe_generate_relative
804804
*

src/crimson/os/seastore/object_data_handler.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,7 @@ overwrite_ops_t prepare_ops_list(
297297
interval_set<uint64_t> pre_alloc_addr_removed, pre_alloc_addr_remapped;
298298
if (delta_based_overwrite_max_extent_size) {
299299
for (auto &r : ops.to_remove) {
300+
// TODO: Introduce LBAMapping::is_data_stable() to include EXIST_CLEAN extents
300301
if (r->is_stable() && !r->is_zero_reserved()) {
301302
pre_alloc_addr_removed.insert(r->get_key(), r->get_length());
302303

src/crimson/os/seastore/object_data_handler.h

Lines changed: 86 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,22 +30,72 @@ struct block_delta_t {
3030
}
3131
};
3232

33+
class overwrite_buf_t {
34+
public:
35+
overwrite_buf_t() = default;
36+
bool is_empty() const {
37+
return changes.empty() && !has_cached_bptr();
38+
}
39+
bool has_cached_bptr() const {
40+
return ptr.has_value();
41+
}
42+
void add(const block_delta_t &b) {
43+
changes.push_back(b);
44+
}
45+
void apply_changes_to(bufferptr &b) const {
46+
assert(!changes.empty());
47+
for (auto p : changes) {
48+
auto iter = p.bl.cbegin();
49+
iter.copy(p.bl.length(), b.c_str() + p.offset);
50+
}
51+
changes.clear();
52+
}
53+
const bufferptr &get_cached_bptr(const bufferptr &_ptr) const {
54+
apply_changes_to_cache(_ptr);
55+
return *ptr;
56+
}
57+
bufferptr &get_cached_bptr(const bufferptr &_ptr) {
58+
apply_changes_to_cache(_ptr);
59+
return *ptr;
60+
}
61+
bufferptr &&move_cached_bptr() {
62+
assert(has_cached_bptr());
63+
apply_changes_to(*ptr);
64+
return std::move(*ptr);
65+
}
66+
private:
67+
void apply_changes_to_cache(const bufferptr &_ptr) const {
68+
assert(!is_empty());
69+
if (!has_cached_bptr()) {
70+
ptr = ceph::buffer::copy(_ptr.c_str(), _ptr.length());
71+
}
72+
if (!changes.empty()) {
73+
apply_changes_to(*ptr);
74+
}
75+
}
76+
mutable std::vector<block_delta_t> changes = {};
77+
mutable std::optional<ceph::bufferptr> ptr = std::nullopt;
78+
};
79+
3380
struct ObjectDataBlock : crimson::os::seastore::LogicalCachedExtent {
3481
using Ref = TCachedExtentRef<ObjectDataBlock>;
3582

3683
std::vector<block_delta_t> delta = {};
3784

3885
interval_set<extent_len_t> modified_region;
3986

87+
// to provide the local modified view during transaction
88+
overwrite_buf_t cached_overwrites;
89+
4090
explicit ObjectDataBlock(ceph::bufferptr &&ptr)
4191
: LogicalCachedExtent(std::move(ptr)) {}
42-
explicit ObjectDataBlock(const ObjectDataBlock &other)
43-
: LogicalCachedExtent(other), modified_region(other.modified_region) {}
92+
explicit ObjectDataBlock(const ObjectDataBlock &other, share_buffer_t s)
93+
: LogicalCachedExtent(other, s), modified_region(other.modified_region) {}
4494
explicit ObjectDataBlock(extent_len_t length)
4595
: LogicalCachedExtent(length) {}
4696

4797
CachedExtentRef duplicate_for_write(Transaction&) final {
48-
return CachedExtentRef(new ObjectDataBlock(*this));
98+
return CachedExtentRef(new ObjectDataBlock(*this, share_buffer_t{}));
4999
};
50100

51101
static constexpr extent_types_t TYPE = extent_types_t::OBJECT_DATA_BLOCK;
@@ -54,9 +104,9 @@ struct ObjectDataBlock : crimson::os::seastore::LogicalCachedExtent {
54104
}
55105

56106
void overwrite(extent_len_t offset, bufferlist bl) {
57-
auto iter = bl.cbegin();
58-
iter.copy(bl.length(), get_bptr().c_str() + offset);
59-
delta.push_back({offset, bl.length(), bl});
107+
block_delta_t b {offset, bl.length(), bl};
108+
cached_overwrites.add(b);
109+
delta.push_back(b);
60110
modified_region.union_insert(offset, bl.length());
61111
}
62112

@@ -76,9 +126,39 @@ struct ObjectDataBlock : crimson::os::seastore::LogicalCachedExtent {
76126
modified_region.clear();
77127
}
78128

129+
void prepare_commit() final {
130+
if (is_mutation_pending() || is_exist_mutation_pending()) {
131+
ceph_assert(!cached_overwrites.is_empty());
132+
if (cached_overwrites.has_cached_bptr()) {
133+
set_bptr(cached_overwrites.move_cached_bptr());
134+
} else {
135+
// The optimized path to minimize data copy
136+
cached_overwrites.apply_changes_to(CachedExtent::get_bptr());
137+
}
138+
} else {
139+
assert(cached_overwrites.is_empty());
140+
}
141+
}
142+
79143
void logical_on_delta_write() final {
80144
delta.clear();
81145
}
146+
147+
bufferptr &get_bptr() override {
148+
if (cached_overwrites.is_empty()) {
149+
return CachedExtent::get_bptr();
150+
} else {
151+
return cached_overwrites.get_cached_bptr(CachedExtent::get_bptr());
152+
}
153+
}
154+
155+
const bufferptr &get_bptr() const override {
156+
if (cached_overwrites.is_empty()) {
157+
return CachedExtent::get_bptr();
158+
} else {
159+
return cached_overwrites.get_cached_bptr(CachedExtent::get_bptr());
160+
}
161+
}
82162
};
83163
using ObjectDataBlockRef = TCachedExtentRef<ObjectDataBlock>;
84164

0 commit comments

Comments
 (0)