Skip to content

Commit 7530afa

Browse files
authored
Merge pull request ceph#57740 from bill-scales/wip-bscales-align-write-buffers-v2
os/transaction: page align write data buffers to improve performance - version 2
2 parents 4448e6a + a0c9fec commit 7530afa

18 files changed: +1554 / -218 lines changed

src/crimson/osd/ops_executer.cc

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1102,17 +1102,18 @@ void OpsExecuter::apply_stats()
11021102
pg->apply_stats(get_target(), delta_stats);
11031103
}
11041104

1105-
OpsExecuter::OpsExecuter(Ref<PG> pg,
1105+
OpsExecuter::OpsExecuter(Ref<PG> _pg,
11061106
ObjectContextRef _obc,
11071107
const OpInfo& op_info,
11081108
abstracted_msg_t&& msg,
11091109
crimson::net::ConnectionXcoreRef conn,
11101110
const SnapContext& _snapc)
1111-
: pg(std::move(pg)),
1111+
: pg(std::move(_pg)),
11121112
obc(std::move(_obc)),
11131113
op_info(op_info),
11141114
msg(std::move(msg)),
11151115
conn(conn),
1116+
txn(pg->min_peer_features()),
11161117
snapc(_snapc)
11171118
{
11181119
if (op_info.may_write() && should_clone(*obc, snapc)) {

src/crimson/osd/pg.cc

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1221,8 +1221,10 @@ PG::handle_rep_op_fut PG::handle_rep_op(Ref<MOSDRepOp> req)
12211221
DEBUGDPP("{}", *this, *req);
12221222

12231223
ceph::os::Transaction txn;
1224-
auto encoded_txn = req->get_data().cbegin();
1225-
decode(txn, encoded_txn);
1224+
auto encoded_txn_p = req->get_middle().cbegin();
1225+
auto encoded_txn_d = req->get_data().cbegin();
1226+
txn.decode(req->get_middle().length() != 0 ? encoded_txn_p : encoded_txn_d,
1227+
encoded_txn_d);
12261228
auto p = req->logbl.cbegin();
12271229
std::vector<pg_log_entry_t> log_entries;
12281230
decode(log_entries, p);

src/crimson/osd/replicated_backend.cc

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,8 @@ ReplicatedBackend::_read(const hobject_t& hoid,
3939
MURef<MOSDRepOp> ReplicatedBackend::new_repop_msg(
4040
const pg_shard_t &pg_shard,
4141
const hobject_t &hoid,
42-
const bufferlist &encoded_txn,
42+
bufferlist &encoded_txn_p_bl,
43+
bufferlist &encoded_txn_d_bl,
4344
const osd_op_params_t &osd_op_p,
4445
epoch_t min_epoch,
4546
epoch_t map_epoch,
@@ -59,7 +60,13 @@ MURef<MOSDRepOp> ReplicatedBackend::new_repop_msg(
5960
tid,
6061
osd_op_p.at_version);
6162
if (send_op) {
62-
m->set_data(encoded_txn);
63+
if (encoded_txn_d_bl.length() != 0) {
64+
m->set_txn_payload(encoded_txn_p_bl);
65+
m->set_data(encoded_txn_d_bl);
66+
} else {
67+
// Pre-tentacle format - everything in data
68+
m->set_data(encoded_txn_p_bl);
69+
}
6370
} else {
6471
ceph::os::Transaction t;
6572
bufferlist bl;
@@ -97,8 +104,8 @@ ReplicatedBackend::submit_transaction(
97104
pg_shards.size(),
98105
osd_op_p.at_version,
99106
pg.get_last_complete()).first;
100-
bufferlist encoded_txn;
101-
encode(txn, encoded_txn);
107+
bufferlist encoded_txn_p_bl, encoded_txn_d_bl;
108+
txn.encode(encoded_txn_p_bl, encoded_txn_d_bl, pg.min_peer_features());
102109

103110
bool is_delete = false;
104111
for (auto &le : log_entries) {
@@ -120,11 +127,11 @@ ReplicatedBackend::submit_transaction(
120127
MURef<MOSDRepOp> m;
121128
if (pg.should_send_op(pg_shard, hoid)) {
122129
m = new_repop_msg(
123-
pg_shard, hoid, encoded_txn, osd_op_p,
130+
pg_shard, hoid, encoded_txn_p_bl, encoded_txn_d_bl, osd_op_p,
124131
min_epoch, map_epoch, log_entries, true, tid);
125132
} else {
126133
m = new_repop_msg(
127-
pg_shard, hoid, encoded_txn, osd_op_p,
134+
pg_shard, hoid, encoded_txn_p_bl, encoded_txn_d_bl, osd_op_p,
128135
min_epoch, map_epoch, log_entries, false, tid);
129136
if (pg.is_missing_on_peer(pg_shard, hoid)) {
130137
if (_new_clone) {

src/crimson/osd/replicated_backend.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,8 @@ class ReplicatedBackend : public PGBackend
7070
MURef<MOSDRepOp> new_repop_msg(
7171
const pg_shard_t &pg_shard,
7272
const hobject_t &hoid,
73-
const bufferlist &encoded_txn,
73+
bufferlist &encoded_txn_p_bl,
74+
bufferlist &encoded_txn_d_bl,
7475
const osd_op_params_t &osd_op_p,
7576
epoch_t min_epoch,
7677
epoch_t map_epoch,

src/include/encoding.h

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -236,7 +236,7 @@ inline void encode_nohead(const std::string& s, bufferlist& bl)
236236
{
237237
encode_nohead(std::string_view(s), bl);
238238
}
239-
inline void decode_nohead(int len, std::string& s, bufferlist::const_iterator& p)
239+
inline void decode_nohead(unsigned len, std::string& s, bufferlist::const_iterator& p)
240240
{
241241
s.clear();
242242
p.copy(len, s);
@@ -318,7 +318,7 @@ inline void encode_nohead(const bufferlist& s, bufferlist& bl)
318318
{
319319
bl.append(s);
320320
}
321-
inline void decode_nohead(int len, bufferlist& s, bufferlist::const_iterator& p)
321+
inline void decode_nohead(unsigned len, bufferlist& s, bufferlist::const_iterator& p)
322322
{
323323
s.clear();
324324
p.copy(len, s);
@@ -463,7 +463,7 @@ inline std::enable_if_t<!traits::supported>
463463
encode_nohead(const std::set<T,Comp,Alloc>& s, bufferlist& bl);
464464
template<class T, class Comp, class Alloc, typename traits=denc_traits<T>>
465465
inline std::enable_if_t<!traits::supported>
466-
decode_nohead(int len, std::set<T,Comp,Alloc>& s, bufferlist::iterator& p);
466+
decode_nohead(unsigned len, std::set<T,Comp,Alloc>& s, bufferlist::iterator& p);
467467
template<class T, class Comp, class Alloc, typename traits=denc_traits<T>>
468468
inline std::enable_if_t<!traits::supported>
469469
encode(const boost::container::flat_set<T, Comp, Alloc>& s, bufferlist& bl);
@@ -476,7 +476,7 @@ encode_nohead(const boost::container::flat_set<T, Comp, Alloc>& s,
476476
bufferlist& bl);
477477
template<class T, class Comp, class Alloc, typename traits=denc_traits<T>>
478478
inline std::enable_if_t<!traits::supported>
479-
decode_nohead(int len, boost::container::flat_set<T, Comp, Alloc>& s,
479+
decode_nohead(unsigned len, boost::container::flat_set<T, Comp, Alloc>& s,
480480
bufferlist::iterator& p);
481481
template<class T, class Comp, class Alloc>
482482
inline void encode(const std::multiset<T,Comp,Alloc>& s, bufferlist& bl);
@@ -496,7 +496,7 @@ inline std::enable_if_t<!traits::supported>
496496
encode_nohead(const std::vector<T,Alloc>& v, bufferlist& bl);
497497
template<class T, class Alloc, typename traits=denc_traits<T>>
498498
inline std::enable_if_t<!traits::supported>
499-
decode_nohead(int len, std::vector<T,Alloc>& v, bufferlist::const_iterator& p);
499+
decode_nohead(unsigned len, std::vector<T,Alloc>& v, bufferlist::const_iterator& p);
500500
template<class T,class Alloc>
501501
inline void encode(const std::vector<std::shared_ptr<T>,Alloc>& v,
502502
bufferlist& bl,
@@ -522,7 +522,7 @@ inline std::enable_if_t<!traits::supported>
522522
encode_nohead(const boost::container::small_vector<T,N,Alloc>& v, bufferlist& bl);
523523
template<class T, std::size_t N, class Alloc, typename traits=denc_traits<T>>
524524
inline std::enable_if_t<!traits::supported>
525-
decode_nohead(int len, boost::container::small_vector<T,N,Alloc>& v, bufferlist::const_iterator& p);
525+
decode_nohead(unsigned len, boost::container::small_vector<T,N,Alloc>& v, bufferlist::const_iterator& p);
526526
// std::map
527527
template<class T, class U, class Comp, class Alloc,
528528
typename t_traits=denc_traits<T>, typename u_traits=denc_traits<U>>
@@ -550,7 +550,7 @@ encode_nohead(const std::map<T,U,Comp,Alloc>& m, bufferlist& bl, uint64_t featur
550550
template<class T, class U, class Comp, class Alloc,
551551
typename t_traits=denc_traits<T>, typename u_traits=denc_traits<U>>
552552
inline std::enable_if_t<!t_traits::supported || !u_traits::supported>
553-
decode_nohead(int n, std::map<T,U,Comp,Alloc>& m, bufferlist::const_iterator& p);
553+
decode_nohead(unsigned n, std::map<T,U,Comp,Alloc>& m, bufferlist::const_iterator& p);
554554
template<class T, class U, class Comp, class Alloc,
555555
typename t_traits=denc_traits<T>, typename u_traits=denc_traits<U>>
556556
inline std::enable_if_t<!t_traits::supported || !u_traits::supported>
@@ -580,7 +580,7 @@ encode_nohead(const boost::container::flat_map<T,U,Comp,Alloc>& m,
580580
template<class T, class U, class Comp, class Alloc,
581581
typename t_traits=denc_traits<T>, typename u_traits=denc_traits<U>>
582582
inline std::enable_if_t<!t_traits::supported || !u_traits::supported>
583-
decode_nohead(int n, boost::container::flat_map<T,U,Comp,Alloc>& m,
583+
decode_nohead(unsigned n, boost::container::flat_map<T,U,Comp,Alloc>& m,
584584
bufferlist::const_iterator& p);
585585
template<class T, class U, class Comp, class Alloc>
586586
inline void encode(const std::multimap<T,U,Comp,Alloc>& m, bufferlist& bl);
@@ -844,9 +844,9 @@ inline typename std::enable_if<!traits::supported>::type
844844
}
845845
template<class T, class Comp, class Alloc, typename traits>
846846
inline std::enable_if_t<!traits::supported>
847-
decode_nohead(int len, std::set<T,Comp,Alloc>& s, bufferlist::const_iterator& p)
847+
decode_nohead(unsigned len, std::set<T,Comp,Alloc>& s, bufferlist::const_iterator& p)
848848
{
849-
for (int i=0; i<len; i++) {
849+
for (unsigned i=0; i<len; i++) {
850850
T v;
851851
decode(v, p);
852852
s.insert(v);
@@ -888,11 +888,11 @@ encode_nohead(const boost::container::flat_set<T, Comp, Alloc>& s,
888888
}
889889
template<class T, class Comp, class Alloc, typename traits>
890890
inline std::enable_if_t<!traits::supported>
891-
decode_nohead(int len, boost::container::flat_set<T, Comp, Alloc>& s,
891+
decode_nohead(unsigned len, boost::container::flat_set<T, Comp, Alloc>& s,
892892
bufferlist::iterator& p)
893893
{
894894
s.reserve(len);
895-
for (int i=0; i<len; i++) {
895+
for (unsigned i=0; i<len; i++) {
896896
T v;
897897
decode(v, p);
898898
s.insert(v);
@@ -959,7 +959,7 @@ inline std::enable_if_t<!traits::supported>
959959
}
960960
template<class T, class Alloc, typename traits>
961961
inline std::enable_if_t<!traits::supported>
962-
decode_nohead(int len, std::vector<T,Alloc>& v, bufferlist::const_iterator& p)
962+
decode_nohead(unsigned len, std::vector<T,Alloc>& v, bufferlist::const_iterator& p)
963963
{
964964
v.resize(len);
965965
for (__u32 i=0; i<v.size(); i++)
@@ -1005,7 +1005,7 @@ inline std::enable_if_t<!traits::supported>
10051005
}
10061006
template<class T, std::size_t N, class Alloc, typename traits>
10071007
inline std::enable_if_t<!traits::supported>
1008-
decode_nohead(int len, boost::container::small_vector<T,N,Alloc>& v, bufferlist::const_iterator& p)
1008+
decode_nohead(unsigned len, boost::container::small_vector<T,N,Alloc>& v, bufferlist::const_iterator& p)
10091009
{
10101010
v.resize(len);
10111011
for (auto& i : v)
@@ -1159,7 +1159,7 @@ inline std::enable_if_t<!t_traits::supported || !u_traits::supported>
11591159
template<class T, class U, class Comp, class Alloc,
11601160
typename t_traits, typename u_traits>
11611161
inline std::enable_if_t<!t_traits::supported || !u_traits::supported>
1162-
decode_nohead(int n, std::map<T,U,Comp,Alloc>& m, bufferlist::const_iterator& p)
1162+
decode_nohead(unsigned n, std::map<T,U,Comp,Alloc>& m, bufferlist::const_iterator& p)
11631163
{
11641164
m.clear();
11651165
while (n--) {
@@ -1172,7 +1172,7 @@ inline std::enable_if_t<!t_traits::supported || !u_traits::supported>
11721172
template <std::move_constructible T, std::move_constructible U, class Comp, class Alloc,
11731173
typename t_traits, typename u_traits>
11741174
inline std::enable_if_t<!t_traits::supported || !u_traits::supported>
1175-
decode_nohead(int n, std::map<T, U, Comp, Alloc>& m, bufferlist::const_iterator& p)
1175+
decode_nohead(unsigned n, std::map<T, U, Comp, Alloc>& m, bufferlist::const_iterator& p)
11761176
{
11771177
m.clear();
11781178
while (n--) {
@@ -1264,7 +1264,7 @@ template<class T, class U, class Comp, class Alloc,
12641264
template<class T, class U, class Comp, class Alloc,
12651265
typename t_traits, typename u_traits>
12661266
inline std::enable_if_t<!t_traits::supported || !u_traits::supported>
1267-
decode_nohead(int n, boost::container::flat_map<T,U,Comp,Alloc>& m,
1267+
decode_nohead(unsigned n, boost::container::flat_map<T,U,Comp,Alloc>& m,
12681268
bufferlist::const_iterator& p)
12691269
{
12701270
m.clear();

src/messages/MOSDECSubOpReadReply.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -48,9 +48,10 @@ class MOSDECSubOpReadReply : public MOSDFastDispatchOp {
4848
void decode_payload() override {
4949
using ceph::decode;
5050
auto p = payload.cbegin();
51+
auto d = data.cbegin();
5152
decode(pgid, p);
5253
decode(map_epoch, p);
53-
decode(op, p);
54+
op.decode(p, d);
5455
if (header.version >= 2) {
5556
decode(min_epoch, p);
5657
decode_trace(p);
@@ -63,7 +64,7 @@ class MOSDECSubOpReadReply : public MOSDFastDispatchOp {
6364
using ceph::encode;
6465
encode(pgid, payload);
6566
encode(map_epoch, payload);
66-
encode(op, payload);
67+
op.encode(payload, data, features);
6768
encode(min_epoch, payload);
6869
encode_trace(payload, features);
6970
}

src/messages/MOSDECSubOpWrite.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -52,9 +52,10 @@ class MOSDECSubOpWrite : public MOSDFastDispatchOp {
5252
void decode_payload() override {
5353
using ceph::decode;
5454
auto p = payload.cbegin();
55+
auto d = data.cbegin();
5556
decode(pgid, p);
5657
decode(map_epoch, p);
57-
decode(op, p);
58+
op.decode(p, d);
5859
if (header.version >= 2) {
5960
decode(min_epoch, p);
6061
decode_trace(p);
@@ -67,7 +68,7 @@ class MOSDECSubOpWrite : public MOSDFastDispatchOp {
6768
using ceph::encode;
6869
encode(pgid, payload);
6970
encode(map_epoch, payload);
70-
encode(op, payload);
71+
op.encode(payload, data, features);
7172
encode(min_epoch, payload);
7273
encode_trace(payload, features);
7374
}

src/messages/MOSDRepOp.h

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,8 @@ class MOSDRepOp final : public MOSDFastDispatchOp {
8585
/// non-empty if this transaction involves a hit_set history update
8686
std::optional<pg_hit_set_history_t> updated_hit_set_history;
8787

88+
bufferlist txn_payload;
89+
8890
epoch_t get_map_epoch() const override {
8991
return map_epoch;
9092
}
@@ -99,6 +101,11 @@ class MOSDRepOp final : public MOSDFastDispatchOp {
99101
return data.length();
100102
}
101103

104+
void set_txn_payload(bufferlist bl)
105+
{
106+
txn_payload = bl;
107+
}
108+
102109
void decode_payload() override {
103110
using ceph::decode;
104111
p = payload.cbegin();
@@ -159,6 +166,8 @@ class MOSDRepOp final : public MOSDFastDispatchOp {
159166
encode(from, payload);
160167
encode(updated_hit_set_history, payload);
161168
encode(pg_committed_to, payload);
169+
bufferlist middle(txn_payload);
170+
set_middle(middle);
162171
}
163172

164173
MOSDRepOp()

src/msg/Message.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -441,6 +441,7 @@ class Message : public RefCountedObject {
441441
byte_throttler->take(middle.length());
442442
}
443443
ceph::buffer::list& get_middle() { return middle; }
444+
const ceph::buffer::list& get_middle() const { return middle; }
444445

445446
void set_data(const ceph::buffer::list &bl) {
446447
if (byte_throttler)

0 commit comments

Comments
 (0)