Skip to content

Commit 42dc7bb

Browse files
committed
osd: EC optimizations: add partial_writes_last_complete to pg_info_t
Add partial_writes_last_complete map to pg_info_t and pg_fast_info_t. For optimized EC pools not all shards receive every log entry. As log entries are marked completed, the partial writes last complete map is updated to track shards that did not receive the log entry. Each map entry stores an eversion range. The first version is the last completion the shard participated in; the second version tracks subsequent updates where the shard was not updated. For example, the range 88'10-88'12 means a shard completed update 10 and that updates 11 and 12 intentionally did not update the shard. This information is used during peering to distinguish a shard that is missing updates from a shard that intentionally did not participate in an update, in order to work out what recovery is required. By default this map is empty, indicating that every shard is expected to participate in an update and have a copy of the log entry. Signed-off-by: Bill Scales <[email protected]>
1 parent 5c6f346 commit 42dc7bb

File tree

2 files changed

+38
-4
lines changed

2 files changed

+38
-4
lines changed

src/osd/osd_types.cc

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3627,7 +3627,7 @@ void pg_history_t::generate_test_instances(list<pg_history_t*>& o)
36273627

36283628
void pg_info_t::encode(ceph::buffer::list &bl) const
36293629
{
3630-
ENCODE_START(32, 26, bl);
3630+
ENCODE_START(33, 26, bl);
36313631
encode(pgid.pgid, bl);
36323632
encode(last_update, bl);
36333633
encode(last_complete, bl);
@@ -3643,12 +3643,13 @@ void pg_info_t::encode(ceph::buffer::list &bl) const
36433643
encode(last_backfill, bl);
36443644
encode(true, bl); // was last_backfill_bitwise
36453645
encode(last_interval_started, bl);
3646+
encode(partial_writes_last_complete, bl);
36463647
ENCODE_FINISH(bl);
36473648
}
36483649

36493650
void pg_info_t::decode(ceph::buffer::list::const_iterator &bl)
36503651
{
3651-
DECODE_START(32, bl);
3652+
DECODE_START(33, bl);
36523653
decode(pgid.pgid, bl);
36533654
decode(last_update, bl);
36543655
decode(last_complete, bl);
@@ -3677,6 +3678,9 @@ void pg_info_t::decode(ceph::buffer::list::const_iterator &bl)
36773678
} else {
36783679
last_interval_started = last_epoch_started;
36793680
}
3681+
if (struct_v >= 33) {
3682+
decode(partial_writes_last_complete, bl);
3683+
}
36803684
DECODE_FINISH(bl);
36813685
}
36823686

@@ -3691,6 +3695,16 @@ void pg_info_t::dump(Formatter *f) const
36913695
f->dump_stream("log_tail") << log_tail;
36923696
f->dump_int("last_user_version", last_user_version);
36933697
f->dump_stream("last_backfill") << last_backfill;
3698+
f->open_array_section("partial_writes_last_complete");
3699+
for (const auto & [shard, versionrange] : partial_writes_last_complete) {
3700+
auto & [from, to] = versionrange;
3701+
f->open_object_section("shard");
3702+
f->dump_int("id", int(shard));
3703+
f->dump_stream("from") << from;
3704+
f->dump_stream("to") << to;
3705+
f->close_section();
3706+
}
3707+
f->close_section();
36943708
f->open_array_section("purged_snaps");
36953709
for (interval_set<snapid_t>::const_iterator i=purged_snaps.begin();
36963710
i != purged_snaps.end();

src/osd/osd_types.h

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3057,6 +3057,9 @@ struct pg_info_t {
30573057

30583058
interval_set<snapid_t> purged_snaps;
30593059

3060+
std::map<shard_id_t,std::pair<eversion_t, eversion_t>>
3061+
partial_writes_last_complete; ///< last_complete for shards not modified by a partial write
3062+
30603063
pg_stat_t stats;
30613064

30623065
pg_history_t history;
@@ -3073,6 +3076,7 @@ struct pg_info_t {
30733076
l.log_tail == r.log_tail &&
30743077
l.last_backfill == r.last_backfill &&
30753078
l.purged_snaps == r.purged_snaps &&
3079+
l.partial_writes_last_complete == r.partial_writes_last_complete &&
30763080
l.stats == r.stats &&
30773081
l.history == r.history &&
30783082
l.hit_set == r.hit_set;
@@ -3149,6 +3153,7 @@ struct pg_fast_info_t {
31493153
eversion_t last_update;
31503154
eversion_t last_complete;
31513155
version_t last_user_version;
3156+
std::map<shard_id_t,std::pair<eversion_t,eversion_t>> partial_writes_last_complete;
31523157
struct { // pg_stat_t stats
31533158
eversion_t version;
31543159
version_t reported_seq;
@@ -3178,6 +3183,7 @@ struct pg_fast_info_t {
31783183
last_update = info.last_update;
31793184
last_complete = info.last_complete;
31803185
last_user_version = info.last_user_version;
3186+
partial_writes_last_complete = info.partial_writes_last_complete;
31813187
stats.version = info.stats.version;
31823188
stats.reported_seq = info.stats.reported_seq;
31833189
stats.last_fresh = info.stats.last_fresh;
@@ -3204,6 +3210,7 @@ struct pg_fast_info_t {
32043210
info->last_update = last_update;
32053211
info->last_complete = last_complete;
32063212
info->last_user_version = last_user_version;
3213+
info->partial_writes_last_complete = partial_writes_last_complete;
32073214
info->stats.version = stats.version;
32083215
info->stats.reported_seq = stats.reported_seq;
32093216
info->stats.last_fresh = stats.last_fresh;
@@ -3227,7 +3234,7 @@ struct pg_fast_info_t {
32273234
}
32283235

32293236
void encode(ceph::buffer::list& bl) const {
3230-
ENCODE_START(1, 1, bl);
3237+
ENCODE_START(2, 1, bl);
32313238
encode(last_update, bl);
32323239
encode(last_complete, bl);
32333240
encode(last_user_version, bl);
@@ -3249,10 +3256,11 @@ struct pg_fast_info_t {
32493256
encode(stats.stats.sum.num_wr, bl);
32503257
encode(stats.stats.sum.num_wr_kb, bl);
32513258
encode(stats.stats.sum.num_objects_dirty, bl);
3259+
encode(partial_writes_last_complete, bl);
32523260
ENCODE_FINISH(bl);
32533261
}
32543262
void decode(ceph::buffer::list::const_iterator& p) {
3255-
DECODE_START(1, p);
3263+
DECODE_START(2, p);
32563264
decode(last_update, p);
32573265
decode(last_complete, p);
32583266
decode(last_user_version, p);
@@ -3274,12 +3282,24 @@ struct pg_fast_info_t {
32743282
decode(stats.stats.sum.num_wr, p);
32753283
decode(stats.stats.sum.num_wr_kb, p);
32763284
decode(stats.stats.sum.num_objects_dirty, p);
3285+
if (struct_v >= 2)
3286+
decode(partial_writes_last_complete, p);
32773287
DECODE_FINISH(p);
32783288
}
32793289
void dump(ceph::Formatter *f) const {
32803290
f->dump_stream("last_update") << last_update;
32813291
f->dump_stream("last_complete") << last_complete;
32823292
f->dump_stream("last_user_version") << last_user_version;
3293+
f->open_array_section("partial_writes_last_complete");
3294+
for (const auto & [shard, versionrange] : partial_writes_last_complete) {
3295+
auto & [from, to] = versionrange;
3296+
f->open_object_section("shard");
3297+
f->dump_int("id", int(shard));
3298+
f->dump_stream("from") << from;
3299+
f->dump_stream("to") << to;
3300+
f->close_section();
3301+
}
3302+
f->close_section();
32833303
f->open_object_section("stats");
32843304
f->dump_stream("version") << stats.version;
32853305
f->dump_unsigned("reported_seq", stats.reported_seq);

0 commit comments

Comments
 (0)