Skip to content

Commit 5c6f346

Browse files
committed
osd: EC optimizations: add nonprimary_shards set to pg_pool_t
Pools with EC optimizations enabled do not update every shard on every I/O. The primary must have a complete log and requires objects to have up-to-date object attributes, so the choice of primary has to be restricted. Shards that cannot become a primary are listed in the nonprimary_shards set. For a K+M EC pool with optimizations enabled, the first data shard and all M coding parity shards are always updated and can become a primary; the other shards will be marked as nonprimary. The new set nonprimary_shards stores shards that cannot become the primary; by default it is an empty set, which retains existing behavior. When optimizations are enabled on an EC pool this set will be filled in to restrict the choice of primary. Signed-off-by: Bill Scales <[email protected]>
1 parent c1ca6f4 commit 5c6f346

File tree

2 files changed

+18
-3
lines changed

2 files changed

+18
-3
lines changed

src/osd/osd_types.cc

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1642,6 +1642,7 @@ void pg_pool_t::dump(Formatter *f) const
16421642
f->dump_unsigned("stripe_width", get_stripe_width());
16431643
f->dump_unsigned("expected_num_objects", expected_num_objects);
16441644
f->dump_bool("fast_read", fast_read);
1645+
f->dump_stream("nonprimary_shards") << nonprimary_shards;
16451646
f->open_object_section("options");
16461647
opts.dump(f);
16471648
f->close_section(); // options
@@ -1961,7 +1962,7 @@ void pg_pool_t::encode(ceph::buffer::list& bl, uint64_t features) const
19611962
return;
19621963
}
19631964

1964-
uint8_t v = 31;
1965+
uint8_t v = 32;
19651966
// NOTE: any new encoding dependencies must be reflected by
19661967
// SIGNIFICANT_FEATURES
19671968
if (!HAVE_FEATURE(features, SERVER_TENTACLE)) {
@@ -2080,12 +2081,15 @@ void pg_pool_t::encode(ceph::buffer::list& bl, uint64_t features) const
20802081
auto maybe_peering_crush_data1 = maybe_peering_crush_data();
20812082
encode(maybe_peering_crush_data1, bl);
20822083
}
2084+
if (v >= 32) {
2085+
encode(nonprimary_shards, bl);
2086+
}
20832087
ENCODE_FINISH(bl);
20842088
}
20852089

20862090
void pg_pool_t::decode(ceph::buffer::list::const_iterator& bl)
20872091
{
2088-
DECODE_START_LEGACY_COMPAT_LEN(31, 5, 5, bl);
2092+
DECODE_START_LEGACY_COMPAT_LEN(32, 5, 5, bl);
20892093
decode(type, bl);
20902094
decode(size, bl);
20912095
decode(crush_rule, bl);
@@ -2276,6 +2280,11 @@ void pg_pool_t::decode(ceph::buffer::list::const_iterator& bl)
22762280
peering_crush_mandatory_member) = *peering_crush_data;
22772281
}
22782282
}
2283+
if (struct_v >= 32) {
2284+
decode(nonprimary_shards, bl);
2285+
} else {
2286+
nonprimary_shards.clear();
2287+
}
22792288
DECODE_FINISH(bl);
22802289
calc_pg_masks();
22812290
calc_grade_table();
@@ -2377,6 +2386,7 @@ void pg_pool_t::generate_test_instances(list<pg_pool_t*>& o)
23772386
a.erasure_code_profile = "profile in osdmap";
23782387
a.expected_num_objects = 123456;
23792388
a.fast_read = false;
2389+
a.nonprimary_shards.clear();
23802390
a.application_metadata = {{"rbd", {{"key", "value"}}}};
23812391
o.push_back(new pg_pool_t(a));
23822392

src/osd/osd_types.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1626,7 +1626,7 @@ struct pg_pool_t {
16261626
uint64_t expected_num_objects = 0; ///< expected number of objects on this pool, a value of 0 indicates
16271627
///< user does not specify any expected value
16281628
bool fast_read = false; ///< whether turn on fast read on the pool or not
1629-
1629+
shard_id_set nonprimary_shards; ///< EC partial writes: shards that cannot become a primary
16301630
pool_opts_t opts; ///< options
16311631

16321632
typedef enum {
@@ -1931,6 +1931,11 @@ struct pg_pool_t {
19311931
/// choose a random hash position within a pg
19321932
uint32_t get_random_pg_position(pg_t pgid, uint32_t seed) const;
19331933

1934+
/// EC partial writes: test whether a shard is listed in nonprimary_shards,
/// the set of shards that cannot become a primary. Always returns false
/// while the set is empty.
1935+
bool is_nonprimary_shard(const shard_id_t shard) const {
1936+
return !nonprimary_shards.empty() && nonprimary_shards.contains(shard);
1937+
}
1938+
19341939
void encode(ceph::buffer::list& bl, uint64_t features) const;
19351940
void decode(ceph::buffer::list::const_iterator& bl);
19361941

0 commit comments

Comments
 (0)