Skip to content

Commit 790d430

Browse files
Merge pull request ceph#63184 from JonBailey1993/ec_optimisation_deep_scrubbing_implementation
OSD: Optimised EC Deep Scrubbing Implementation
2 parents 266a4e4 + afd3cae commit 790d430

27 files changed

+988
-68
lines changed

src/common/bitset_set.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,11 @@ class bitset_set {
3939
KeyT pos;
4040

4141
public:
42+
using value_type = const KeyT;
4243
using difference_type = std::int64_t;
44+
using pointer = const value_type *;
45+
using reference = const value_type &;
46+
using iterator_category = std::forward_iterator_tag;
4347

4448
const_iterator() : set(nullptr), pos(0) {
4549
}

src/common/mini_flat_map.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,22 @@ class mini_flat_map {
5050
template<bool is_const>
5151
class _iterator {
5252
friend class mini_flat_map;
53-
using mini_flat_map_p = std::conditional_t<is_const,
54-
const mini_flat_map *,
55-
mini_flat_map *>;
53+
54+
public:
55+
// types required by std::iterator_traits
5656
using value_type = std::conditional_t<is_const,
5757
const std::pair<const KeyT &,
5858
const ValueT &>,
5959
std::pair<const KeyT &, ValueT &>>;
60-
60+
using difference_type = std::ptrdiff_t;
61+
using pointer = const value_type *;
62+
using reference = const value_type &;
63+
using iterator_category = std::forward_iterator_tag;
64+
65+
private:
66+
using mini_flat_map_p = std::conditional_t<is_const,
67+
const mini_flat_map *,
68+
mini_flat_map *>;
6169
mini_flat_map_p map;
6270
std::optional<value_type> value;
6371
KeyT key;
@@ -73,8 +81,6 @@ class mini_flat_map {
7381
}
7482

7583
public:
76-
using difference_type = std::ptrdiff_t;
77-
7884
_iterator(mini_flat_map_p map) : map(map), key(0) {
7985
progress();
8086
}

src/erasure-code/ErasureCodeInterface.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -679,6 +679,11 @@ namespace ceph {
679679
* are irrelevant if this flag is false.
680680
*/
681681
FLAG_EC_PLUGIN_OPTIMIZED_SUPPORTED = 1<<6,
682+
/* This plugin supports the ability to encode CRCs of data shards to get
683+
* the CRC of a parity shard. This flag also represents the inverse,
684+
* to decode a parity CRC to get the CRC of a data shard.
685+
*/
686+
FLAG_EC_PLUGIN_CRC_ENCODE_DECODE_SUPPORT = 1<<7,
682687
};
683688
static const char *get_optimization_flag_name(const plugin_flags flag) {
684689
switch (flag) {
@@ -689,6 +694,8 @@ namespace ceph {
689694
case FLAG_EC_PLUGIN_PARITY_DELTA_OPTIMIZATION: return "paritydelta";
690695
case FLAG_EC_PLUGIN_REQUIRE_SUB_CHUNKS: return "requiresubchunks";
691696
case FLAG_EC_PLUGIN_OPTIMIZED_SUPPORTED: return "optimizedsupport";
697+
case FLAG_EC_PLUGIN_CRC_ENCODE_DECODE_SUPPORT:
698+
return "crcencodedecode";
692699
default: return "???";
693700
}
694701
}

src/erasure-code/clay/ErasureCodeClay.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ class ErasureCodeClay final : public ceph::ErasureCode {
5252
// the corner case of m = 1
5353
return FLAG_EC_PLUGIN_PARTIAL_READ_OPTIMIZATION |
5454
FLAG_EC_PLUGIN_PARTIAL_WRITE_OPTIMIZATION |
55-
FLAG_EC_PLUGIN_REQUIRE_SUB_CHUNKS;
55+
FLAG_EC_PLUGIN_REQUIRE_SUB_CHUNKS |
56+
FLAG_EC_PLUGIN_CRC_ENCODE_DECODE_SUPPORT;
5657
}
5758
return FLAG_EC_PLUGIN_PARTIAL_READ_OPTIMIZATION |
5859
FLAG_EC_PLUGIN_REQUIRE_SUB_CHUNKS;

src/erasure-code/isa/ErasureCodeIsa.h

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,10 @@ class ErasureCodeIsa : public ceph::ErasureCode {
5353
int w;
5454

5555
ErasureCodeIsaTableCache &tcache;
56-
const char *technique;
56+
std::string technique;
5757
uint64_t flags;
5858

59-
ErasureCodeIsa(const char *_technique,
59+
ErasureCodeIsa(const std::string &_technique,
6060
ErasureCodeIsaTableCache &_tcache) :
6161
k(0),
6262
m(0),
@@ -69,9 +69,11 @@ class ErasureCodeIsa : public ceph::ErasureCode {
6969
FLAG_EC_PLUGIN_ZERO_INPUT_ZERO_OUTPUT_OPTIMIZATION |
7070
FLAG_EC_PLUGIN_PARITY_DELTA_OPTIMIZATION;
7171

72-
if (technique == "reed_sol_van"sv ||
73-
technique == "default"sv) {
74-
flags |= FLAG_EC_PLUGIN_OPTIMIZED_SUPPORTED;
72+
if (technique == "reed_sol_van"sv) {
73+
flags |= FLAG_EC_PLUGIN_OPTIMIZED_SUPPORTED |
74+
FLAG_EC_PLUGIN_CRC_ENCODE_DECODE_SUPPORT;
75+
} else if (technique == "cauchy"sv && m == 1) {
76+
flags |= FLAG_EC_PLUGIN_CRC_ENCODE_DECODE_SUPPORT;
7577
}
7678
}
7779

@@ -151,15 +153,14 @@ class ErasureCodeIsaDefault : public ErasureCodeIsa {
151153
unsigned char* encode_tbls; // encoding table
152154

153155
ErasureCodeIsaDefault(ErasureCodeIsaTableCache &_tcache,
156+
const std::string& technique,
154157
int matrix = kVandermonde) :
155-
156-
ErasureCodeIsa("default", _tcache),
158+
ErasureCodeIsa(technique, _tcache),
157159
encode_coeff(0), encode_tbls(0)
158160
{
159161
matrixtype = matrix;
160162
}
161163

162-
163164
~ErasureCodeIsaDefault() override
164165
{
165166

src/erasure-code/isa/ErasureCodePluginIsa.cc

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -36,24 +36,22 @@ int ErasureCodePluginIsa::factory(const std::string &directory,
3636
std::ostream *ss)
3737
{
3838
ErasureCodeIsa *interface;
39-
std::string t;
40-
if (profile.find("technique") == profile.end())
41-
profile["technique"] = "reed_sol_van";
42-
t = profile.find("technique")->second;
43-
if ((t == "reed_sol_van")) {
39+
std::string technique;
40+
technique = profile.find("technique")->second;
41+
if ((technique == "reed_sol_van")) {
4442
interface = new ErasureCodeIsaDefault(tcache,
43+
technique,
4544
ErasureCodeIsaDefault::kVandermonde);
45+
} else if ((technique == "cauchy")) {
46+
interface = new ErasureCodeIsaDefault(tcache,
47+
technique,
48+
ErasureCodeIsaDefault::kCauchy);
4649
} else {
47-
if ((t == "cauchy")) {
48-
interface = new ErasureCodeIsaDefault(tcache,
49-
ErasureCodeIsaDefault::kCauchy);
50-
} else {
51-
*ss << "technique=" << t << " is not a valid coding technique. "
52-
<< " Choose one of the following: "
53-
<< "reed_sol_van,"
54-
<< "cauchy" << std::endl;
55-
return -ENOENT;
56-
}
50+
*ss << "technique=" << technique << " is not a valid coding technique. "
51+
<< " Choose one of the following: "
52+
<< "reed_sol_van,"
53+
<< "cauchy" << std::endl;
54+
return -ENOENT;
5755
}
5856

5957
int r = interface->init(profile, ss);

src/erasure-code/jerasure/ErasureCodeJerasure.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ class ErasureCodeJerasure : public ceph::ErasureCode {
5454

5555
if (technique == "reed_sol_van"sv) {
5656
flags |= FLAG_EC_PLUGIN_OPTIMIZED_SUPPORTED;
57+
} else if (technique != "cauchy_orig"sv) {
58+
flags |= FLAG_EC_PLUGIN_CRC_ENCODE_DECODE_SUPPORT;
5759
}
5860
}
5961

src/erasure-code/shec/ErasureCodeShec.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ class ErasureCodeShec : public ceph::ErasureCode {
6565
return FLAG_EC_PLUGIN_PARTIAL_READ_OPTIMIZATION |
6666
FLAG_EC_PLUGIN_PARTIAL_WRITE_OPTIMIZATION |
6767
FLAG_EC_PLUGIN_ZERO_INPUT_ZERO_OUTPUT_OPTIMIZATION |
68-
FLAG_EC_PLUGIN_PARITY_DELTA_OPTIMIZATION;
68+
FLAG_EC_PLUGIN_PARITY_DELTA_OPTIMIZATION |
69+
FLAG_EC_PLUGIN_CRC_ENCODE_DECODE_SUPPORT;
6970
}
7071

7172
unsigned int get_chunk_count() const override {

src/osd/ECBackend.cc

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1667,6 +1667,57 @@ void ECBackend::objects_read_async(
16671667
cct)));
16681668
}
16691669

1670+
bool ECBackend::ec_can_decode(const shard_id_set &available_shards) const {
1671+
if (sinfo.supports_sub_chunks()) {
1672+
ceph_abort_msg("Interface does not support subchunks");
1673+
return false;
1674+
}
1675+
1676+
mini_flat_map<shard_id_t, std::vector<std::pair<int, int>>>
1677+
minimum_sub_chunks{ec_impl->get_chunk_count()};
1678+
shard_id_set want_to_read = sinfo.get_all_shards();
1679+
shard_id_set available(available_shards);
1680+
shard_id_set minimum_set;
1681+
1682+
int r = ec_impl->minimum_to_decode(want_to_read, available, minimum_set,
1683+
&minimum_sub_chunks);
1684+
return (r == 0);
1685+
}
1686+
1687+
/**
 * Encode a buffer into one chunk per shard using the erasure-code plugin.
 *
 * Every raw shard index below ec_impl->get_chunk_count() is translated to its
 * shard id through sinfo.get_shard() and requested from the plugin.
 *
 * @param in_bl buffer handed to the plugin's encode()
 * @return map of shard id to the buffer produced by encode()
 */
shard_id_map<bufferlist> ECBackend::ec_encode_acting_set(
    const bufferlist &in_bl) const {
  shard_id_set shards_to_encode;
  raw_shard_id_t raw;
  while (raw < ec_impl->get_chunk_count()) {
    shards_to_encode.insert(sinfo.get_shard(raw));
    ++raw;
  }

  shard_id_map<bufferlist> chunks{ec_impl->get_chunk_count()};
  // NOTE(review): the result of encode() is not checked here.
  ec_impl->encode(shards_to_encode, in_bl, &chunks);
  return chunks;
}
1698+
1699+
/**
 * Decode the shards that are absent from shard_map using the erasure-code
 * plugin.
 *
 * Every raw shard index below ec_impl->get_chunk_count() is translated to its
 * shard id through sinfo.get_shard(); ids that are not keys of shard_map form
 * the set requested from the plugin's decode().
 *
 * @param shard_map  buffers already held, keyed by shard id (decode input)
 * @param chunk_size chunk size forwarded unchanged to decode()
 * @return the map filled in by decode()
 */
shard_id_map<bufferlist> ECBackend::ec_decode_acting_set(
    const shard_id_map<bufferlist> &shard_map, int chunk_size) const {
  shard_id_set want_to_read;
  for (raw_shard_id_t raw_shard_id; raw_shard_id < ec_impl->get_chunk_count();
       ++raw_shard_id) {
    const shard_id_t shard_id = sinfo.get_shard(raw_shard_id);
    if (!shard_map.contains(shard_id)) {
      want_to_read.insert(shard_id);
    }
  }

  shard_id_map<bufferlist> decoded_buffers(ec_impl->get_chunk_count());
  // NOTE(review): the result of decode() is not checked here.
  ec_impl->decode(want_to_read, shard_map, &decoded_buffers, chunk_size);

  // Return the decode output directly: copying it entry by entry into a
  // second map of the same type and size yields the same contents.
  return decoded_buffers;
}
1718+
1719+
/// Return a copy of this backend's stripe layout description (sinfo).
ECUtil::stripe_info_t ECBackend::ec_get_sinfo() const {
  return sinfo;
}
1720+
16701721
void ECBackend::objects_read_and_reconstruct(
16711722
const map<hobject_t, std::list<ec_align_t>> &reads,
16721723
bool fast_read,
@@ -1760,7 +1811,16 @@ int ECBackend::be_deep_scrub(
17601811
return -EINPROGRESS;
17611812
}
17621813

1763-
o.digest = 0;
1814+
if (sinfo.supports_encode_decode_crcs()) {
1815+
// We pass the calculated digest here
1816+
// This will be used along with the plugin to verify data consistency
1817+
o.digest = pos.data_hash.digest();
1818+
}
1819+
else
1820+
{
1821+
o.digest = 0;
1822+
}
1823+
17641824
o.digest_present = true;
17651825
o.omap_digest = -1;
17661826
o.omap_digest_present = true;

src/osd/ECBackend.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,12 @@ class ECBackend : public ECCommon {
175175
bool fast_read = false
176176
);
177177

178+
bool ec_can_decode(const shard_id_set &available_shards) const;
179+
shard_id_map<bufferlist> ec_encode_acting_set(const bufferlist &in_bl) const;
180+
shard_id_map<bufferlist> ec_decode_acting_set(
181+
const shard_id_map<bufferlist> &shard_map, int chunk_size) const;
182+
ECUtil::stripe_info_t ec_get_sinfo() const;
183+
178184
private:
179185
friend struct ECRecoveryHandle;
180186

@@ -436,6 +442,10 @@ class ECBackend : public ECCommon {
436442
return sinfo.get_chunk_size();
437443
}
438444

445+
bool get_ec_supports_crc_encode_decode() const {
446+
return sinfo.supports_encode_decode_crcs();
447+
}
448+
439449
uint64_t object_size_to_shard_size(const uint64_t size, shard_id_t shard
440450
) const {
441451
return sinfo.object_size_to_shard_size(size, shard);

0 commit comments

Comments
 (0)