Skip to content

Commit 368a624

Browse files
authored
Merge pull request ceph#64502 from aainscow/ec_reduce_rebalance_reads
osd: Reduce reads when rebalancing healthy Erasure Coded PGs Reviewed-by: Bill Scales <[email protected]>
2 parents 057888b + 52915db commit 368a624

File tree

3 files changed

+90
-1
lines changed

3 files changed

+90
-1
lines changed

src/common/bitset_set.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,29 @@ class bitset_set {
287287
return end();
288288
}
289289

290+
/** @return a const_iterator to the nth key or end if it does not exist.
291+
*
292+
* This is called "find_nth" rather an overloading find, as its clearer
293+
* what it is doing find(4) may imply "find(Key(4))"
294+
*/
295+
const_iterator find_nth(unsigned int n) const {
296+
for (size_t i = 0; i < word_count; ++i) {
297+
unsigned int bits_set = std::popcount(words[i]);
298+
if (bits_set > n) {
299+
uint64_t tmp = words[i];
300+
// This could be optimised with BMI _pdep_u64
301+
for (unsigned int j = 0; j < n; ++j) {
302+
// This clears the least significant bit that is set to 1.
303+
tmp &= tmp - 1;
304+
}
305+
return const_iterator(this,
306+
std::countr_zero(tmp) + i * bits_per_uint64_t);
307+
}
308+
n -= bits_set;
309+
}
310+
return end();
311+
}
312+
290313
/** @return number of keys in the container. O(1) complexity on most
291314
* modern CPUs.
292315
*/

src/osd/ECCommon.cc

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,23 @@ int ECCommon::ReadPipeline::get_min_avail_to_read_shards(
220220

221221
read_request.shard_want_to_read.populate_shard_id_set(want);
222222

223-
int r = ec_impl->minimum_to_decode(want, have, need_set,
223+
int r = 0;
224+
auto kth_iter = want.find_nth(sinfo.get_k());
225+
if (kth_iter != want.end()) {
226+
// If we support partial reads, we are making the assumption that only
227+
// K shards need to be read to recover data. We opt here for minimising
228+
// the number of reads over minimising the amount of parity calculations
229+
// that are needed.
230+
shard_id_set want_for_plugin = want;
231+
shard_id_t kth = *kth_iter;
232+
want_for_plugin.erase_range(kth, sinfo.get_k_plus_m() - (int)kth);
233+
r = ec_impl->minimum_to_decode(want_for_plugin, have, need_set,
224234
need_sub_chunks.get());
235+
} else {
236+
r = ec_impl->minimum_to_decode(want, have, need_set,
237+
need_sub_chunks.get());
238+
}
239+
225240
if (r < 0) {
226241
dout(20) << "minimum_to_decode_failed r: " << r << "want: " << want
227242
<< " have: " << have << " need: " << need_set << dendl;

src/test/common/test_bitset_set.cc

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,3 +211,54 @@ TEST(bitset_set, fmt_formatting) {
211211
oss << bitset;
212212
EXPECT_EQ(using_fmt, oss.str());
213213
}
214+
215+
// Exercises bitset_set::find_nth() on empty, single-key, alternating and
// fully populated sets, checking both hits and the end() result for
// positions that are out of range.
TEST(bitset_set, find_nth) {
  constexpr size_t capacity = 128;
  bitset_set<capacity, Key> keys;

  // Empty set: no position resolves to a key.
  ASSERT_EQ(keys.end(), keys.find_nth(0));
  ASSERT_EQ(keys.end(), keys.find_nth(1));
  ASSERT_EQ(keys.end(), keys.find_nth(capacity));

  // Only key 0 present: position 0 hits it, everything else misses.
  keys.insert(0);
  ASSERT_EQ(Key(0), *keys.find_nth(0));
  ASSERT_EQ(keys.end(), keys.find_nth(1));
  ASSERT_EQ(keys.end(), keys.find_nth(capacity));

  // Exactly one key present, tried at every possible position in the set.
  for (unsigned int pos = 0; pos < capacity; ++pos) {
    keys.clear();
    keys.insert(pos);
    ASSERT_EQ(Key(pos), *keys.find_nth(0));
    ASSERT_EQ(keys.end(), keys.find_nth(1));
    ASSERT_EQ(keys.end(), keys.find_nth(capacity));
  }

  // Every even key present: the nth key is 2 * n.
  keys.clear();
  for (unsigned int half = 0; half < capacity / 2; ++half) {
    keys.insert(half * 2);
  }
  for (unsigned int nth = 0; nth < capacity / 2; ++nth) {
    ASSERT_EQ(Key(nth * 2), *keys.find_nth(nth));
  }
  ASSERT_EQ(keys.end(), keys.find_nth(capacity / 2));

  // Every odd key present: the nth key is 2 * n + 1.
  keys.clear();
  for (unsigned int half = 0; half < capacity / 2; ++half) {
    keys.insert(half * 2 + 1);
  }
  for (unsigned int nth = 0; nth < capacity / 2; ++nth) {
    ASSERT_EQ(Key(nth * 2 + 1), *keys.find_nth(nth));
  }
  ASSERT_EQ(keys.end(), keys.find_nth(capacity / 2));

  // Every key present: the nth key is n itself.
  keys.clear();
  keys.insert_range(Key(0), capacity);
  for (unsigned int nth = 0; nth < capacity; ++nth) {
    ASSERT_EQ(Key(nth), *keys.find_nth(nth));
  }
  ASSERT_EQ(keys.end(), keys.find_nth(capacity));
}

0 commit comments

Comments
 (0)