Skip to content

Commit 15bd14c

Browse files
committed
os/bluestore/recompression: Estimator omits large compressed blobs
The problem was that Estimator accepted large compressed blobs for recompression. The fix is to discourage such actions by penalizing compressed blobs based on their size. In effect small compressed blob is likely to be recompressed, and large compressed blob will not. Fixes: https://tracker.ceph.com/issues/71244 Signed-off-by: Adam Kupczyk <[email protected]> (cherry picked from commit bbc9e96)
1 parent 6adc172 commit 15bd14c

File tree

3 files changed

+41
-11
lines changed

3 files changed

+41
-11
lines changed

src/os/bluestore/BlueStore.cc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17640,6 +17640,7 @@ int BlueStore::_do_write_v2_compressed(
1764017640
o->extent_map.fault_range(db, scan_left, scan_right - scan_left);
1764117641
if (!c->estimator) c->estimator.reset(create_estimator());
1764217642
Estimator* estimator = c->estimator.get();
17643+
estimator->set_wctx(&wctx);
1764317644
Scanner scanner(this);
1764417645
scanner.write_lookaround(o.get(), offset, length, scan_left, scan_right, estimator);
1764517646
std::vector<Estimator::region_t> regions;
@@ -17674,9 +17675,7 @@ int BlueStore::_do_write_v2_compressed(
1767417675
int32_t disk_for_compressed;
1767517676
int32_t disk_for_raw;
1767617677
uint32_t au_size = min_alloc_size;
17677-
uint32_t max_blob_size = c->pool_opts.value_or(
17678-
pool_opts_t::COMPRESSION_MAX_BLOB_SIZE, (int64_t)comp_max_blob_size.load());
17679-
disk_for_compressed = estimator->split_and_compress(wctx.compressor, max_blob_size, data_bl, bd);
17678+
disk_for_compressed = estimator->split_and_compress(data_bl, bd);
1768017679
disk_for_raw = p2roundup(i.offset + i.length, au_size) - p2align(i.offset, au_size);
1768117680
BlueStore::Writer wr(this, txc, &wctx, o);
1768217681
if (disk_for_compressed < disk_for_raw) {

src/os/bluestore/Compression.cc

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,23 +52,41 @@ using P = BlueStore::printer;
5252

5353
void Estimator::cleanup()
5454
{
55+
wctx = nullptr;
5556
new_size = 0;
5657
uncompressed_size = 0;
5758
compressed_occupied = 0;
5859
compressed_size = 0;
60+
compressed_area = 0;
5961
total_uncompressed_size = 0;
6062
total_compressed_occupied = 0;
6163
total_compressed_size = 0;
6264
actual_compressed = 0;
6365
actual_compressed_plus_pad = 0;
6466
extra_recompress.clear();
67+
single_compressed_blob = true;
68+
last_blob = nullptr;
6569
}
70+
71+
void Estimator::set_wctx(const WriteContext* wctx)
72+
{
73+
this->wctx = wctx;
74+
}
75+
6676
inline void Estimator::batch(const BlueStore::Extent* e, uint32_t gain)
6777
{
6878
const Blob *h_Blob = &(*e->blob);
6979
const bluestore_blob_t &h_bblob = h_Blob->get_blob();
7080
if (h_bblob.is_compressed()) {
81+
if (last_blob) {
82+
if (h_Blob != last_blob) {
83+
single_compressed_blob = false;
84+
}
85+
} else {
86+
last_blob = h_Blob;
87+
}
7188
compressed_size += e->length * h_bblob.get_compressed_payload_length() / h_bblob.get_logical_length();
89+
compressed_area += e->length;
7290
compressed_occupied += gain;
7391
} else {
7492
uncompressed_size += e->length;
@@ -82,6 +100,16 @@ inline bool Estimator::is_worth()
82100
{
83101
uint32_t cost = uncompressed_size * expected_compression_factor +
84102
compressed_size * expected_recompression_error;
103+
if (uncompressed_size == 0 && single_compressed_blob) {
104+
// The special case if all extents are from compressed blobs.
105+
// We want to avoid the case of recompressing into exactly the same.
106+
// The cost should increase proportionally to blob size;
107+
// the rationale is that recompressing small blob is likely to provide gain,
108+
// but recompressing whole large blob isn't.
109+
uint64_t padding_size = p2nphase<uint64_t>(compressed_size, bluestore->min_alloc_size);
110+
uint32_t split_tax = padding_size * compressed_area / wctx->target_blob_size;
111+
cost += split_tax;
112+
}
85113
uint32_t gain = uncompressed_size + compressed_occupied;
86114
double need_ratio = bluestore->cct->_conf->bluestore_recompression_min_gain;
87115
bool take = gain > cost * need_ratio;
@@ -153,15 +181,13 @@ void Estimator::get_regions(std::vector<region_t>& regions)
153181
}
154182

155183
int32_t Estimator::split_and_compress(
156-
CompressorRef compr,
157-
uint32_t max_blob_size,
158184
ceph::buffer::list& data_bl,
159185
Writer::blob_vec& bd)
160186
{
161187
uint32_t au_size = bluestore->min_alloc_size;
162188
uint32_t size = data_bl.length();
163189
ceph_assert(size > 0);
164-
uint32_t blobs = (size + max_blob_size - 1) / max_blob_size;
190+
uint32_t blobs = (size + wctx->target_blob_size - 1) / wctx->target_blob_size;
165191
uint32_t blob_size = p2roundup(size / blobs, au_size);
166192
std::vector<uint32_t> blob_sizes(blobs);
167193
for (auto& i: blob_sizes) {
@@ -179,10 +205,10 @@ int32_t Estimator::split_and_compress(
179205
// FIXME: memory alignment here is bad
180206
bufferlist t;
181207
std::optional<int32_t> compressor_message;
182-
int r = compr->compress(bd.back().object_data, t, compressor_message);
208+
int r = wctx->compressor->compress(bd.back().object_data, t, compressor_message);
183209
ceph_assert(r == 0);
184210
bluestore_compression_header_t chdr;
185-
chdr.type = compr->get_type();
211+
chdr.type = wctx->compressor->get_type();
186212
chdr.length = t.length();
187213
chdr.compressor_message = compressor_message;
188214
encode(chdr, bd.back().disk_data);

src/os/bluestore/Compression.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ class BlueStore::Estimator {
2222
Estimator(BlueStore* bluestore)
2323
:bluestore(bluestore) {}
2424

25+
// Each estimator run needs specific WriteContext
26+
void set_wctx(const WriteContext* wctx);
27+
2528
// Inform estimator that an extent is a candidate for recompression.
2629
// Estimator has to calculate (guess) the cost (size) of the referenced data.
2730
// 'gain' is the size that will be released should extent be recompressed.
@@ -45,8 +48,6 @@ class BlueStore::Estimator {
4548
void get_regions(std::vector<region_t>& regions);
4649

4750
int32_t split_and_compress(
48-
CompressorRef compr,
49-
uint32_t max_blob_size,
5051
ceph::buffer::list& data_bl,
5152
Writer::blob_vec& bd);
5253

@@ -57,9 +58,11 @@ class BlueStore::Estimator {
5758
double expected_compression_factor = 0.5;
5859
double expected_recompression_error = 1.1;
5960
double expected_pad_expansion = 1.1;
61+
const WriteContext* wctx = nullptr;
6062
uint32_t new_size = 0; // fresh data to write
6163
uint32_t uncompressed_size = 0; // data that was not compressed
62-
uint32_t compressed_size = 0; // data of compressed size
64+
uint32_t compressed_size = 0; // estimated size of compressed data
65+
uint32_t compressed_area = 0; // area that is compressed
6366
uint32_t compressed_occupied = 0; // disk size that will be freed
6467
uint32_t total_uncompressed_size = 0;
6568
uint32_t total_compressed_size = 0;
@@ -68,6 +71,8 @@ class BlueStore::Estimator {
6871
uint32_t actual_compressed = 0;
6972
uint32_t actual_compressed_plus_pad = 0;
7073
std::map<uint32_t, uint32_t> extra_recompress;
74+
bool single_compressed_blob = true;
75+
const Blob* last_blob = nullptr;
7176
// Prepare for new write
7277
void cleanup();
7378
};

0 commit comments

Comments
 (0)