Skip to content

Commit 14a9fc5

Browse files
authored
Merge pull request ceph#63167 from aclamk/aclamk-bs-recomp-improve-is-worth-tentacle
[tentacle] os/bluestore: Do not recompress large compressed blobs Reviewed-by: Ronen Friedman <[email protected]>
2 parents 264f186 + 15bd14c commit 14a9fc5

File tree

3 files changed

+42
-12
lines changed

3 files changed

+42
-12
lines changed

src/os/bluestore/BlueStore.cc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17640,6 +17640,7 @@ int BlueStore::_do_write_v2_compressed(
1764017640
o->extent_map.fault_range(db, scan_left, scan_right - scan_left);
1764117641
if (!c->estimator) c->estimator.reset(create_estimator());
1764217642
Estimator* estimator = c->estimator.get();
17643+
estimator->set_wctx(&wctx);
1764317644
Scanner scanner(this);
1764417645
scanner.write_lookaround(o.get(), offset, length, scan_left, scan_right, estimator);
1764517646
std::vector<Estimator::region_t> regions;
@@ -17674,9 +17675,7 @@ int BlueStore::_do_write_v2_compressed(
1767417675
int32_t disk_for_compressed;
1767517676
int32_t disk_for_raw;
1767617677
uint32_t au_size = min_alloc_size;
17677-
uint32_t max_blob_size = c->pool_opts.value_or(
17678-
pool_opts_t::COMPRESSION_MAX_BLOB_SIZE, (int64_t)comp_max_blob_size.load());
17679-
disk_for_compressed = estimator->split_and_compress(wctx.compressor, max_blob_size, data_bl, bd);
17678+
disk_for_compressed = estimator->split_and_compress(data_bl, bd);
1768017679
disk_for_raw = p2roundup(i.offset + i.length, au_size) - p2align(i.offset, au_size);
1768117680
BlueStore::Writer wr(this, txc, &wctx, o);
1768217681
if (disk_for_compressed < disk_for_raw) {

src/os/bluestore/Compression.cc

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,23 +52,41 @@ using P = BlueStore::printer;
5252

5353
void Estimator::cleanup()
5454
{
55+
wctx = nullptr;
5556
new_size = 0;
5657
uncompressed_size = 0;
5758
compressed_occupied = 0;
5859
compressed_size = 0;
60+
compressed_area = 0;
5961
total_uncompressed_size = 0;
6062
total_compressed_occupied = 0;
6163
total_compressed_size = 0;
6264
actual_compressed = 0;
6365
actual_compressed_plus_pad = 0;
6466
extra_recompress.clear();
67+
single_compressed_blob = true;
68+
last_blob = nullptr;
6569
}
70+
71+
void Estimator::set_wctx(const WriteContext* wctx)
72+
{
73+
this->wctx = wctx;
74+
}
75+
6676
inline void Estimator::batch(const BlueStore::Extent* e, uint32_t gain)
6777
{
6878
const Blob *h_Blob = &(*e->blob);
6979
const bluestore_blob_t &h_bblob = h_Blob->get_blob();
7080
if (h_bblob.is_compressed()) {
81+
if (last_blob) {
82+
if (h_Blob != last_blob) {
83+
single_compressed_blob = false;
84+
}
85+
} else {
86+
last_blob = h_Blob;
87+
}
7188
compressed_size += e->length * h_bblob.get_compressed_payload_length() / h_bblob.get_logical_length();
89+
compressed_area += e->length;
7290
compressed_occupied += gain;
7391
} else {
7492
uncompressed_size += e->length;
@@ -82,6 +100,16 @@ inline bool Estimator::is_worth()
82100
{
83101
uint32_t cost = uncompressed_size * expected_compression_factor +
84102
compressed_size * expected_recompression_error;
103+
if (uncompressed_size == 0 && single_compressed_blob) {
104+
// The special case if all extents are from compressed blobs.
105+
// We want to avoid the case of recompressing into exactly the same.
106+
// The cost should increase proportionally to blob size;
107+
// the rationale is that recompressing small blob is likely to provide gain,
108+
// but recompressing whole large blob isn't.
109+
uint64_t padding_size = p2nphase<uint64_t>(compressed_size, bluestore->min_alloc_size);
110+
uint32_t split_tax = padding_size * compressed_area / wctx->target_blob_size;
111+
cost += split_tax;
112+
}
85113
uint32_t gain = uncompressed_size + compressed_occupied;
86114
double need_ratio = bluestore->cct->_conf->bluestore_recompression_min_gain;
87115
bool take = gain > cost * need_ratio;
@@ -153,15 +181,13 @@ void Estimator::get_regions(std::vector<region_t>& regions)
153181
}
154182

155183
int32_t Estimator::split_and_compress(
156-
CompressorRef compr,
157-
uint32_t max_blob_size,
158184
ceph::buffer::list& data_bl,
159185
Writer::blob_vec& bd)
160186
{
161187
uint32_t au_size = bluestore->min_alloc_size;
162188
uint32_t size = data_bl.length();
163189
ceph_assert(size > 0);
164-
uint32_t blobs = (size + max_blob_size - 1) / max_blob_size;
190+
uint32_t blobs = (size + wctx->target_blob_size - 1) / wctx->target_blob_size;
165191
uint32_t blob_size = p2roundup(size / blobs, au_size);
166192
std::vector<uint32_t> blob_sizes(blobs);
167193
for (auto& i: blob_sizes) {
@@ -179,10 +205,10 @@ int32_t Estimator::split_and_compress(
179205
// FIXME: memory alignment here is bad
180206
bufferlist t;
181207
std::optional<int32_t> compressor_message;
182-
int r = compr->compress(bd.back().object_data, t, compressor_message);
208+
int r = wctx->compressor->compress(bd.back().object_data, t, compressor_message);
183209
ceph_assert(r == 0);
184210
bluestore_compression_header_t chdr;
185-
chdr.type = compr->get_type();
211+
chdr.type = wctx->compressor->get_type();
186212
chdr.length = t.length();
187213
chdr.compressor_message = compressor_message;
188214
encode(chdr, bd.back().disk_data);
@@ -872,7 +898,7 @@ void Scan::on_write_start(
872898
}
873899
if (left_it != extent_map->extent_map.begin()) {
874900
--left_it; // left_walk points to processes extent
875-
if (limit_left < left_it->logical_offset) {
901+
if (limit_left <= left_it->logical_offset) {
876902
dout(30) << "left maybe expand" << dendl;
877903
has_expanded |= maybe_expand_scan_range(left_it, left, right);
878904
}

src/os/bluestore/Compression.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ class BlueStore::Estimator {
2222
Estimator(BlueStore* bluestore)
2323
:bluestore(bluestore) {}
2424

25+
// Each estimator run needs specific WriteContext
26+
void set_wctx(const WriteContext* wctx);
27+
2528
// Inform estimator that an extent is a candidate for recompression.
2629
// Estimator has to calculate (guess) the cost (size) of the referenced data.
2730
// 'gain' is the size that will be released should extent be recompressed.
@@ -45,8 +48,6 @@ class BlueStore::Estimator {
4548
void get_regions(std::vector<region_t>& regions);
4649

4750
int32_t split_and_compress(
48-
CompressorRef compr,
49-
uint32_t max_blob_size,
5051
ceph::buffer::list& data_bl,
5152
Writer::blob_vec& bd);
5253

@@ -57,9 +58,11 @@ class BlueStore::Estimator {
5758
double expected_compression_factor = 0.5;
5859
double expected_recompression_error = 1.1;
5960
double expected_pad_expansion = 1.1;
61+
const WriteContext* wctx = nullptr;
6062
uint32_t new_size = 0; // fresh data to write
6163
uint32_t uncompressed_size = 0; // data that was not compressed
62-
uint32_t compressed_size = 0; // data of compressed size
64+
uint32_t compressed_size = 0; // estimated size of compressed data
65+
uint32_t compressed_area = 0; // area that is compressed
6366
uint32_t compressed_occupied = 0; // disk size that will be freed
6467
uint32_t total_uncompressed_size = 0;
6568
uint32_t total_compressed_size = 0;
@@ -68,6 +71,8 @@ class BlueStore::Estimator {
6871
uint32_t actual_compressed = 0;
6972
uint32_t actual_compressed_plus_pad = 0;
7073
std::map<uint32_t, uint32_t> extra_recompress;
74+
bool single_compressed_blob = true;
75+
const Blob* last_blob = nullptr;
7176
// Prepare for new write
7277
void cleanup();
7378
};

0 commit comments

Comments
 (0)