Skip to content

Commit f5e8d11

Browse files
authored
Merge pull request ceph#52489 from ifed01/wip-ifed-alloc2
os/bluestore: introduce hybrid_btree2 allocator
2 parents 4fc168b + cf36ba9 commit f5e8d11

21 files changed

+2214
-345
lines changed

src/common/options/global.yaml.in

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4213,6 +4213,7 @@ options:
42134213
- avl
42144214
- btree
42154215
- hybrid
4216+
- hybrid_btree2
42164217
with_legacy: true
42174218
- name: bluefs_log_replay_check_allocations
42184219
type: bool
@@ -5053,6 +5054,7 @@ options:
50535054
- avl
50545055
- btree
50555056
- hybrid
5057+
- hybrid_btree2
50565058
with_legacy: true
50575059
- name: bluestore_freelist_blocks_per_key
50585060
type: size
@@ -5561,6 +5563,11 @@ options:
55615563
level: dev
55625564
desc: Maximum RAM hybrid allocator should use before enabling bitmap supplement
55635565
default: 64_M
5566+
- name: bluestore_btree2_alloc_weight_factor
5567+
type: float
5568+
level: dev
5569+
desc: Large continuous extents weight factor
5570+
default: 2
55645571
- name: bluestore_volume_selection_policy
55655572
type: str
55665573
level: dev

src/crimson/os/alienstore/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,7 @@ set(alien_store_srcs
5050
${PROJECT_SOURCE_DIR}/src/os/bluestore/Allocator.cc
5151
${PROJECT_SOURCE_DIR}/src/os/bluestore/AvlAllocator.cc
5252
${PROJECT_SOURCE_DIR}/src/os/bluestore/BtreeAllocator.cc
53+
${PROJECT_SOURCE_DIR}/src/os/bluestore/Btree2Allocator.cc
5354
${PROJECT_SOURCE_DIR}/src/os/bluestore/BitmapFreelistManager.cc
5455
${PROJECT_SOURCE_DIR}/src/os/bluestore/BlueFS.cc
5556
${PROJECT_SOURCE_DIR}/src/os/bluestore/bluefs_types.cc

src/include/intarith.h

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,18 @@ constexpr inline T p2align(T x, T align) {
5757
return x & -align;
5858
}
5959

60+
/*
61+
* return whether x is aligned with (align)
62+
* eg, p2aligned(1200, 1024) ==> false
63+
* eg, p2aligned(1024, 1024) ==> true
64+
* eg, p2aligned(0x1234, 0x100) ==> false
65+
* eg, p2aligned(0x5600, 0x100) ==> true
66+
*/
67+
template<typename T>
68+
constexpr inline bool p2aligned(T x, T align) {
69+
return p2align(x, align) == x;
70+
}
71+
6072
/*
6173
* return x % (mod) align
6274
* eg, p2phase(0x1234, 0x100) == 0x34 (x-0x12*align)

src/os/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ if(WITH_BLUESTORE)
2323
bluestore/BitmapAllocator.cc
2424
bluestore/AvlAllocator.cc
2525
bluestore/BtreeAllocator.cc
26+
bluestore/Btree2Allocator.cc
2627
bluestore/HybridAllocator.cc
2728
)
2829
endif(WITH_BLUESTORE)

src/os/bluestore/Allocator.cc

Lines changed: 59 additions & 60 deletions
Original file line number | Diff line number | Diff line change
@@ -7,9 +7,11 @@
77
#include "BitmapAllocator.h"
88
#include "AvlAllocator.h"
99
#include "BtreeAllocator.h"
10+
#include "Btree2Allocator.h"
1011
#include "HybridAllocator.h"
1112
#include "common/debug.h"
1213
#include "common/admin_socket.h"
14+
1315
#define dout_subsys ceph_subsys_bluestore
1416
using TOPNSPC::common::cmd_getval;
1517

@@ -108,7 +110,7 @@ class Allocator::SocketHook : public AdminSocketHook {
108110
} else if (command == "bluestore allocator fragmentation histogram " + name) {
109111
int64_t alloc_unit = 4096;
110112
cmd_getval(cmdmap, "alloc_unit", alloc_unit);
111-
if (alloc_unit == 0 ||
113+
if (alloc_unit <= 0 ||
112114
p2align(alloc_unit, alloc->get_block_size()) != alloc_unit) {
113115
ss << "Invalid allocation unit: '" << alloc_unit
114116
<< ", to be aligned with: '" << alloc->get_block_size()
@@ -123,20 +125,22 @@ class Allocator::SocketHook : public AdminSocketHook {
123125
return -EINVAL;
124126
}
125127

126-
Allocator::FreeStateHistogram hist;
127-
hist.resize(num_buckets);
128-
alloc->build_free_state_histogram(alloc_unit, hist);
128+
Allocator::FreeStateHistogram hist(num_buckets);
129+
alloc->foreach(
130+
[&](size_t off, size_t len) {
131+
hist.record_extent(uint64_t(alloc_unit), off, len);
132+
});
129133
f->open_array_section("extent_counts");
130-
for(int i = 0; i < num_buckets; i++) {
131-
f->open_object_section("c");
132-
f->dump_unsigned("max_len",
133-
hist[i].get_max(i, num_buckets)
134-
);
135-
f->dump_unsigned("total", hist[i].total);
136-
f->dump_unsigned("aligned", hist[i].aligned);
137-
f->dump_unsigned("units", hist[i].alloc_units);
138-
f->close_section();
139-
}
134+
hist.foreach(
135+
[&](uint64_t max_len, uint64_t total, uint64_t aligned, uint64_t units) {
136+
f->open_object_section("c");
137+
f->dump_unsigned("max_len", max_len);
138+
f->dump_unsigned("total", total);
139+
f->dump_unsigned("aligned", aligned);
140+
f->dump_unsigned("units", units);
141+
f->close_section();
142+
}
143+
);
140144
f->close_section();
141145
} else {
142146
ss << "Invalid command" << std::endl;
@@ -182,8 +186,13 @@ Allocator *Allocator::create(
182186
} else if (type == "btree") {
183187
return new BtreeAllocator(cct, size, block_size, name);
184188
} else if (type == "hybrid") {
185-
return new HybridAllocator(cct, size, block_size,
189+
return new HybridAvlAllocator(cct, size, block_size,
190+
cct->_conf.get_val<uint64_t>("bluestore_hybrid_alloc_mem_cap"),
191+
name);
192+
} else if (type == "hybrid_btree2") {
193+
return new HybridBtree2Allocator(cct, size, block_size,
186194
cct->_conf.get_val<uint64_t>("bluestore_hybrid_alloc_mem_cap"),
195+
cct->_conf.get_val<double>("bluestore_btree2_alloc_weight_factor"),
187196
name);
188197
}
189198
if (alloc == nullptr) {
@@ -195,7 +204,7 @@ Allocator *Allocator::create(
195204

196205
void Allocator::release(const PExtentVector& release_vec)
197206
{
198-
interval_set<uint64_t> release_set;
207+
release_set_t release_set;
199208
for (auto e : release_vec) {
200209
release_set.insert(e.offset, e.length);
201210
}
@@ -266,50 +275,40 @@ double Allocator::get_fragmentation_score()
266275
return (ideal - score_sum) / (ideal - terrible);
267276
}
268277

269-
void Allocator::build_free_state_histogram(
270-
size_t alloc_unit, Allocator::FreeStateHistogram& hist)
271-
{
272-
auto num_buckets = hist.size();
273-
ceph_assert(num_buckets);
274-
275-
auto base = free_state_hist_bucket::base;
276-
auto base_bits = free_state_hist_bucket::base_bits;
277-
auto mux = free_state_hist_bucket::mux;
278-
// maximum chunk size we track,
279-
// provided by the bucket before the last one
280-
size_t max =
281-
free_state_hist_bucket::get_max(num_buckets - 2, num_buckets);
282-
283-
auto iterated_allocation = [&](size_t off, size_t len) {
284-
size_t idx;
285-
if (len <= base) {
286-
idx = 0;
287-
} else if (len > max) {
288-
idx = num_buckets - 1;
289-
} else {
290-
size_t most_bit = cbits(uint64_t(len-1)) - 1;
291-
idx = 1 + ((most_bit - base_bits) / mux);
292-
}
293-
ceph_assert(idx < num_buckets);
294-
++hist[idx].total;
278+
/*************
279+
* Allocator::FreeStateHistogram
280+
*************/
281+
using std::function;
295282

296-
// now calculate the bucket for the chunk after alignment,
297-
// resulting chunks shorter than alloc_unit are discarded
298-
auto delta = p2roundup(off, alloc_unit) - off;
299-
if (len >= delta + alloc_unit) {
300-
len -= delta;
301-
if (len <= base) {
302-
idx = 0;
303-
} else if (len > max) {
304-
idx = num_buckets - 1;
305-
} else {
306-
size_t most_bit = cbits(uint64_t(len-1)) - 1;
307-
idx = 1 + ((most_bit - base_bits) / mux);
308-
}
309-
++hist[idx].aligned;
310-
hist[idx].alloc_units += len / alloc_unit;
311-
}
312-
};
283+
void Allocator::FreeStateHistogram::record_extent(uint64_t alloc_unit,
284+
uint64_t off,
285+
uint64_t len)
286+
{
287+
size_t idx = myTraits._get_bucket(len);
288+
ceph_assert(idx < buckets.size());
289+
++buckets[idx].total;
313290

314-
foreach(iterated_allocation);
291+
// now calculate the bucket for the chunk after alignment,
292+
// resulting chunks shorter than alloc_unit are discarded
293+
auto delta = p2roundup(off, alloc_unit) - off;
294+
if (len >= delta + alloc_unit) {
295+
len -= delta;
296+
idx = myTraits._get_bucket(len);
297+
ceph_assert(idx < buckets.size());
298+
++buckets[idx].aligned;
299+
buckets[idx].alloc_units += len / alloc_unit;
300+
}
301+
}
302+
void Allocator::FreeStateHistogram::foreach(
303+
function<void(uint64_t max_len,
304+
uint64_t total,
305+
uint64_t aligned,
306+
uint64_t unit)> cb)
307+
{
308+
size_t i = 0;
309+
for (const auto& b : buckets) {
310+
cb(myTraits._get_bucket_max(i),
311+
b.total, b.aligned, b.alloc_units);
312+
++i;
313+
}
315314
}

0 commit comments

Comments
 (0)