Skip to content

Commit 01cf6db

Browse files
Igor Fedotov (ifed01)
authored and committed
os/bluestore: introduce allocator lookup policy
This allows different free space lookup approaches to be used for SSD and HDD drives. Signed-off-by: Igor Fedotov <[email protected]>
1 parent 71ce487 commit 01cf6db

17 files changed

+261
-82
lines changed

src/common/options/global.yaml.in

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5646,6 +5646,23 @@ options:
56465646
- hdd
56475647
- ssd
56485648
with_legacy: true
5649+
- name: bluestore_allocator_lookup_policy
5650+
type: str
5651+
level: advanced
5652+
desc: Determines how to perform the next free extent lookup.
5653+
long_desc: When set to 'hdd_optimized' the allocator searches from the last
5654+
location found. This may facilitate contiguous disk writes. It may similarly
5655+
be beneficial for large-IU QLC SSDs to enable firmware coalescing of sub-IU
5656+
writes.
5657+
When set to 'ssd_optimized' the allocator will search from the beginning of
5658+
the device. This may facilitate SSD firmware housekeeping.
5659+
When set to 'auto' the value will be derived from the detected device type
5660+
(rotational or non-rotational).
5661+
default: auto
5662+
enum_values:
5663+
- hdd_optimized
5664+
- ssd_optimized
5665+
- auto
56495666
- name: bluestore_avl_alloc_ff_max_search_count
56505667
type: uint
56515668
level: dev

src/os/bluestore/AvlAllocator.cc

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -239,14 +239,14 @@ int64_t AvlAllocator::_allocate(
239239
uint64_t want,
240240
uint64_t unit,
241241
uint64_t max_alloc_size,
242-
int64_t hint, // unused, for now!
242+
int64_t hint,
243243
PExtentVector* extents)
244244
{
245245
uint64_t allocated = 0;
246246
while (allocated < want) {
247247
uint64_t offset, length;
248248
int r = _allocate(std::min(max_alloc_size, want - allocated),
249-
unit, &offset, &length);
249+
unit, hint, &offset, &length);
250250
if (r < 0) {
251251
// Allocation failed.
252252
break;
@@ -260,6 +260,7 @@ int64_t AvlAllocator::_allocate(
260260
int AvlAllocator::_allocate(
261261
uint64_t size,
262262
uint64_t unit,
263+
int64_t hint,
263264
uint64_t *offset,
264265
uint64_t *length)
265266
{
@@ -296,7 +297,9 @@ int AvlAllocator::_allocate(
296297
*/
297298
uint64_t align = size & -size;
298299
ceph_assert(align != 0);
299-
uint64_t* cursor = &lbas[cbits(align) - 1];
300+
uint64_t dummy_cursor = (uint64_t)hint;
301+
uint64_t* cursor =
302+
hint == -1 ? &lbas[cbits(align) - 1] : &dummy_cursor;
300303
start = _pick_block_after(cursor, size, unit);
301304
dout(20) << __func__
302305
<< std::hex << " first fit params: 0x" << start << "~" << size
@@ -399,7 +402,7 @@ int64_t AvlAllocator::allocate(
399402
uint64_t want,
400403
uint64_t unit,
401404
uint64_t max_alloc_size,
402-
int64_t hint, // unused, for now!
405+
int64_t hint,
403406
PExtentVector* extents)
404407
{
405408
ldout(cct, 10) << __func__ << std::hex

src/os/bluestore/AvlAllocator.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ class AvlAllocator : public AllocatorBase {
109109
int _allocate(
110110
uint64_t size,
111111
uint64_t unit,
112+
int64_t hint,
112113
uint64_t *offset,
113114
uint64_t *length);
114115

src/os/bluestore/BlueFS.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4387,7 +4387,7 @@ int BlueFS::_allocate(uint8_t id, uint64_t len,
43874387
ceph_assert(id < alloc.size());
43884388
int64_t alloc_len = 0;
43894389
PExtentVector extents;
4390-
uint64_t hint = 0;
4390+
int64_t hint = -1;
43914391
int64_t need = len;
43924392
bool shared = is_shared_alloc(id);
43934393
auto shared_unit = shared_alloc ? shared_alloc->alloc_unit : 0;
@@ -4414,7 +4414,7 @@ int BlueFS::_allocate(uint8_t id, uint64_t len,
44144414
need = round_up_to(len, alloc_unit);
44154415
if (!node->extents.empty() && node->extents.back().bdev == id) {
44164416
hint = node->extents.back().end();
4417-
}
4417+
}
44184418
++alloc_attempts;
44194419
extents.reserve(4); // 4 should be (more than) enough for most allocations
44204420
auto t0 = mono_clock::now();

src/os/bluestore/BlueStore.cc

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5742,7 +5742,8 @@ std::vector<std::string> BlueStore::get_tracked_keys() const noexcept
57425742
"bluestore_warn_on_no_per_pool_omap"s,
57435743
"bluestore_warn_on_no_per_pg_omap"s,
57445744
"bluestore_max_defer_interval"s,
5745-
"bluestore_onode_segment_size"s
5745+
"bluestore_onode_segment_size"s,
5746+
"bluestore_allocator_lookup_policy"s
57465747
};
57475748
}
57485749

@@ -5814,6 +5815,9 @@ void BlueStore::handle_conf_change(const ConfigProxy& conf,
58145815
changed.count("osd_memory_expected_fragmentation")) {
58155816
_update_osd_memory_options();
58165817
}
5818+
if (changed.count("bluestore_allocator_lookup_policy")) {
5819+
_update_allocator_lookup_policy();
5820+
}
58175821
}
58185822

58195823
void BlueStore::_set_compression()
@@ -5948,6 +5952,24 @@ void BlueStore::_update_osd_memory_options()
59485952
<< dendl;
59495953
}
59505954

5955+
5956+
void BlueStore::_update_allocator_lookup_policy()
5957+
{
5958+
auto policy = cct->_conf.get_val<string>("bluestore_allocator_lookup_policy");
5959+
if (policy == "hdd_optimized") {
5960+
use_last_allocator_lookup_position = true;
5961+
} else if (policy == "ssd_optimized") {
5962+
use_last_allocator_lookup_position = false;
5963+
} else {
5964+
// Apply "auto" policy for everything else.
5965+
// Which means reusing last lookup position for hdds.
5966+
use_last_allocator_lookup_position = _use_rotational_settings();
5967+
}
5968+
dout(5) << __func__
5969+
<< " use_last_lookup_position " << use_last_allocator_lookup_position
5970+
<< dendl;
5971+
}
5972+
59515973
int BlueStore::_set_cache_sizes()
59525974
{
59535975
ceph_assert(bdev);
@@ -11240,7 +11262,7 @@ int BlueStore::_fsck_on_open(BlueStore::FSCKDepth depth, bool repair)
1124011262
dout(5) << __func__ << "::NCB::(F)alloc=" << alloc << ", length=" << e->length << dendl;
1124111263
int64_t alloc_len =
1124211264
alloc->allocate(e->length, min_alloc_size,
11243-
0, 0, &exts);
11265+
0, -1, &exts);
1124411266
if (alloc_len < 0 || alloc_len < (int64_t)e->length) {
1124511267
derr << __func__
1124611268
<< " failed to allocate 0x" << std::hex << e->length
@@ -11694,7 +11716,7 @@ void BlueStore::inject_leaked(uint64_t len)
1169411716
{
1169511717
PExtentVector exts;
1169611718
int64_t alloc_len = alloc->allocate(len, min_alloc_size,
11697-
min_alloc_size * 256, 0, &exts);
11719+
min_alloc_size * 256, -1, &exts);
1169811720
ceph_assert(alloc_len >= 0); // generally we do not expect any errors
1169911721
if (fm->is_null_manager()) {
1170011722
return;
@@ -14087,6 +14109,7 @@ int BlueStore::_open_super_meta()
1408714109
_set_csum();
1408814110
_set_compression();
1408914111
_set_blob_size();
14112+
_update_allocator_lookup_policy();
1409014113

1409114114
_validate_bdev();
1409214115
return 0;
@@ -17019,7 +17042,8 @@ int BlueStore::_do_alloc_write(
1701917042
auto start = mono_clock::now();
1702017043
prealloc_left = alloc->allocate(
1702117044
need, min_alloc_size, need,
17022-
0, &prealloc);
17045+
use_last_allocator_lookup_position ? -1 : 0,
17046+
&prealloc);
1702317047
log_latency("allocator@_do_alloc_write",
1702417048
l_bluestore_allocator_lat,
1702517049
mono_clock::now() - start,

src/os/bluestore/BlueStore.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2536,6 +2536,8 @@ class BlueStore : public ObjectStore,
25362536
friend class SocketHook;
25372537
AdminSocketHook* asok_hook = nullptr;
25382538

2539+
bool use_last_allocator_lookup_position = true;
2540+
25392541
struct MempoolThread : public Thread {
25402542
public:
25412543
BlueStore *store;
@@ -2793,6 +2795,7 @@ class BlueStore : public ObjectStore,
27932795
void _set_finisher_num();
27942796
void _set_per_pool_omap();
27952797
void _update_osd_memory_options();
2798+
void _update_allocator_lookup_policy();
27962799

27972800
int _open_bdev(bool create);
27982801
// Verifies if disk space is enough for reserved + min bluefs

src/os/bluestore/Btree2Allocator.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ int64_t Btree2Allocator::allocate(
6565
uint64_t want,
6666
uint64_t unit,
6767
uint64_t max_alloc_size,
68-
int64_t hint, // unused, for now!
68+
int64_t hint, // unused and likely unneeded
6969
PExtentVector* extents)
7070
{
7171
ldout(cct, 10) << __func__ << std::hex
@@ -182,7 +182,7 @@ int64_t Btree2Allocator::_allocate(
182182
uint64_t want,
183183
uint64_t unit,
184184
uint64_t max_alloc_size,
185-
int64_t hint, // unused, for now!
185+
int64_t hint, // unused and likely unneeded
186186
PExtentVector* extents)
187187
{
188188
uint64_t allocated = 0;

src/os/bluestore/BtreeAllocator.cc

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -226,14 +226,14 @@ int64_t BtreeAllocator::_allocate(
226226
uint64_t want,
227227
uint64_t unit,
228228
uint64_t max_alloc_size,
229-
int64_t hint, // unused, for now!
229+
int64_t hint,
230230
PExtentVector* extents)
231231
{
232232
uint64_t allocated = 0;
233233
while (allocated < want) {
234234
uint64_t offset, length;
235235
int r = _allocate(std::min(max_alloc_size, want - allocated),
236-
unit, &offset, &length);
236+
unit, hint, &offset, &length);
237237
if (r < 0) {
238238
// Allocation failed.
239239
break;
@@ -248,6 +248,7 @@ int64_t BtreeAllocator::_allocate(
248248
int BtreeAllocator::_allocate(
249249
uint64_t size,
250250
uint64_t unit,
251+
int64_t hint,
251252
uint64_t *offset,
252253
uint64_t *length)
253254
{
@@ -294,7 +295,8 @@ int BtreeAllocator::_allocate(
294295
* not guarantee that other allocations sizes may exist in the same
295296
* region.
296297
*/
297-
uint64_t* cursor = &lbas[cbits(size) - 1];
298+
uint64_t dummy_cursor = (uint64_t)hint;
299+
uint64_t* cursor = hint == -1 ? &lbas[cbits(size) - 1] : &dummy_cursor;
298300
start = _pick_block_after(cursor, size, unit);
299301
dout(20) << __func__ << " first fit=" << start << " size=" << size << dendl;
300302
if (start != uint64_t(-1ULL)) {
@@ -376,7 +378,7 @@ int64_t BtreeAllocator::allocate(
376378
uint64_t want,
377379
uint64_t unit,
378380
uint64_t max_alloc_size,
379-
int64_t hint, // unused, for now!
381+
int64_t hint,
380382
PExtentVector* extents)
381383
{
382384
ldout(cct, 10) << __func__ << std::hex

src/os/bluestore/BtreeAllocator.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ class BtreeAllocator : public AllocatorBase {
103103
int _allocate(
104104
uint64_t size,
105105
uint64_t unit,
106+
int64_t hint,
106107
uint64_t *offset,
107108
uint64_t *length);
108109

src/os/bluestore/HybridAllocator.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ int64_t HybridBtree2Allocator::allocate(
3232
uint64_t want,
3333
uint64_t unit,
3434
uint64_t max_alloc_size,
35-
int64_t hint,
35+
int64_t hint, // unused and likely unneeded for btree2 allocator
3636
PExtentVector* extents)
3737
{
3838
ldout(get_context(), 10) << __func__ << std::hex

0 commit comments

Comments
 (0)