Skip to content

Commit 3883500

Browse files
authored
Merge pull request ceph#63358 from ifed01/wip-ifed-fix-vselector-math
os/bluestore:fix bluestore_volume_selection_reserved_factor usage
2 parents 897abcb + a9f591f commit 3883500

File tree

4 files changed

+174
-11
lines changed

4 files changed

+174
-11
lines changed

src/os/bluestore/BlueStore.cc

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19677,13 +19677,13 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) {
1967719677
sout.width(width);
1967819678
switch (l + LEVEL_FIRST) {
1967919679
case LEVEL_LOG:
19680-
sout << "LOG"; break;
19680+
sout << "log"; break;
1968119681
case LEVEL_WAL:
19682-
sout << "WAL"; break;
19682+
sout << "db.wal"; break;
1968319683
case LEVEL_DB:
19684-
sout << "DB"; break;
19684+
sout << "db"; break;
1968519685
case LEVEL_SLOW:
19686-
sout << "SLOW"; break;
19686+
sout << "db.slow"; break;
1968719687
case LEVEL_MAX:
1968819688
sout << "TOTAL"; break;
1968919689
}
@@ -19704,13 +19704,13 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) {
1970419704
sout.width(width);
1970519705
switch (l + LEVEL_FIRST) {
1970619706
case LEVEL_LOG:
19707-
sout << "LOG"; break;
19707+
sout << "log"; break;
1970819708
case LEVEL_WAL:
19709-
sout << "WAL"; break;
19709+
sout << "db.wal"; break;
1971019710
case LEVEL_DB:
19711-
sout << "DB"; break;
19711+
sout << "db"; break;
1971219712
case LEVEL_SLOW:
19713-
sout << "SLOW"; break;
19713+
sout << "db.slow"; break;
1971419714
case LEVEL_MAX:
1971519715
sout << "TOTAL"; break;
1971619716
}

src/os/bluestore/BlueStore.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4508,20 +4508,18 @@ class RocksDBBlueFSVolumeSelector : public BlueFSVolumeSelector
45084508
level_multiplier = _level_multiplier;
45094509
uint64_t prev_levels = _level0_size;
45104510
uint64_t cur_level = _level_base;
4511-
uint64_t cur_threshold = prev_levels + cur_level;
45124511
extra_level = 1;
45134512
do {
45144513
uint64_t next_level = cur_level * _level_multiplier;
45154514
uint64_t next_threshold = prev_levels + cur_level + next_level;
45164515
++extra_level;
45174516
if (_db_total <= next_threshold) {
4518-
cur_threshold *= reserved_factor;
4517+
uint64_t cur_threshold = prev_levels + cur_level * reserved_factor;
45194518
db_avail4slow = cur_threshold < _db_total ? _db_total - cur_threshold : 0;
45204519
break;
45214520
} else {
45224521
prev_levels += cur_level;
45234522
cur_level = next_level;
4524-
cur_threshold = next_threshold;
45254523
}
45264524
} while (true);
45274525
} else {
@@ -4530,6 +4528,12 @@ class RocksDBBlueFSVolumeSelector : public BlueFSVolumeSelector
45304528
}
45314529
}
45324530

4531+
uint64_t get_available_extra() const {
4532+
return db_avail4slow;
4533+
}
4534+
uint64_t get_extra_level() const {
4535+
return extra_level;
4536+
}
45334537
void* get_hint_for_log() const override {
45344538
return reinterpret_cast<void*>(LEVEL_LOG);
45354539
}

src/test/objectstore/CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ target_include_directories(store_test_fixture PRIVATE
1212
add_executable(ceph_test_objectstore
1313
store_test.cc
1414
$<TARGET_OBJECTS:store_test_fixture>)
15+
1516
target_link_libraries(ceph_test_objectstore
1617
os
1718
ceph-common
@@ -24,6 +25,21 @@ target_link_libraries(ceph_test_objectstore
2425
install(TARGETS ceph_test_objectstore
2526
DESTINATION ${CMAKE_INSTALL_BINDIR})
2627

28+
add_executable(ceph_test_bluestore_vselector
29+
test_bluestore_vselector.cc)
30+
31+
target_link_libraries(ceph_test_bluestore_vselector
32+
os
33+
ceph-common
34+
${UNITTEST_LIBS}
35+
global
36+
${EXTRALIBS}
37+
${BLKID_LIBRARIES}
38+
${CMAKE_DL_LIBS}
39+
)
40+
install(TARGETS ceph_test_bluestore_vselector
41+
DESTINATION ${CMAKE_INSTALL_BINDIR})
42+
2743
add_subdirectory(allocsim)
2844

2945
add_executable(ceph_test_keyvaluedb
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2+
// vim: ts=8 sw=2 smarttab
3+
4+
#include "gtest/gtest.h"
5+
#include "os/bluestore/BlueStore.h"
6+
#include "common/ceph_argparse.h"
7+
#include "global/global_init.h"
8+
#include "global/global_context.h"
9+
10+
using namespace std;
11+
12+
TEST(rocksdb_bluefs_vselector, basic) {
13+
14+
uint64_t db_size = 168ull << 30;
15+
uint64_t level_base = 1ull << 30;
16+
size_t level_multi = 8;
17+
18+
RocksDBBlueFSVolumeSelector selector(
19+
10ull << 30,
20+
db_size,
21+
1000ull << 30,
22+
1ull << 30,
23+
level_base,
24+
level_multi,
25+
g_ceph_context->_conf->bluestore_volume_selection_reserved_factor,
26+
g_ceph_context->_conf->bluestore_volume_selection_reserved,
27+
g_ceph_context->_conf->bluestore_volume_selection_policy.find("use_some_extra")
28+
== 0);
29+
30+
// taken from RocksDBBlueFSVolumeSelector::
31+
size_t log_bdev = 1; // LEVEL_LOG
32+
size_t wal_bdev = 2; // LEVEL_WAL
33+
size_t db_bdev = 3; // LEVEL_DB
34+
size_t slow_bdev = 4;// LEVEL_SLOW
35+
bluefs_extent_t e;
36+
37+
ASSERT_EQ(4, selector.get_extra_level());
38+
ASSERT_EQ(30ull << 30, selector.get_available_extra()); // 168GB - 1GB (L0) - 1GB (L1) - 8GB (L2) - 2*64GB (L3)
39+
40+
ASSERT_EQ(0, selector.select_prefer_bdev((void*)log_bdev));
41+
ASSERT_EQ(0, selector.select_prefer_bdev((void*)wal_bdev));
42+
ASSERT_EQ(1, selector.select_prefer_bdev((void*)db_bdev));
43+
ASSERT_EQ(1, selector.select_prefer_bdev((void*)slow_bdev));
44+
// 'Use' 138GB DB level data at DB vol
45+
for (size_t i = 0; i < 138; i++) {
46+
e.bdev = 1; // DB dev
47+
e.length = 1ull * (1 << 30);
48+
selector.add_usage((void*)db_bdev, e);
49+
}
50+
ASSERT_EQ(0, selector.select_prefer_bdev((void*)log_bdev));
51+
ASSERT_EQ(0, selector.select_prefer_bdev((void*)wal_bdev));
52+
ASSERT_EQ(1, selector.select_prefer_bdev((void*)db_bdev));
53+
ASSERT_EQ(1, selector.select_prefer_bdev((void*)slow_bdev));
54+
55+
// 'Use' 30GB Slow level data at DB vol
56+
for (size_t i = 0; i < 30; i++) {
57+
e.bdev = 1; // DB dev
58+
e.length = 1ull * (1 << 30);
59+
ASSERT_EQ(1, selector.select_prefer_bdev((void*)slow_bdev));
60+
selector.add_usage((void*)slow_bdev, e);
61+
}
62+
ASSERT_EQ(0, selector.select_prefer_bdev((void*)log_bdev));
63+
ASSERT_EQ(0, selector.select_prefer_bdev((void*)wal_bdev));
64+
ASSERT_EQ(1, selector.select_prefer_bdev((void*)db_bdev));
65+
ASSERT_EQ(2, selector.select_prefer_bdev((void*)slow_bdev));
66+
67+
// 'Unuse' 10GB DB level data at DB vol, slow data still wouldn't fit
68+
// as it exceeds the threshold
69+
for (size_t i = 0; i < 10; i++) {
70+
e.bdev = 1; // DB dev
71+
e.length = 1ull * (1 << 30);
72+
selector.sub_usage((void*)db_bdev, e);
73+
}
74+
ASSERT_EQ(2, selector.select_prefer_bdev((void*)slow_bdev));
75+
76+
// 'Unuse' 10GB Slow level data at DB vol, slow data fits now
77+
for (size_t i = 0; i < 10; i++) {
78+
e.bdev = 1; // DB dev
79+
e.length = 1ull * (1 << 30);
80+
selector.sub_usage((void*)slow_bdev, e);
81+
}
82+
ASSERT_EQ(1, selector.select_prefer_bdev((void*)slow_bdev));
83+
84+
// 'Unuse' remaining 20GB Slow level data at DB vol, slow data fits now
85+
for (size_t i = 0; i < 20; i++) {
86+
e.bdev = 1; // DB dev
87+
e.length = 1ull * (1 << 30);
88+
selector.sub_usage((void*)slow_bdev, e);
89+
}
90+
// 'Use' 30GB DB level data at DB vol to raise historic maximum, only 10GB of slow data fits from now on
91+
for (size_t i = 0; i < 30; i++) {
92+
e.bdev = 1; // DB dev
93+
e.length = 1ull * (1 << 30);
94+
ASSERT_EQ(1, selector.select_prefer_bdev((void*)slow_bdev));
95+
selector.add_usage((void*)db_bdev, e);
96+
}
97+
for (size_t i = 0; i < 10; i++) {
98+
e.bdev = 1; // DB dev
99+
e.length = 1ull * (1 << 30);
100+
ASSERT_EQ(1, selector.select_prefer_bdev((void*)slow_bdev));
101+
selector.add_usage((void*)slow_bdev, e);
102+
}
103+
ASSERT_EQ(2, selector.select_prefer_bdev((void*)slow_bdev));
104+
105+
// 'Unuse' remaining 10GB Slow level data at DB vol
106+
for (size_t i = 0; i < 10; i++) {
107+
e.bdev = 1; // DB dev
108+
e.length = 1ull * (1 << 30);
109+
selector.sub_usage((void*)slow_bdev, e);
110+
}
111+
// 'Use' additional 10GB DB level data at DB vol to raise historic maximum, 10GB slow data wouldn't fit from now on
112+
for (size_t i = 0; i < 10; i++) {
113+
e.bdev = 1; // DB dev
114+
e.length = 1ull * (1 << 30);
115+
ASSERT_EQ(1, selector.select_prefer_bdev((void*)slow_bdev));
116+
selector.add_usage((void*)db_bdev, e);
117+
}
118+
ASSERT_EQ(2, selector.select_prefer_bdev((void*)slow_bdev));
119+
120+
// 'Unuse' 50GB DB level data, this wouldn't let slow data use DB volume anyway
121+
// due to updated historic maximum
122+
for (size_t i = 0; i < 50; i++) {
123+
e.bdev = 1; // DB dev
124+
e.length = 1ull * (1 << 30);
125+
selector.sub_usage((void*)db_bdev, e);
126+
}
127+
ASSERT_EQ(2, selector.select_prefer_bdev((void*)slow_bdev));
128+
129+
130+
std::stringstream ss;
131+
selector.dump(ss);
132+
std::cout << ss.str() << std::endl;
133+
}
134+
135+
int main(int argc, char **argv) {
136+
auto args = argv_to_vec(argc, argv);
137+
auto cct =
138+
global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY,
139+
CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
140+
common_init_finish(g_ceph_context);
141+
::testing::InitGoogleTest(&argc, argv);
142+
return RUN_ALL_TESTS();
143+
}

0 commit comments

Comments
 (0)