
Commit ee84f89

crimson: Add support for bench osd command
This commit adds support for the 'bench' admin command in the OSD, allowing administrators to run benchmark tests against the OSD.

The 'bench' command accepts 4 optional parameters with the following default values:

1. count - Total number of bytes to write (default: 1GB).
2. size - Block size for each write operation (default: 4MB).
3. object_size - Size of each object to write (default: 0).
4. object_num - Number of objects to write (default: 0).

The results of the benchmark are returned in a JSON-formatted output, which includes the following fields:

1. bytes_written - Total number of bytes written during the benchmark.
2. blocksize - Block size used for each write operation.
3. elapsed_sec - Total time taken to complete the benchmark, in seconds.
4. bytes_per_sec - Write throughput in bytes per second.
5. iops - Number of input/output operations per second.

Example JSON output:

```json
{
  "osd_bench_results": {
    "bytes_written": 1073741824,
    "blocksize": 4194304,
    "elapsed_sec": 0.5,
    "bytes_per_sec": 2147483648,
    "iops": 512
  }
}
```

Fixes: https://tracker.ceph.com/issues/66380
Signed-off-by: Nitzan Mordechai <[email protected]>
1 parent db4959e commit ee84f89
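As a sanity check on the example output above, the throughput figures follow directly from bytes_written, blocksize, and elapsed_sec. Below is a minimal standalone C++ sketch of that arithmetic; it mirrors the formulas the bench hook uses when filling the formatter and is not part of the commit itself.

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // Values taken from the example JSON output above.
  const int64_t bytes_written = 1073741824;  // count: 1 GB written in total
  const int64_t blocksize     = 4194304;     // size: 4 MB per write
  const double  elapsed_sec   = 0.5;

  // Same arithmetic as the hook: bytes_per_sec = count / elapsed,
  // iops = (count / elapsed) / bsize.
  const double bytes_per_sec = bytes_written / elapsed_sec;  // 2147483648
  const double iops          = bytes_per_sec / blocksize;    // 512

  std::printf("bytes_per_sec=%.0f iops=%.0f\n", bytes_per_sec, iops);
  return 0;
}
```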

File tree

6 files changed (+187, −1 lines)

qa/suites/crimson-rados/basic/tasks/rados_python.yaml

Lines changed: 1 addition & 1 deletion

@@ -17,4 +17,4 @@ tasks:
     timeout: 1h
     clients:
       client.0:
-        - rados/test_python.sh -m 'not (tier or ec or bench)'
+        - rados/test_python.sh -m 'not (tier or ec)'

src/crimson/admin/osd_admin.cc

Lines changed: 100 additions & 0 deletions

@@ -19,6 +19,7 @@
 #include "crimson/osd/pg.h"
 #include "crimson/osd/shard_services.h"
 
+SET_SUBSYS(osd);
 namespace {
 seastar::logger& logger()
 {
@@ -93,6 +94,105 @@ class SendBeaconHook : public AdminSocketHook {
 template std::unique_ptr<AdminSocketHook>
 make_asok_hook<SendBeaconHook>(crimson::osd::OSD& osd);
 
+/**
+ * An OSD admin hook: run bench
+ * Usage parameters:
+ *   count=Count of bytes to write
+ *   bsize=block size
+ *   osize=Object size
+ *   onum=Number of objects
+ */
+class RunOSDBenchHook : public AdminSocketHook {
+public:
+  explicit RunOSDBenchHook(crimson::osd::OSD& osd) :
+    AdminSocketHook{"bench",
+                    "name=count,type=CephInt,req=false "
+                    "name=size,type=CephInt,req=false "
+                    "name=object_size,type=CephInt,req=false "
+                    "name=object_num,type=CephInt,req=false",
+                    "run OSD bench"},
+    osd(osd)
+  {}
+  seastar::future<tell_result_t> call(const cmdmap_t& cmdmap,
+                                      std::string_view format,
+                                      ceph::bufferlist&& input) const final
+  {
+    LOG_PREFIX(RunOSDBenchHook::call);
+    int64_t count = cmd_getval_or<int64_t>(cmdmap, "count", 1LL << 30);
+    int64_t bsize = cmd_getval_or<int64_t>(cmdmap, "size", 4LL << 20);
+    int64_t osize = cmd_getval_or<int64_t>(cmdmap, "object_size", 0);
+    int64_t onum = cmd_getval_or<int64_t>(cmdmap, "object_num", 0);
+    auto duration = local_conf()->osd_bench_duration;
+    auto max_block_size = local_conf()->osd_bench_max_block_size;
+    if (bsize > static_cast<int64_t>(max_block_size)) {
+      // let us limit the block size because the next checks rely on it
+      // having a sane value. If we allow any block size to be set things
+      // can still go sideways.
+      INFO("block 'size' values are capped at {}. If you wish to use"
+           " a higher value, please adjust 'osd_bench_max_block_size'",
+           byte_u_t(max_block_size));
+      return seastar::make_ready_future<tell_result_t>(-EINVAL, "block size too large");
+    } else if (bsize < (1LL << 20)) {
+      // entering the realm of small block sizes.
+      // limit the count to a sane value, assuming a configurable amount of
+      // IOPS and duration, so that the OSD doesn't get hung up on this,
+      // preventing timeouts from going off
+      int64_t max_count = bsize * duration * local_conf()->osd_bench_small_size_max_iops;
+      if (count > max_count) {
+        INFO("bench count {} > osd_bench_small_size_max_iops {}",
+             count, max_count);
+        return seastar::make_ready_future<tell_result_t>(-EINVAL, "count too large");
+      }
+    } else {
+      // 1MB block sizes are big enough so that we get more stuff done.
+      // However, to avoid the osd from getting hung on this and having
+      // timers being triggered, we are going to limit the count assuming
+      // a configurable throughput and duration.
+      // NOTE: max_count is the total amount of bytes that we believe we
+      //       will be able to write during 'duration' for the given
+      //       throughput. The block size hardly impacts this unless it's
+      //       way too big. Given we already check how big the block size
+      //       is, it's safe to assume everything will check out.
+      int64_t max_count = local_conf()->osd_bench_large_size_max_throughput * duration;
+      if (count > max_count) {
+        INFO("'count' values greater than {} for a block size of {},"
+             " assuming {} IOPS, for {} seconds, can cause ill effects"
+             " on osd. Please adjust 'osd_bench_small_size_max_iops'"
+             " with a higher value if you wish to use a higher 'count'.",
+             max_count, byte_u_t(bsize), local_conf()->osd_bench_small_size_max_iops,
+             duration);
+        return seastar::make_ready_future<tell_result_t>(-EINVAL, "count too large");
+      }
+    }
+    if (osize && bsize > osize) {
+      bsize = osize;
+    }
+
+    return osd.run_bench(count, bsize, osize, onum).then(
+      [format, bsize, count](double elapsed) {
+      if (elapsed < 0) {
+        return seastar::make_ready_future<tell_result_t>
+          (elapsed, "bench failed with error");
+      }
+
+      unique_ptr<Formatter> f{Formatter::create(format, "json-pretty", "json-pretty")};
+      f->open_object_section("osd_bench_results");
+      f->dump_int("bytes_written", count);
+      f->dump_int("blocksize", bsize);
+      f->dump_float("elapsed_sec", elapsed);
+      f->dump_float("bytes_per_sec", (elapsed > 0) ? count / elapsed : 0);
+      f->dump_float("iops", (elapsed > 0) ? (count / elapsed) / bsize : 0);
+      f->close_section();
+
+      return seastar::make_ready_future<tell_result_t>(std::move(f));
+    });
+  }
+private:
+  crimson::osd::OSD& osd;
+};
+template std::unique_ptr<AdminSocketHook>
+make_asok_hook<RunOSDBenchHook>(crimson::osd::OSD& osd);
+
 /**
  * send the latest pg stats to mgr
  */
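For context, the count caps enforced by the hook depend on three config options read from local_conf(). The standalone sketch below shows the two formulas; the config values used here are assumed placeholders for illustration only, not necessarily the shipped defaults.

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // Assumed placeholder values; the hook reads the real ones from local_conf().
  const int64_t osd_bench_duration                  = 30;         // seconds
  const int64_t osd_bench_small_size_max_iops       = 100;        // IOPS
  const int64_t osd_bench_large_size_max_throughput = 100 << 20;  // bytes/sec

  // Small block sizes (< 1 MB): cap count by how many bytes the assumed
  // IOPS budget can move within the duration.
  const int64_t small_bsize = 64 << 10;  // e.g. 64 KB blocks
  int64_t small_max_count =
      small_bsize * osd_bench_duration * osd_bench_small_size_max_iops;

  // Large block sizes (>= 1 MB): cap count by assumed throughput over
  // the duration; the block size itself barely matters here.
  int64_t large_max_count =
      osd_bench_large_size_max_throughput * osd_bench_duration;

  std::printf("small-block cap: %lld bytes\n", (long long)small_max_count);
  std::printf("large-block cap: %lld bytes\n", (long long)large_max_count);
  return 0;
}
```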

src/crimson/admin/osd_admin.h

Lines changed: 1 addition & 0 deletions

@@ -17,6 +17,7 @@ class InjectDataErrorHook;
 class InjectMDataErrorHook;
 class OsdStatusHook;
 class SendBeaconHook;
+class RunOSDBenchHook;
 class DumpInFlightOpsHook;
 class DumpHistoricOpsHook;
 class DumpSlowestHistoricOpsHook;

src/crimson/osd/osd.cc

Lines changed: 77 additions & 0 deletions

@@ -677,6 +677,7 @@ seastar::future<> OSD::start_asok_admin()
   asok->register_admin_commands();
   asok->register_command(make_asok_hook<OsdStatusHook>(std::as_const(*this)));
   asok->register_command(make_asok_hook<SendBeaconHook>(*this));
+  asok->register_command(make_asok_hook<RunOSDBenchHook>(*this));
   asok->register_command(make_asok_hook<FlushPgStatsHook>(*this));
   asok->register_command(
     make_asok_hook<DumpPGStateHistory>(std::as_const(pg_shard_manager)));
@@ -1418,6 +1419,82 @@ seastar::future<> OSD::send_beacon()
   return monc->send_message(std::move(beacon));
 }
 
+seastar::future<double> OSD::run_bench(int64_t count, int64_t bsize, int64_t osize, int64_t onum) {
+  LOG_PREFIX(OSD::run_bench);
+  DEBUG();
+  std::vector<seastar::future<>> futures;
+  std::vector<seastar::future<>> cleanup_futures;
+
+  auto collection_future = store.get_sharded_store().open_collection(
+    coll_t::meta());
+  auto collection_ref = co_await std::move(collection_future);
+  ceph::os::Transaction cleanup_t;
+
+  if (osize && onum) {
+    std::string data(osize, 'a');
+    ceph::buffer::list bl;
+    bl.append(data);
+
+    for (int i = 0; i < onum; ++i) {
+      ceph::os::Transaction t;
+      std::string oid_str = fmt::format("disk_bw_test_{}", i);
+      ghobject_t oid(hobject_t(sobject_t(object_t(oid_str), 0)),
+                     ghobject_t::NO_GEN,
+                     shard_id_t::NO_SHARD);
+      t.write(coll_t::meta(), oid, 0, data.size(), bl);
+      futures.push_back(store.get_sharded_store().do_transaction(
+        collection_ref, std::move(t)));
+      cleanup_t.remove(coll_t::meta(), oid);
+      cleanup_futures.push_back(store.get_sharded_store().do_transaction(
+        collection_ref, std::move(cleanup_t)));
+    }
+  }
+
+  co_await seastar::when_all_succeed(futures.begin(), futures.end());
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_int_distribution<> dis(0, 255);
+  std::vector<seastar::future<>> futures_bench;
+  auto start = std::chrono::steady_clock::now();
+
+  for (int i = 0; i < count / bsize; ++i) {
+    ceph::os::Transaction t;
+    ceph::buffer::ptr bp(bsize);
+    std::generate_n(bp.c_str(), bp.length(), [&dis, &gen]() {
+      return static_cast<char>(dis(gen));
+    });
+    ceph::buffer::list bl(bsize);
+    bl.push_back(std::move(bp));
+    bl.rebuild_page_aligned();
+
+    std::string oid_str;
+    uint64_t offset = 0;
+    if (onum && osize) {
+      oid_str = fmt::format("disk_bw_test_{}", dis(gen) % onum);
+      offset = (dis(gen) % (osize / bsize)) * bsize;
+    } else {
+      oid_str = fmt::format("disk_bw_test_{}", i * bsize);
+    }
+    ghobject_t oid(hobject_t(sobject_t(object_t(oid_str), 0)));
+
+    t.write(coll_t::meta(), oid, offset, bsize, bl);
+
+    futures_bench.push_back(store.get_sharded_store().do_transaction(
+      collection_ref, std::move(t)));
+
+    if (!onum || !osize) {
+      cleanup_t.remove(coll_t::meta(), oid);
+      cleanup_futures.push_back(store.get_sharded_store().do_transaction(
+        collection_ref, std::move(cleanup_t)));
+    }
+  }
+  co_await seastar::when_all_succeed(futures_bench.begin(), futures_bench.end());
+  auto end = std::chrono::steady_clock::now();
+  double elapsed = std::chrono::duration<double>(end - start).count();
+  co_await seastar::when_all_succeed(cleanup_futures.begin(), cleanup_futures.end());
+  co_return co_await seastar::make_ready_future<double>(elapsed);
+}
+
 seastar::future<> OSD::update_heartbeat_peers()
 {
   if (!pg_shard_manager.is_active()) {
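run_bench produces two different write patterns: when object_size and object_num are both set, each write lands at a random block-aligned offset inside one of the pre-created objects; otherwise every write goes to offset 0 of a freshly named object that is removed again afterwards. The standalone sketch below only mirrors that naming/offset branch with assumed sample parameters; it is not the OSD code itself.

```cpp
#include <cstdint>
#include <cstdio>
#include <random>
#include <string>

int main() {
  // Assumed sample parameters for illustration.
  const int64_t bsize = 4 << 20;   // block size per write
  const int64_t count = 64 << 20;  // total bytes, i.e. 16 writes here
  const int64_t osize = 16 << 20;  // object_size (0 would select streaming mode)
  const int64_t onum  = 4;         // object_num  (0 would select streaming mode)

  std::mt19937 gen{std::random_device{}()};
  std::uniform_int_distribution<> dis(0, 255);

  for (int i = 0; i < count / bsize; ++i) {
    std::string oid;
    uint64_t offset = 0;
    if (onum && osize) {
      // Object mode: pick one of the pre-created objects and a
      // block-aligned offset inside it.
      oid = "disk_bw_test_" + std::to_string(dis(gen) % onum);
      offset = (dis(gen) % (osize / bsize)) * bsize;
    } else {
      // Streaming mode: a new object per write, cleaned up afterwards.
      oid = "disk_bw_test_" + std::to_string(i * bsize);
    }
    std::printf("write %2d -> %s @ %llu\n", i, oid.c_str(),
                (unsigned long long)offset);
  }
  return 0;
}
```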

src/crimson/osd/osd.h

Lines changed: 4 additions & 0 deletions

@@ -247,6 +247,10 @@ class OSD final : public crimson::net::Dispatcher,
 
 public:
   seastar::future<> send_beacon();
+  seastar::future<double> run_bench(int64_t count,
+                                    int64_t bsize,
+                                    int64_t osize,
+                                    int64_t onum);
 
 private:
   LogClient log_client;

src/include/types.h

Lines changed: 4 additions & 0 deletions

@@ -482,6 +482,10 @@ struct byte_u_t {
   explicit byte_u_t(uint64_t _v) : v(_v) {};
 };
 
+#if FMT_VERSION >= 90000
+template <> struct fmt::formatter<byte_u_t> : fmt::ostream_formatter {};
+#endif
+
 inline std::ostream& operator<<(std::ostream& out, const byte_u_t& b)
 {
   uint64_t n = b.v;