Skip to content

Commit 864aa85

Browse files
authored
Merge pull request ceph#61196 from xxhdx1985126/wip-crimson-mgr-dynamic-perf-report
crimson/osd: support mgr's dynamic perf stats Reviewed-by: Samuel Just <[email protected]>
2 parents d9cf7a0 + 88fb54e commit 864aa85

File tree

12 files changed

+160
-11
lines changed

12 files changed

+160
-11
lines changed

src/crimson/mgr/client.cc

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,14 @@ namespace crimson::mgr
2626
{
2727

2828
Client::Client(crimson::net::Messenger& msgr,
29-
WithStats& with_stats)
29+
WithStats& with_stats,
30+
set_perf_queries_cb_t cb_set,
31+
get_perf_report_cb_t cb_get)
3032
: msgr{msgr},
3133
with_stats{with_stats},
32-
report_timer{[this] {report();}}
34+
report_timer{[this] {report();}},
35+
set_perf_queries_cb(cb_set),
36+
get_perf_report_cb(cb_get)
3337
{}
3438

3539
seastar::future<> Client::start()
@@ -152,6 +156,10 @@ seastar::future<> Client::handle_mgr_conf(crimson::net::ConnectionRef,
152156
} else {
153157
report_timer.cancel();
154158
}
159+
if (!m->osd_perf_metric_queries.empty()) {
160+
ceph_assert(set_perf_queries_cb);
161+
return set_perf_queries_cb(m->osd_perf_metric_queries);
162+
}
155163
return seastar::now();
156164
}
157165

@@ -202,6 +210,13 @@ void Client::_send_report()
202210
report->daemon_health_metrics = std::move(daemon_health_metrics);
203211
local_conf().get_config_bl(last_config_bl_version, &report->config_bl,
204212
&last_config_bl_version);
213+
if (get_perf_report_cb) {
214+
return get_perf_report_cb(
215+
).then([report=std::move(report), this](auto payload) mutable {
216+
report->metric_report_message = MetricReportMessage(std::move(payload));
217+
return conn->send(std::move(report));
218+
});
219+
}
205220
return conn->send(std::move(report));
206221
});
207222
}

src/crimson/mgr/client.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "crimson/net/Fwd.h"
1111
#include "mgr/DaemonHealthMetric.h"
1212
#include "mon/MgrMap.h"
13+
#include "mgr/MetricTypes.h"
1314

1415
template<typename Message> using Ref = boost::intrusive_ptr<Message>;
1516
namespace crimson::net {
@@ -30,9 +31,14 @@ class WithStats {
3031
};
3132

3233
class Client : public crimson::net::Dispatcher {
34+
using get_perf_report_cb_t = std::function<seastar::future<MetricPayload> ()>;
35+
using set_perf_queries_cb_t =
36+
std::function<seastar::future<> (const ConfigPayload &)>;
3337
public:
3438
Client(crimson::net::Messenger& msgr,
35-
WithStats& with_stats);
39+
WithStats& with_stats,
40+
set_perf_queries_cb_t cb_set,
41+
get_perf_report_cb_t cb_get);
3642
seastar::future<> start();
3743
seastar::future<> stop();
3844
void report();
@@ -60,6 +66,8 @@ class Client : public crimson::net::Dispatcher {
6066
crimson::common::gate_per_shard gates;
6167
uint64_t last_config_bl_version = 0;
6268
std::string service_name, daemon_name;
69+
set_perf_queries_cb_t set_perf_queries_cb;
70+
get_perf_report_cb_t get_perf_report_cb;
6371

6472
std::vector<DaemonHealthMetric> daemon_health_metrics;
6573

src/crimson/osd/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ add_executable(crimson-osd
5656
${PROJECT_SOURCE_DIR}/src/osd/SnapMapper.cc
5757
${PROJECT_SOURCE_DIR}/src/osd/recovery_types.cc
5858
${PROJECT_SOURCE_DIR}/src/osd/osd_perf_counters.cc
59+
${PROJECT_SOURCE_DIR}/src/mgr/OSDPerfMetricTypes.cc
5960
watch.cc
6061
)
6162
if(HAS_VTA)

src/crimson/osd/ops_executer.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,10 @@ class OpsExecuter {
169169
IOInterruptCondition, osd_op_errorator>;
170170

171171
object_stat_sum_t delta_stats;
172+
173+
size_t get_bytes_written() {
174+
return txn.get_num_bytes();
175+
}
172176
private:
173177
// with_effect can be used to schedule operations to be performed
174178
// at commit time. effects will be discarded if the operation does

src/crimson/osd/osd.cc

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,16 @@ OSD::OSD(int id, uint32_t nonce,
9595
hb_front_msgr{hb_front_msgr},
9696
hb_back_msgr{hb_back_msgr},
9797
monc{new crimson::mon::Client{*public_msgr, *this}},
98-
mgrc{new crimson::mgr::Client{*public_msgr, *this}},
98+
mgrc{new crimson::mgr::Client{
99+
*public_msgr,
100+
*this,
101+
[this](const ConfigPayload &config_payload) {
102+
return set_perf_queries(config_payload);
103+
},
104+
[this] {
105+
return get_perf_reports();
106+
}
107+
}},
99108
store{store},
100109
pg_shard_manager{osd_singleton_state,
101110
shard_services,
@@ -176,6 +185,70 @@ seastar::future<> OSD::open_meta_coll()
176185
});
177186
}
178187

188+
// Installs the dynamic perf queries carried by a mgr config payload on
// every shard.
//
// config_payload is read as an OSDConfigPayload via boost::get on a
// reference, so a payload holding any other alternative makes boost::get
// throw.  Queries whose key_descriptor is empty are counted as unsupported
// and are not handed to the PGs; the complete query -> limits map is still
// stored per shard.
//
// Returns the future produced by shard_services.invoke_on_all().
seastar::future<> OSD::set_perf_queries(const ConfigPayload &config_payload) {
  LOG_PREFIX(OSD::set_perf_queries);
  const std::map<OSDPerfMetricQuery, OSDPerfMetricLimits> &queries =
    boost::get<OSDConfigPayload>(config_payload).config;
  DEBUG("setting {} queries", queries.size());

  // Keep only the queries that carry a key descriptor.
  std::list<OSDPerfMetricQuery> supported;
  for (const auto &[query, limits] : queries) {
    if (query.key_descriptor.empty()) {
      continue;
    }
    supported.push_back(query);
  }
  if (supported.size() < queries.size()) {
    DEBUG("{} unsupported queries", queries.size() - supported.size());
  }

  // Both containers are captured by copy, so the per-shard work does not
  // refer back to config_payload.
  return shard_services.invoke_on_all(
    [supported, queries](auto &local_service) {
      auto &state = local_service.local_state;
      state.m_perf_queries = supported;
      state.m_perf_limits = queries;
      for (auto &[pgid, pg] : state.pg_map.get_pgs()) {
        pg->set_dynamic_perf_stats_queries(supported);
      }
    });
}
217+
218+
// Gathers the dynamic perf stats of every PG on every shard and turns them
// into an OSDMetricPayload.
//
// Each PG's stats are swapped out for a fresh DynamicPerfStats built from
// the shard's m_perf_queries, merged per shard, then reduced across shards.
// The merged result is converted into reports using the calling shard's
// m_perf_limits.
seastar::future<MetricPayload> OSD::get_perf_reports() {
  LOG_PREFIX(OSD::get_perf_reports);

  auto merged = co_await shard_services.map_reduce0(
    // map step: one accumulator per shard
    [FNAME](auto &local_service) {
      auto &shard_state = local_service.local_state;
      DynamicPerfStats shard_stats;
      for (auto &[pgid, pg] : shard_state.pg_map.get_pgs()) {
        // Per the original author's note: m_perf_queries is modified only
        // by set_perf_queries at the mgr client's request, and calls to
        // set_perf_queries/get_perf_reports are serialized by the mgr
        // client, so no additional locking is taken here.
        // NOTE(review): confirm that this serialization actually holds.
        DynamicPerfStats drained(shard_state.m_perf_queries);
        pg->get_dynamic_perf_stats(&drained);
        shard_stats.merge(drained);
        DEBUG("reporting for pg {}", pg->get_pgid());
      }
      return shard_stats;
    },
    // initial value of the reduction
    DynamicPerfStats(shard_services.local().local_state.m_perf_queries),
    // reduce step: fold the right-hand stats into the left-hand ones
    [](auto left, auto right) {
      left.merge(right);
      return left;
    });

  OSDMetricPayload payload;
  merged.add_to_reports(
    shard_services.local().local_state.m_perf_limits, &payload.report);
  DEBUG("reports for {} queries", payload.report.size());

  co_return payload;
}
251+
179252
seastar::future<OSDMeta> OSD::open_or_create_meta_coll(FuturizedStore &store)
180253
{
181254
return store.get_sharded_store().open_collection(coll_t::meta()).then([&store](auto ch) {

src/crimson/osd/osd.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,8 @@ class OSD final : public crimson::net::Dispatcher,
236236

237237
std::vector<DaemonHealthMetric> get_health_metrics();
238238

239+
seastar::future<> set_perf_queries(const ConfigPayload &config_payload);
240+
seastar::future<MetricPayload> get_perf_reports();
239241
private:
240242
crimson::common::gate_per_shard gate;
241243

src/crimson/osd/osd_operations/client_request.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ ClientRequest::ClientRequest(
6363
: shard_services(&_shard_services),
6464
l_conn(std::move(conn)),
6565
m(std::move(m)),
66+
begin_time(std::chrono::steady_clock::now()),
6667
instance_handle(new instance_handle_t)
6768
{}
6869

@@ -525,6 +526,7 @@ ClientRequest::do_process(
525526
co_return;
526527
}
527528

529+
size_t inb = 0, outb = 0;
528530
{
529531
auto all_completed = interruptor::now();
530532
if (ret) {
@@ -540,6 +542,7 @@ ClientRequest::do_process(
540542
// simply return the error below, leaving all_completed alone
541543
} else {
542544
auto submitted = interruptor::now();
545+
inb = ox.get_bytes_written();
543546
std::tie(submitted, all_completed) = co_await pg->submit_executer(
544547
std::move(ox), m->ops);
545548
co_await std::move(submitted);
@@ -573,9 +576,15 @@ ClientRequest::do_process(
573576
reply->set_result(osdop.rval);
574577
break;
575578
}
579+
outb += osdop.outdata.length();
576580
}
577581
}
578582

583+
pg->add_client_request_lat(
584+
*this,
585+
inb,
586+
outb,
587+
utime_t{std::chrono::steady_clock::now() - begin_time});
579588
reply->set_enoent_reply_versions(
580589
pg->peering_state.get_info().last_update,
581590
pg->peering_state.get_info().last_user_version);

src/crimson/osd/osd_operations/client_request.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,25 @@ class ClientRequest final : public PhasedOperationT<ClientRequest>,
4141
OpInfo op_info;
4242
seastar::promise<> on_complete;
4343
unsigned instance_id = 0;
44+
std::chrono::time_point<std::chrono::steady_clock> begin_time;
4445

4546
public:
4647
epoch_t get_epoch_sent_at() const {
4748
return m->get_map_epoch();
4849
}
4950

51+
// Forwards to op_info.may_write().
bool may_write() const {
  return op_info.may_write();
}
52+
// Forwards to op_info.may_cache().
bool may_cache() const {
  return op_info.may_cache();
}
53+
// Forwards to op_info.may_read().
bool may_read() const {
  return op_info.may_read();
}
54+
template <typename T>
55+
T* get_req() const {
56+
static_assert(std::is_same_v<T, MOSDOp>);
57+
return m.get();
58+
}
59+
// Read-only access to l_conn, the connection reference stored by this
// request.
const crimson::net::ConnectionRef &get_connection() const
{
  return l_conn;
}
62+
5063
/**
5164
* instance_handle_t
5265
*

src/crimson/osd/pg.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "crimson/osd/object_context.h"
2222
#include "osd/PeeringState.h"
2323
#include "osd/SnapMapper.h"
24+
#include "osd/DynamicPerfStats.h"
2425

2526
#include "crimson/common/interruptible_future.h"
2627
#include "crimson/common/log.h"
@@ -764,8 +765,25 @@ class PG : public boost::intrusive_ref_counter<
764765

765766
private:
766767
std::optional<pg_stat_t> pg_stats;
768+
DynamicPerfStats dp_stats;
767769

768770
public:
771+
void add_client_request_lat(
772+
const ClientRequest& req,
773+
size_t inb,
774+
size_t outb,
775+
const utime_t &lat) {
776+
if (dp_stats.is_enabled()) {
777+
dp_stats.add(pg_whoami.osd, get_info(), req, inb, outb, lat);
778+
}
779+
}
780+
void set_dynamic_perf_stats_queries(
781+
const std::list<OSDPerfMetricQuery> &queries) {
782+
dp_stats.set_queries(queries);
783+
}
784+
// Exchanges this PG's dp_stats with *stats: the caller walks away with what
// the PG held, and the PG continues with whatever the caller passed in.
void get_dynamic_perf_stats(DynamicPerfStats *stats)
{
  std::swap(dp_stats, *stats);
}
769787
OSDriver &get_osdriver() final {
770788
return osdriver;
771789
}

src/crimson/osd/shard_services.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "crimson/osd/state.h"
2626
#include "common/AsyncReserver.h"
2727
#include "crimson/net/Connection.h"
28+
#include "mgr/OSDPerfMetricTypes.h"
2829

2930
namespace crimson::net {
3031
class Messenger;
@@ -197,6 +198,8 @@ class PerShardState {
197198
}
198199

199200
OSDSuperblock per_shard_superblock;
201+
std::list<OSDPerfMetricQuery> m_perf_queries;
202+
std::map<OSDPerfMetricQuery, OSDPerfMetricLimits> m_perf_limits;
200203

201204
public:
202205
PerShardState(

0 commit comments

Comments
 (0)