Skip to content

Commit 96ef390

Browse files
authored
Merge pull request ceph#58225 from guojidan/fix-mem-leak
crimson/osd/osd_operation: fix dump_historic_slow_ops command works Reviewed-by: Samuel Just <[email protected]> Reviewed-by: Matan Breizman <[email protected]>
2 parents c07bae3 + 86ac61c commit 96ef390

File tree

2 files changed

+53
-69
lines changed

2 files changed

+53
-69
lines changed

src/crimson/osd/osd_operation.cc

Lines changed: 47 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -33,18 +33,11 @@ void OSDOperationRegistry::do_stop()
3333
/* add_ref= */ false
3434
};
3535
});
36-
last_of_recents = std::end(historic_registry);
3736
// to_ref_down is going off
3837
}
3938

4039
OSDOperationRegistry::OSDOperationRegistry()
41-
: OperationRegistryT(seastar::this_shard_id())
42-
{
43-
constexpr auto historic_reg_index =
44-
static_cast<size_t>(OperationTypeCode::historic_client_request);
45-
auto& historic_registry = get_registry<historic_reg_index>();
46-
last_of_recents = std::begin(historic_registry);
47-
}
40+
: OperationRegistryT(seastar::this_shard_id()) {}
4841

4942
static auto get_duration(const ClientRequest& client_request)
5043
{
@@ -55,50 +48,49 @@ static auto get_duration(const ClientRequest& client_request)
5548

5649
void OSDOperationRegistry::put_historic(const ClientRequest& op)
5750
{
51+
using crimson::common::local_conf;
5852
// unlink the op from the client request registry. this is a part of
59-
// the re-link procedure. finally it will be in historic registry.
60-
constexpr auto client_reg_index =
61-
static_cast<size_t>(OperationTypeCode::client_request);
53+
// the re-link procedure. finally it will be in historic/historic_slow registry.
6254
constexpr auto historic_reg_index =
6355
static_cast<size_t>(OperationTypeCode::historic_client_request);
64-
auto& client_registry = get_registry<client_reg_index>();
65-
auto& historic_registry = get_registry<historic_reg_index>();
66-
historic_registry.splice(std::end(historic_registry),
67-
client_registry,
68-
client_registry.iterator_to(op));
69-
ClientRequest::ICRef(
70-
&op, /* add_ref= */true
71-
).detach(); // yes, "leak" it for now!
72-
73-
// check whether the history size limit is not exceeded; if so, then
74-
// purge the oldest op.
75-
// NOTE: Operation uses the auto-unlink feature of boost::intrusive.
76-
// NOTE: the cleaning happens in OSDOperationRegistry::do_stop()
77-
using crimson::common::local_conf;
78-
if (num_recent_ops >= local_conf()->osd_op_history_size) {
79-
++last_of_recents;
80-
++num_slow_ops;
56+
constexpr auto slow_historic_reg_index =
57+
static_cast<size_t>(OperationTypeCode::historic_slow_client_request);
58+
59+
if (get_duration(op) > local_conf()->osd_op_complaint_time) {
60+
auto& slow_historic_registry = get_registry<slow_historic_reg_index>();
61+
_put_historic(slow_historic_registry,
62+
op,
63+
local_conf()->osd_op_history_slow_op_size);
8164
} else {
82-
++num_recent_ops;
65+
auto& historic_registry = get_registry<historic_reg_index>();
66+
_put_historic(historic_registry,
67+
op,
68+
local_conf()->osd_op_history_size);
8369
}
84-
if (num_slow_ops > local_conf()->osd_op_history_slow_op_size) {
85-
// we're interested in keeping slowest ops. if the slow op history
86-
// is disabled, the list will have only one element, so the full-blown
87-
// search will boil down into `.front()`.
88-
const auto fastest_historic_iter = std::min_element(
89-
std::cbegin(historic_registry), last_of_recents,
90-
[] (const auto& lop, const auto& rop) {
91-
const auto& lclient_request = static_cast<const ClientRequest&>(lop);
92-
const auto& rclient_request = static_cast<const ClientRequest&>(rop);
93-
return get_duration(lclient_request) < get_duration(rclient_request);
94-
});
95-
assert(fastest_historic_iter != std::end(historic_registry));
96-
const auto& fastest_historic_op =
97-
static_cast<const ClientRequest&>(*fastest_historic_iter);
98-
historic_registry.erase(fastest_historic_iter);
70+
}
71+
72+
void OSDOperationRegistry::_put_historic(
73+
op_list& list,
74+
const class ClientRequest& op,
75+
uint64_t max)
76+
{
77+
constexpr auto client_reg_index =
78+
static_cast<size_t>(OperationTypeCode::client_request);
79+
auto& client_registry = get_registry<client_reg_index>();
80+
81+
// we only save the newest op
82+
list.splice(std::end(list), client_registry, client_registry.iterator_to(op));
83+
ClientRequest::ICRef(
84+
&op, /* add_ref= */true
85+
).detach(); // yes, "leak" it for now!
86+
87+
if (list.size() >= max) {
88+
auto old_op_ptr = &list.front();
89+
list.pop_front();
90+
const auto& old_op =
91+
static_cast<const ClientRequest&>(*old_op_ptr);
9992
// clear a previously "leaked" op
100-
ClientRequest::ICRef(&fastest_historic_op, /* add_ref= */false);
101-
--num_slow_ops;
93+
ClientRequest::ICRef(&old_op, /* add_ref= */false);
10294
}
10395
}
10496

@@ -125,33 +117,20 @@ size_t OSDOperationRegistry::dump_historic_client_requests(ceph::Formatter* f) c
125117

126118
size_t OSDOperationRegistry::dump_slowest_historic_client_requests(ceph::Formatter* f) const
127119
{
128-
const auto& historic_client_registry =
129-
get_registry<static_cast<size_t>(OperationTypeCode::historic_client_request)>(); //ClientRequest::type)>();
120+
const auto& slow_historic_client_registry =
121+
get_registry<static_cast<size_t>(OperationTypeCode::historic_slow_client_request)>(); //ClientRequest::type)>();
130122
f->open_object_section("op_history");
131-
f->dump_int("size", historic_client_registry.size());
123+
f->dump_int("size", slow_historic_client_registry.size());
132124
// TODO: f->dump_int("duration", history_duration.load());
133125
// the intrusive list is configured to not store the size
134-
std::multimap<utime_t,
135-
const ClientRequest*,
136-
std::greater<utime_t>> sorted_slowest_ops;
137-
// iterating over the entire registry as a slow op could be also
138-
// in the "recently added" part.
139-
std::transform(std::begin(historic_client_registry),
140-
std::end(historic_client_registry),
141-
std::inserter(sorted_slowest_ops, std::end(sorted_slowest_ops)),
142-
[] (const Operation& op) {
143-
const auto& cop = static_cast<const ClientRequest&>(op);
144-
return std::make_pair(get_duration(cop), &cop);
145-
});
146-
f->open_array_section("ops");
147-
using crimson::common::local_conf;
148126
size_t ops_count = 0;
149-
for (auto it = std::begin(sorted_slowest_ops);
150-
ops_count < local_conf()->osd_op_history_slow_op_size
151-
&& it != std::end(sorted_slowest_ops);
152-
++it, ++ops_count)
153127
{
154-
it->second->dump(f);
128+
f->open_array_section("ops");
129+
for (const auto& op : slow_historic_client_registry) {
130+
op.dump(f);
131+
++ops_count;
132+
}
133+
f->close_section();
155134
}
156135
f->close_section();
157136
return ops_count;

src/crimson/osd/osd_operation.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ enum class OperationTypeCode {
5050
background_recovery_sub,
5151
internal_client_request,
5252
historic_client_request,
53+
historic_slow_client_request,
5354
logmissing_request,
5455
logmissing_request_reply,
5556
snaptrim_event,
@@ -72,6 +73,7 @@ static constexpr const char* const OP_NAMES[] = {
7273
"background_recovery_sub",
7374
"internal_client_request",
7475
"historic_client_request",
76+
"historic_slow_client_request",
7577
"logmissing_request",
7678
"logmissing_request_reply",
7779
"snaptrim_event",
@@ -225,12 +227,15 @@ struct OSDOperationRegistry : OperationRegistryT<
225227
void do_stop() override;
226228

227229
void put_historic(const class ClientRequest& op);
230+
void _put_historic(
231+
op_list& list,
232+
const class ClientRequest& op,
233+
uint64_t max);
228234

229235
size_t dump_historic_client_requests(ceph::Formatter* f) const;
230236
size_t dump_slowest_historic_client_requests(ceph::Formatter* f) const;
231237

232238
private:
233-
op_list::const_iterator last_of_recents;
234239
size_t num_recent_ops = 0;
235240
size_t num_slow_ops = 0;
236241
};

0 commit comments

Comments
 (0)