Skip to content

Commit 88d2ce7

Browse files
authored
Merge pull request ceph#59780 from irq0/wip/osd-asok-messenger-dump
Add Asok Command: Dump Messenger Status (Connections, TCP stats, ..)
2 parents f536650 + 36f2ed5 commit 88d2ce7

23 files changed

+825
-8
lines changed

PendingReleaseNotes

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@
9898
an empty string can be assigned to it. Additionally, commands
9999
`ceph fs subvolume earmark set`, `ceph fs subvolume earmark get` and
100100
`ceph fs subvolume earmark rm` have been added to set, get and remove earmark from a given subvolume.
101+
* RADOS: Add ``messenger dump`` command to retrieve runtime information
102+
on connections, sockets, and kernel TCP stats from the messenger.
101103

102104
* RADOS: A performance bottleneck in the balancer mgr module has been fixed.
103105
Related Tracker: https://tracker.ceph.com/issues/68657

doc/rados/operations/monitoring.rst

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -655,3 +655,86 @@ Runtime`_.
655655

656656
.. _Viewing a Configuration at Runtime: ../../configuration/ceph-conf#viewing-a-configuration-at-runtime
657657
.. _Storage Capacity: ../../configuration/mon-config-ref#storage-capacity
658+
659+
Messenger Status
660+
=================
661+
662+
Ceph daemons and librados clients support an admin socket command
663+
``messenger dump`` that surfaces a snapshot of runtime information
664+
about connections, sockets, bound addresses, and kernel TCP stats (via
665+
tcp(7) TCP_INFO).
666+
667+
.. note:: The queried messenger needs to lock the connection data
668+
structures for the time it takes to create the snapshot. This lock's
669+
duration is on the order of tens of milliseconds. This might
670+
interfere with normal operation. Use the ``dumpcontents`` argument
671+
to limit data structures dumped.
672+
673+
Examples
674+
---------
675+
676+
When a command is issued without specifying a messenger to dump, the
677+
list of available messengers is returned:
678+
679+
.. prompt:: bash $
680+
681+
ceph tell osd.0 messenger dump
682+
683+
.. code-block:: javascript
684+
685+
{
686+
"messengers": [
687+
"client",
688+
"cluster",
689+
"hb_back_client",
690+
"hb_back_server",
691+
"hb_front_client",
692+
"hb_front_server",
693+
"ms_objecter",
694+
"temp_mon_client"
695+
]
696+
}
697+
698+
The ``client`` and ``cluster`` messengers correspond to the configured
699+
client / cluster network (see :doc:`/rados/configuration/network-config-ref`). Messengers
700+
with ``hb_`` prefix are part of the heartbeat system.
701+
702+
List all current connections on the client messenger:
703+
704+
.. code-block:: bash
705+
706+
ceph tell osd.0 messenger dump client \
707+
| jq -r '.messenger.connections[].async_connection |
708+
[.conn_id, .socket_fd, .worker_id,
709+
if .status.connected then "connected" else "disconnected" end,
710+
.state,
711+
"\(.peer.type).\(.peer.entity_name.id).\(.peer.id)",
712+
.protocol.v2.con_mode, .protocol.v2.crypto.rx, .protocol.v2.compression.rx] |
713+
@tsv'
714+
715+
.. code-block:: bash
716+
717+
249 102 0 connected STATE_CONNECTION_ESTABLISHED client.admin.6407 crc PLAIN UNCOMPRESSED
718+
242 99 1 connected STATE_CONNECTION_ESTABLISHED client.rgw.8000.4473 crc PLAIN UNCOMPRESSED
719+
248 89 1 connected STATE_CONNECTION_ESTABLISHED mgr..-1 secure AES-128-GCM UNCOMPRESSED
720+
32 101 2 connected STATE_CONNECTION_ESTABLISHED client.rgw.8000.4483 crc PLAIN UNCOMPRESSED
721+
3 86 2 connected STATE_CONNECTION_ESTABLISHED mon..-1 secure AES-128-GCM UNCOMPRESSED
722+
244 102 0 connected STATE_CONNECTION_ESTABLISHED client.admin.6383 crc PLAIN UNCOMPRESSED
723+
724+
725+
Print active connections and their TCP round trip time and retransmission counters:
726+
727+
.. code-block:: bash
728+
729+
ceph tell osd.0 messenger dump client --tcp-info \
730+
| jq -r '.messenger.connections[].async_connection |
731+
select(.status.connected) |
732+
select(.peer.type != "client") |
733+
[.conn_id, .socket_fd, .worker_id,
734+
"\(.peer.type).\(.peer.global_id)",
735+
.tcp_info.tcpi_rtt_us, .tcp_info.tcpi_rttvar_us, .tcp_info.tcpi_total_retrans] |
736+
@tsv'

.. code-block:: bash
738+
739+
248 89 1 mgr.0 863 1677 0
740+
3 86 2 mon.0 230 278 0

qa/workunits/cephtool/test.sh

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2900,6 +2900,51 @@ function test_per_pool_scrub_status()
29002900
ceph osd pool rm noscrub_pool2 noscrub_pool2 --yes-i-really-really-mean-it
29012901
}
29022902

2903+
# Run the generic "messenger dump" sanity checks against one daemon.
#
# $1 - target passed to "ceph tell" (callers in this file use osd.0,
#      mon.a, mgr and mds.a)
#
# Checks that the bare command lists at least one messenger and that, for
# every listed messenger, "messenger dump <name> all" returns a document
# that:
#   - has the top-level keys "messenger" and "name",
#   - reports the requested messenger in "name",
#   - holds an object in "messenger" whose connections, listen_sockets,
#     anon_conns, accepting_conns and deleted_conns members are arrays.
function do_messenger_dump_basics_test()
{
  local target="$1"
  local messenger dump

  ceph tell "$target" messenger dump | expect_true jq --exit-status '.messengers | length > 0'
  # IFS= and -r make read hand over each name exactly as jq printed it
  # (no backslash processing, no whitespace trimming).
  ceph tell "$target" messenger dump | jq -r '.messengers[]' | while IFS= read -r messenger; do
    dump="$(ceph tell "$target" messenger dump "$messenger" all)"
    expect_true jq --exit-status 'has("messenger")' <<< "$dump"
    expect_true jq --exit-status 'has("name")' <<< "$dump"
    expect_true jq --arg expected_messenger "$messenger" --exit-status '
      .name == $expected_messenger' <<< "$dump"
    expect_true jq --exit-status '.messenger | type == "object"' <<< "$dump"
    expect_true jq --exit-status '.messenger |
      all([.connections,
           .listen_sockets,
           .anon_conns,
           .accepting_conns,
           .deleted_conns][];
          type == "array")' \
      <<< "$dump"
  done
}
2924+
2925+
# Run the generic messenger dump checks against osd.0.
function test_osd_messenger_dump()
2926+
{
2927+
do_messenger_dump_basics_test osd.0
2928+
}
2929+
# Run the generic messenger dump checks against mon.a and additionally
# verify that "--tcp-info" adds kernel TCP statistics to the dump.
function test_mon_messenger_dump()
{
  local target="mon.a"

  do_messenger_dump_basics_test "$target"
  # Testing the tcp_info feature requires at least one messenger TCP
  # connection. Test only the mon as it is very unlikely that it
  # doesn't have an active connection. Also only test for one
  # connection, as disconnected connections don't set tcp_info.
  #
  # expect_true has to wrap jq (with --exit-status) so that a "false"
  # result fails the test. "| objects" skips connections without a
  # tcp_info section; has() on null would make jq abort with an error.
  ceph tell "$target" messenger dump mon --tcp-info \
    | expect_true jq --exit-status \
        'any(.messenger.connections[].async_connection.tcp_info | objects; has("tcpi_state"))'
}
2939+
# Run the generic messenger dump checks against the "mgr" tell target.
function test_mgr_messenger_dump()
2940+
{
2941+
do_messenger_dump_basics_test mgr
2942+
}
2943+
# Run the generic messenger dump checks against mds.a.
function test_mds_messenger_dump()
2944+
{
2945+
do_messenger_dump_basics_test mds.a
2946+
}
2947+
29032948
#
29042949
# New tests should be added to the TESTS array below
29052950
#
@@ -2944,6 +2989,7 @@ MON_TESTS+=" mon_caps"
29442989
MON_TESTS+=" mon_cephdf_commands"
29452990
MON_TESTS+=" mon_tell_help_command"
29462991
MON_TESTS+=" mon_stdin_stdout"
2992+
MON_TESTS+=" mon_messenger_dump"
29472993

29482994
OSD_TESTS+=" osd_bench"
29492995
OSD_TESTS+=" osd_negative_filestore_merge_threshold"
@@ -2952,14 +2998,17 @@ OSD_TESTS+=" admin_heap_profiler"
29522998
OSD_TESTS+=" osd_tell_help_command"
29532999
OSD_TESTS+=" osd_compact"
29543000
OSD_TESTS+=" per_pool_scrub_status"
3001+
OSD_TESTS+=" osd_messenger_dump"
29553002

29563003
MDS_TESTS+=" mds_tell"
29573004
MDS_TESTS+=" mon_mds"
29583005
MDS_TESTS+=" mon_mds_metadata"
29593006
MDS_TESTS+=" mds_tell_help_command"
3007+
MDS_TESTS+=" mds_messenger_dump"
29603008

29613009
MGR_TESTS+=" mgr_tell"
29623010
MGR_TESTS+=" mgr_devices"
3011+
MGR_TESTS+=" mgr_messenger_dump"
29633012

29643013
TESTS+=$MON_TESTS
29653014
TESTS+=$OSD_TESTS

src/common/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,8 @@ set(common_srcs
104104
pretty_binary.cc
105105
utf8.c
106106
util.cc
107-
version.cc)
107+
version.cc
108+
tcp_info.cc)
108109

109110
if(WITH_SYSTEMD)
110111
list(APPEND common_srcs

src/common/ceph_context.cc

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -713,6 +713,7 @@ CephContext::CephContext(uint32_t module_type_,
713713
#ifdef CEPH_DEBUG_MUTEX
714714
_lockdep_obs(NULL),
715715
#endif
716+
_msgr_hook(nullptr),
716717
crush_location(this)
717718
{
718719
if (options.create_log) {
@@ -775,6 +776,17 @@ CephContext::CephContext(uint32_t module_type_,
775776
lookup_or_create_singleton_object<MempoolObs>("mempool_obs", false, this);
776777
}
777778

779+
// Create the messenger admin socket hook, or extend the existing one.
//
// While holding _msgr_hook_lock:
//  - if a hook is already stored, `add` is invoked with a pointer to it;
//  - otherwise `create` is invoked and the context takes ownership of
//    the returned pointer. `add` is NOT invoked on this first call.
void CephContext::modify_msgr_hook(
780+
std::function<AdminSocketHook*(void)> create,
781+
std::function<void(AdminSocketHook*)> add) {
782+
std::lock_guard l{_msgr_hook_lock};
783+
if (_msgr_hook) {
784+
add(_msgr_hook.get());
785+
} else {
786+
_msgr_hook.reset(create());
787+
}
788+
}
789+
778790
CephContext::~CephContext()
779791
{
780792
associated_objs.clear();
@@ -788,6 +800,9 @@ CephContext::~CephContext()
788800

789801
delete _plugin_registry;
790802

803+
if (_msgr_hook) {
804+
_admin_socket->unregister_commands(_msgr_hook.get());
805+
}
791806
_admin_socket->unregister_commands(_admin_hook);
792807
delete _admin_hook;
793808
delete _admin_socket;

src/common/ceph_context.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
#include "crush/CrushLocation.h"
4848

4949
class AdminSocket;
50+
class AdminSocketHook;
5051
class CryptoHandler;
5152
class CryptoRandom;
5253
class MonMap;
@@ -381,8 +382,13 @@ class CephContext {
381382
#ifdef CEPH_DEBUG_MUTEX
382383
md_config_obs_t *_lockdep_obs;
383384
#endif
385+
386+
std::unique_ptr<AdminSocketHook> _msgr_hook;
387+
ceph::mutex _msgr_hook_lock = ceph::make_mutex("CephContext::msgr_hook");
384388
public:
385389
TOPNSPC::crush::CrushLocation crush_location;
390+
void modify_msgr_hook(std::function<AdminSocketHook*(void)> create,
391+
std::function<void(AdminSocketHook*)> add);
386392
private:
387393

388394
enum {

src/common/tcp_info.cc

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2+
// vim: ts=8 sw=2 smarttab
3+
/*
4+
* Ceph - scalable distributed file system
5+
*
6+
* Copyright (C) 2024 Clyso GmbH
7+
*
8+
* This is free software; you can redistribute it and/or
9+
* modify it under the terms of the GNU Lesser General Public
10+
* License version 2.1, as published by the Free Software
11+
* Foundation. See file COPYING.
12+
*
13+
*/
14+
15+
#include "common/tcp_info.h"
16+
17+
#include "common/Formatter.h"
18+
19+
namespace ceph {
20+
21+
#ifdef _WIN32
22+
// Windows build: no TCP_INFO query is performed here. An empty
// placeholder type keeps the function signatures valid and both entry
// points unconditionally report failure.
struct tcp_info {};
23+
24+
// Stub: leaves `info` untouched and always returns false.
bool tcp_info(int fd, struct tcp_info& info) {
25+
return false;
26+
}
27+
// Stub: writes nothing to `f` and always returns false.
bool dump_tcp_info(int fd, Formatter* f) {
28+
return false;
29+
}
30+
31+
#else
32+
33+
/// Fetch the TCP_INFO socket statistics (see tcp(7)) of `fd` into `info`.
///
/// @param fd    socket file descriptor to query
/// @param info  destination for the statistics
/// @return true when getsockopt() reports success, false otherwise
bool tcp_info(int fd, struct tcp_info& info) {
  socklen_t len = sizeof(info);
  const int rc = getsockopt(fd, SOL_TCP, TCP_INFO, &info, &len);
  return rc == 0;
}
37+
38+
/// Map a TCP connection state code (the TCP_* constants from
/// <netinet/tcp.h>) to a lowercase, space-separated label.
///
/// @param state  state code, e.g. the tcpi_state field of struct tcp_info
/// @return a string literal naming the state, or "UNKNOWN" when the code
///         matches none of the listed states
static const char* get_tcpi_state_name(uint8_t state) {
  static const struct {
    int code;
    const char* label;
  } known_states[] = {
    {TCP_ESTABLISHED, "established"},
    {TCP_SYN_SENT, "syn sent"},
    {TCP_SYN_RECV, "syn recv"},
    {TCP_FIN_WAIT1, "fin wait1"},
    {TCP_FIN_WAIT2, "fin wait2"},
    {TCP_TIME_WAIT, "time wait"},
    {TCP_CLOSE, "close"},
    {TCP_CLOSE_WAIT, "close wait"},
    {TCP_LAST_ACK, "last ack"},
    {TCP_LISTEN, "listen"},
    {TCP_CLOSING, "closing"},
  };

  for (const auto& entry : known_states) {
    if (entry.code == state) {
      return entry.label;
    }
  }
  return "UNKNOWN";
}
66+
67+
bool dump_tcp_info(int fd, Formatter* f) {
68+
struct tcp_info info;
69+
if (!tcp_info(fd, info)) {
70+
return false;
71+
}
72+
73+
f->open_object_section("tcp_info");
74+
f->dump_string("tcpi_state", get_tcpi_state_name(info.tcpi_state));
75+
f->dump_unsigned("tcpi_retransmits", info.tcpi_retransmits);
76+
f->dump_unsigned("tcpi_probes", info.tcpi_probes);
77+
f->dump_unsigned("tcpi_backoff", info.tcpi_backoff);
78+
f->dump_unsigned("tcpi_rto_us", info.tcpi_rto);
79+
f->dump_unsigned("tcpi_ato_us", info.tcpi_ato);
80+
f->dump_unsigned("tcpi_snd_mss", info.tcpi_snd_mss);
81+
f->dump_unsigned("tcpi_rcv_mss", info.tcpi_rcv_mss);
82+
f->dump_unsigned("tcpi_unacked", info.tcpi_unacked);
83+
f->dump_unsigned("tcpi_lost", info.tcpi_lost);
84+
f->dump_unsigned("tcpi_retrans", info.tcpi_retrans);
85+
f->dump_unsigned("tcpi_pmtu", info.tcpi_pmtu);
86+
f->dump_unsigned("tcpi_rtt_us", info.tcpi_rtt);
87+
f->dump_unsigned("tcpi_rttvar_us", info.tcpi_rttvar);
88+
f->dump_unsigned("tcpi_total_retrans", info.tcpi_total_retrans);
89+
f->dump_unsigned("tcpi_last_data_sent_ms", info.tcpi_last_data_sent);
90+
f->dump_unsigned("tcpi_last_ack_sent_ms", info.tcpi_last_ack_sent);
91+
f->dump_unsigned("tcpi_last_data_recv_ms", info.tcpi_last_data_recv);
92+
f->dump_unsigned("tcpi_last_ack_recv_ms", info.tcpi_last_ack_recv);
93+
94+
f->open_array_section("tcpi_options");
95+
if (info.tcpi_options & TCPI_OPT_TIMESTAMPS) {
96+
f->dump_string("option", "timestamps");
97+
}
98+
if (info.tcpi_options & TCPI_OPT_SACK) {
99+
f->dump_string("option", "sack");
100+
}
101+
if (info.tcpi_options & TCPI_OPT_WSCALE) {
102+
f->dump_string("option", "wscale");
103+
}
104+
if (info.tcpi_options & TCPI_OPT_ECN) {
105+
f->dump_string("option", "ecn");
106+
}
107+
if (info.tcpi_options & TCPI_OPT_ECN_SEEN) {
108+
f->dump_string("option", "ecn seen");
109+
}
110+
if (info.tcpi_options & TCPI_OPT_SYN_DATA) {
111+
f->dump_string("option", "syn data");
112+
}
113+
f->close_section();
114+
115+
f->close_section();
116+
return true;
117+
}
118+
119+
#endif
120+
121+
} // namespace ceph

src/common/tcp_info.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2+
// vim: ts=8 sw=2 smarttab
3+
/*
4+
* Ceph - scalable distributed file system
5+
*
6+
* Copyright (C) 2024 Clyso GmbH
7+
*
8+
* This is free software; you can redistribute it and/or
9+
* modify it under the terms of the GNU Lesser General Public
10+
* License version 2.1, as published by the Free Software
11+
* Foundation. See file COPYING.
12+
*
13+
*/
14+
15+
#pragma once
16+
17+
#include <netinet/tcp.h>
18+
#include <sys/socket.h>
19+
20+
#include "Formatter.h"
21+
22+
namespace ceph {
23+
24+
/// Return TCP_INFO socket stats (see tcp(7)). Return true on success.
25+
bool tcp_info(int fd, struct tcp_info& info);
26+
/// Dump TCP_INFO socket stats to formatter. Use struct tcp_info field
27+
/// names as keys. Returns true on success.
28+
bool dump_tcp_info(int fd, Formatter* f);
29+
30+
} // namespace ceph

0 commit comments

Comments
 (0)