Skip to content

Commit 0a03a0f

Browse files
committed
ui: rework network page
Trying to make it a bit more functional.

First, we put bytes sent/received first. We drop packets sent/received, which largely duplicate this (I could be talked out of that).

Packet errors and drops are jammed into a single chart. Step changes are (hopefully) still visible, and it avoids having to scroll through a set of empty graphs on the way down in the common case of no issues. (I might be wrong about this, but I've never seen errors, for example, only drops.)

I added the new TCP retransmits chart from #149928.

The proxy charts are probably not useful most of the time, but I kept them around. _If_ they do something, I assume we want to know.

| Old Chart Layout | New Chart Layout |
| :---------------------------------------- | :--------------------------------------- |
| Network Bytes Received | Network Bytes Sent |
| Network Packets Received | Network Bytes Received |
| Network Packet Errors on Receive | RPC Heartbeat Latency: 50th percentile |
| Network Packet Drops on Receive | RPC Heartbeat Latency: 99th percentile |
| Network Bytes Sent | Unhealthy RPC Connections |
| Network Packets Sent | Network Packet Errors and Drops |
| Network Packet Errors on Send | TCP Retransmits |
| Network Packet Drops on Send | Proxy requests |
| RPC Heartbeat Latency: 50th percentile | Proxy request errors |
| RPC Heartbeat Latency: 99th percentile | Proxy forwards |
| Unhealthy RPC Connections | Proxy forward errors |
| Proxy requests | |
| Proxy request errors | |
| Proxy forwards | |

Epic: none
1 parent f6e9d3f commit 0a03a0f

File tree

1 file changed

+54
-84
lines changed
  • pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards

1 file changed

+54
-84
lines changed

pkg/ui/workspaces/db-console/src/views/cluster/containers/nodeGraphs/dashboards/networking.tsx

Lines changed: 54 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@ export default function (props: GraphDashboardProps) {
1515
const { nodeIDs, nodeDisplayNameByID, tenantSource } = props;
1616

1717
return [
18-
<LineGraph title="Network Bytes Received" showMetricsInTooltip={true}>
18+
<LineGraph title="Network Bytes Sent" showMetricsInTooltip={true}>
1919
<Axis units={AxisUnits.Bytes} label="bytes">
2020
{nodeIDs.map(nid => (
2121
<Metric
22-
name="cr.node.sys.host.net.recv.bytes"
22+
name="cr.node.sys.host.net.send.bytes"
2323
title={nodeDisplayName(nodeDisplayNameByID, nid)}
2424
sources={[nid]}
2525
tenantSource={tenantSource}
@@ -29,11 +29,11 @@ export default function (props: GraphDashboardProps) {
2929
</Axis>
3030
</LineGraph>,
3131

32-
<LineGraph title="Network Packets Received" showMetricsInTooltip={true}>
33-
<Axis units={AxisUnits.Count} label="packets">
32+
<LineGraph title="Network Bytes Received" showMetricsInTooltip={true}>
33+
<Axis units={AxisUnits.Bytes} label="bytes">
3434
{nodeIDs.map(nid => (
3535
<Metric
36-
name="cr.node.sys.host.net.recv.packets"
36+
name="cr.node.sys.host.net.recv.bytes"
3737
title={nodeDisplayName(nodeDisplayNameByID, nid)}
3838
sources={[nid]}
3939
tenantSource={tenantSource}
@@ -44,150 +44,120 @@ export default function (props: GraphDashboardProps) {
4444
</LineGraph>,
4545

4646
<LineGraph
47-
title="Network Packet Errors on Receive"
47+
title="RPC Heartbeat Latency: 50th percentile"
48+
isKvGraph={false}
49+
tooltip={`Round-trip latency for recent successful outgoing heartbeats.`}
4850
showMetricsInTooltip={true}
4951
>
50-
<Axis units={AxisUnits.Count} label="packets">
52+
<Axis units={AxisUnits.Duration} label="latency">
5153
{nodeIDs.map(nid => (
5254
<Metric
53-
name="cr.node.sys.host.net.recv.err"
55+
name="cr.node.round-trip-latency-p50"
5456
title={nodeDisplayName(nodeDisplayNameByID, nid)}
5557
sources={[nid]}
5658
tenantSource={tenantSource}
57-
nonNegativeRate
59+
downsampleMax
5860
/>
5961
))}
6062
</Axis>
6163
</LineGraph>,
6264

6365
<LineGraph
64-
title="Network Packet Drops on Receive"
66+
title="RPC Heartbeat Latency: 99th percentile"
67+
isKvGraph={false}
68+
tooltip={`Round-trip latency for recent successful outgoing heartbeats.`}
6569
showMetricsInTooltip={true}
6670
>
67-
<Axis units={AxisUnits.Count} label="packets">
68-
{nodeIDs.map(nid => (
69-
<Metric
70-
name="cr.node.sys.host.net.recv.drop"
71-
title={nodeDisplayName(nodeDisplayNameByID, nid)}
72-
sources={[nid]}
73-
tenantSource={tenantSource}
74-
nonNegativeRate
75-
/>
76-
))}
77-
</Axis>
78-
</LineGraph>,
79-
80-
<LineGraph title="Network Bytes Sent" showMetricsInTooltip={true}>
81-
<Axis units={AxisUnits.Bytes} label="bytes">
71+
<Axis units={AxisUnits.Duration} label="latency">
8272
{nodeIDs.map(nid => (
8373
<Metric
84-
name="cr.node.sys.host.net.send.bytes"
74+
name="cr.node.round-trip-latency-p99"
8575
title={nodeDisplayName(nodeDisplayNameByID, nid)}
8676
sources={[nid]}
8777
tenantSource={tenantSource}
88-
nonNegativeRate
78+
downsampleMax
8979
/>
9080
))}
9181
</Axis>
9282
</LineGraph>,
9383

94-
<LineGraph title="Network Packets Sent" showMetricsInTooltip={true}>
95-
<Axis units={AxisUnits.Count} label="packets">
84+
<LineGraph
85+
title="Unhealthy RPC Connections"
86+
tooltip={`The number of outgoing connections on each node that are in an
87+
unhealthy state.`}
88+
showMetricsInTooltip={true}
89+
>
90+
<Axis label="connections">
9691
{nodeIDs.map(nid => (
9792
<Metric
98-
name="cr.node.sys.host.net.send.packets"
93+
key={nid}
94+
name="cr.node.rpc.connection.unhealthy"
9995
title={nodeDisplayName(nodeDisplayNameByID, nid)}
10096
sources={[nid]}
10197
tenantSource={tenantSource}
102-
nonNegativeRate
10398
/>
10499
))}
105100
</Axis>
106101
</LineGraph>,
107102

108103
<LineGraph
109-
title="Network Packet Errors on Send"
104+
title="Network Packet Errors and Drops"
110105
showMetricsInTooltip={true}
111106
>
112107
<Axis units={AxisUnits.Count} label="packets">
113-
{nodeIDs.map(nid => (
108+
{nodeIDs.flatMap(nid => [
114109
<Metric
115-
name="cr.node.sys.host.net.send.err"
116-
title={nodeDisplayName(nodeDisplayNameByID, nid)}
110+
key={`${nid}-recv-err`}
111+
name="cr.node.sys.host.net.recv.err"
112+
title={`${nodeDisplayName(nodeDisplayNameByID, nid)} - Recv Errors`}
117113
sources={[nid]}
118114
tenantSource={tenantSource}
119115
nonNegativeRate
120-
/>
121-
))}
122-
</Axis>
123-
</LineGraph>,
124-
125-
<LineGraph title="Network Packet Drops on Send" showMetricsInTooltip={true}>
126-
<Axis units={AxisUnits.Count} label="packets">
127-
{nodeIDs.map(nid => (
116+
/>,
128117
<Metric
129-
name="cr.node.sys.host.net.send.drop"
130-
title={nodeDisplayName(nodeDisplayNameByID, nid)}
118+
key={`${nid}-recv-drop`}
119+
name="cr.node.sys.host.net.recv.drop"
120+
title={`${nodeDisplayName(nodeDisplayNameByID, nid)} - Recv Drops`}
131121
sources={[nid]}
132122
tenantSource={tenantSource}
133123
nonNegativeRate
134-
/>
135-
))}
136-
</Axis>
137-
</LineGraph>,
138-
139-
<LineGraph
140-
title="RPC Heartbeat Latency: 50th percentile"
141-
isKvGraph={false}
142-
tooltip={`Round-trip latency for recent successful outgoing heartbeats.`}
143-
showMetricsInTooltip={true}
144-
>
145-
<Axis units={AxisUnits.Duration} label="latency">
146-
{nodeIDs.map(nid => (
124+
/>,
147125
<Metric
148-
name="cr.node.round-trip-latency-p50"
149-
title={nodeDisplayName(nodeDisplayNameByID, nid)}
126+
key={`${nid}-send-err`}
127+
name="cr.node.sys.host.net.send.err"
128+
title={`${nodeDisplayName(nodeDisplayNameByID, nid)} - Send Errors`}
150129
sources={[nid]}
151130
tenantSource={tenantSource}
152-
downsampleMax
153-
/>
154-
))}
155-
</Axis>
156-
</LineGraph>,
157-
158-
<LineGraph
159-
title="RPC Heartbeat Latency: 99th percentile"
160-
isKvGraph={false}
161-
tooltip={`Round-trip latency for recent successful outgoing heartbeats.`}
162-
showMetricsInTooltip={true}
163-
>
164-
<Axis units={AxisUnits.Duration} label="latency">
165-
{nodeIDs.map(nid => (
131+
nonNegativeRate
132+
/>,
166133
<Metric
167-
name="cr.node.round-trip-latency-p99"
168-
title={nodeDisplayName(nodeDisplayNameByID, nid)}
134+
key={`${nid}-send-drop`}
135+
name="cr.node.sys.host.net.send.drop"
136+
title={`${nodeDisplayName(nodeDisplayNameByID, nid)} - Send Drops`}
169137
sources={[nid]}
170138
tenantSource={tenantSource}
171-
downsampleMax
172-
/>
173-
))}
139+
nonNegativeRate
140+
/>,
141+
])}
174142
</Axis>
175143
</LineGraph>,
176144

177145
<LineGraph
178-
title="Unhealthy RPC Connections"
179-
tooltip={`The number of outgoing connections on each node that are in an
180-
unhealthy state.`}
146+
title="TCP Retransmits"
147+
tooltip={
148+
"The number of TCP segments retransmitted. Some retransmissions are benign, but phase changes can be indicative of network congestion or overloaded peers."
149+
}
181150
showMetricsInTooltip={true}
182151
>
183-
<Axis label="connections">
152+
<Axis label="segments">
184153
{nodeIDs.map(nid => (
185154
<Metric
186155
key={nid}
187-
name="cr.node.rpc.connection.unhealthy"
156+
name="cr.node.sys.host.net.send.tcp.retrans_segs"
188157
title={nodeDisplayName(nodeDisplayNameByID, nid)}
189158
sources={[nid]}
190159
tenantSource={tenantSource}
160+
nonNegativeRate
191161
/>
192162
))}
193163
</Axis>

0 commit comments

Comments
 (0)