Skip to content

Commit 4629ed2

Browse files
committed
Merge tag 'rxrpc-fixes-20200520' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs
David Howells says: ==================== rxrpc: Fix retransmission timeout and ACK discard Here are a couple of fixes and an extra tracepoint for AF_RXRPC: (1) Calculate the RTO pretty much as TCP does, rather than making something up, including an initial 4s timeout (which causes return probes from the fileserver to fail if a packet goes missing), and add backoff. (2) Fix the discarding of out-of-order received ACKs. We mustn't let the hard-ACK point regress, nor do we want to do unnecessary retransmission because the soft-ACK list regresses. This is not trivial, however, due to some loose wording in various old protocol specs, the ACK field that should be used for this sometimes has the wrong information in it. (3) Add a tracepoint to log a discarded ACK. ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents 79dde73 + 441fdee commit 4629ed2

File tree

17 files changed

+335
-159
lines changed

17 files changed

+335
-159
lines changed

fs/afs/fs_probe.c

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,8 @@ void afs_fileserver_probe_result(struct afs_call *call)
3232
struct afs_server *server = call->server;
3333
unsigned int server_index = call->server_index;
3434
unsigned int index = call->addr_ix;
35-
unsigned int rtt = UINT_MAX;
35+
unsigned int rtt_us;
3636
bool have_result = false;
37-
u64 _rtt;
3837
int ret = call->error;
3938

4039
_enter("%pU,%u", &server->uuid, index);
@@ -93,15 +92,9 @@ void afs_fileserver_probe_result(struct afs_call *call)
9392
}
9493
}
9594

96-
/* Get the RTT and scale it to fit into a 32-bit value that represents
97-
* over a minute of time so that we can access it with one instruction
98-
* on a 32-bit system.
99-
*/
100-
_rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
101-
_rtt /= 64;
102-
rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt;
103-
if (rtt < server->probe.rtt) {
104-
server->probe.rtt = rtt;
95+
rtt_us = rxrpc_kernel_get_srtt(call->net->socket, call->rxcall);
96+
if (rtt_us < server->probe.rtt) {
97+
server->probe.rtt = rtt_us;
10598
alist->preferred = index;
10699
have_result = true;
107100
}
@@ -113,8 +106,7 @@ void afs_fileserver_probe_result(struct afs_call *call)
113106
spin_unlock(&server->probe_lock);
114107

115108
_debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
116-
server_index, index, &alist->addrs[index].transport,
117-
(unsigned int)rtt, ret);
109+
server_index, index, &alist->addrs[index].transport, rtt_us, ret);
118110

119111
have_result |= afs_fs_probe_done(server);
120112
if (have_result)

fs/afs/vl_probe.c

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,9 @@ void afs_vlserver_probe_result(struct afs_call *call)
3131
struct afs_addr_list *alist = call->alist;
3232
struct afs_vlserver *server = call->vlserver;
3333
unsigned int server_index = call->server_index;
34+
unsigned int rtt_us = 0;
3435
unsigned int index = call->addr_ix;
35-
unsigned int rtt = UINT_MAX;
3636
bool have_result = false;
37-
u64 _rtt;
3837
int ret = call->error;
3938

4039
_enter("%s,%u,%u,%d,%d", server->name, server_index, index, ret, call->abort_code);
@@ -93,15 +92,9 @@ void afs_vlserver_probe_result(struct afs_call *call)
9392
}
9493
}
9594

96-
/* Get the RTT and scale it to fit into a 32-bit value that represents
97-
* over a minute of time so that we can access it with one instruction
98-
* on a 32-bit system.
99-
*/
100-
_rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
101-
_rtt /= 64;
102-
rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt;
103-
if (rtt < server->probe.rtt) {
104-
server->probe.rtt = rtt;
95+
rtt_us = rxrpc_kernel_get_srtt(call->net->socket, call->rxcall);
96+
if (rtt_us < server->probe.rtt) {
97+
server->probe.rtt = rtt_us;
10598
alist->preferred = index;
10699
have_result = true;
107100
}
@@ -113,8 +106,7 @@ void afs_vlserver_probe_result(struct afs_call *call)
113106
spin_unlock(&server->probe_lock);
114107

115108
_debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
116-
server_index, index, &alist->addrs[index].transport,
117-
(unsigned int)rtt, ret);
109+
server_index, index, &alist->addrs[index].transport, rtt_us, ret);
118110

119111
have_result |= afs_vl_probe_done(server);
120112
if (have_result) {

include/net/af_rxrpc.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
5959
void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
6060
void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
6161
struct sockaddr_rxrpc *);
62-
u64 rxrpc_kernel_get_rtt(struct socket *, struct rxrpc_call *);
62+
u32 rxrpc_kernel_get_srtt(struct socket *, struct rxrpc_call *);
6363
int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
6464
rxrpc_user_attach_call_t, unsigned long, gfp_t,
6565
unsigned int);

include/trace/events/rxrpc.h

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1112,18 +1112,17 @@ TRACE_EVENT(rxrpc_rtt_tx,
11121112
TRACE_EVENT(rxrpc_rtt_rx,
11131113
TP_PROTO(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
11141114
rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
1115-
s64 rtt, u8 nr, s64 avg),
1115+
u32 rtt, u32 rto),
11161116

1117-
TP_ARGS(call, why, send_serial, resp_serial, rtt, nr, avg),
1117+
TP_ARGS(call, why, send_serial, resp_serial, rtt, rto),
11181118

11191119
TP_STRUCT__entry(
11201120
__field(unsigned int, call )
11211121
__field(enum rxrpc_rtt_rx_trace, why )
1122-
__field(u8, nr )
11231122
__field(rxrpc_serial_t, send_serial )
11241123
__field(rxrpc_serial_t, resp_serial )
1125-
__field(s64, rtt )
1126-
__field(u64, avg )
1124+
__field(u32, rtt )
1125+
__field(u32, rto )
11271126
),
11281127

11291128
TP_fast_assign(
@@ -1132,18 +1131,16 @@ TRACE_EVENT(rxrpc_rtt_rx,
11321131
__entry->send_serial = send_serial;
11331132
__entry->resp_serial = resp_serial;
11341133
__entry->rtt = rtt;
1135-
__entry->nr = nr;
1136-
__entry->avg = avg;
1134+
__entry->rto = rto;
11371135
),
11381136

1139-
TP_printk("c=%08x %s sr=%08x rr=%08x rtt=%lld nr=%u avg=%lld",
1137+
TP_printk("c=%08x %s sr=%08x rr=%08x rtt=%u rto=%u",
11401138
__entry->call,
11411139
__print_symbolic(__entry->why, rxrpc_rtt_rx_traces),
11421140
__entry->send_serial,
11431141
__entry->resp_serial,
11441142
__entry->rtt,
1145-
__entry->nr,
1146-
__entry->avg)
1143+
__entry->rto)
11471144
);
11481145

11491146
TRACE_EVENT(rxrpc_timer,
@@ -1544,6 +1541,41 @@ TRACE_EVENT(rxrpc_notify_socket,
15441541
__entry->serial)
15451542
);
15461543

1544+
TRACE_EVENT(rxrpc_rx_discard_ack,
1545+
TP_PROTO(unsigned int debug_id, rxrpc_serial_t serial,
1546+
rxrpc_seq_t first_soft_ack, rxrpc_seq_t call_ackr_first,
1547+
rxrpc_seq_t prev_pkt, rxrpc_seq_t call_ackr_prev),
1548+
1549+
TP_ARGS(debug_id, serial, first_soft_ack, call_ackr_first,
1550+
prev_pkt, call_ackr_prev),
1551+
1552+
TP_STRUCT__entry(
1553+
__field(unsigned int, debug_id )
1554+
__field(rxrpc_serial_t, serial )
1555+
__field(rxrpc_seq_t, first_soft_ack)
1556+
__field(rxrpc_seq_t, call_ackr_first)
1557+
__field(rxrpc_seq_t, prev_pkt)
1558+
__field(rxrpc_seq_t, call_ackr_prev)
1559+
),
1560+
1561+
TP_fast_assign(
1562+
__entry->debug_id = debug_id;
1563+
__entry->serial = serial;
1564+
__entry->first_soft_ack = first_soft_ack;
1565+
__entry->call_ackr_first = call_ackr_first;
1566+
__entry->prev_pkt = prev_pkt;
1567+
__entry->call_ackr_prev = call_ackr_prev;
1568+
),
1569+
1570+
TP_printk("c=%08x r=%08x %08x<%08x %08x<%08x",
1571+
__entry->debug_id,
1572+
__entry->serial,
1573+
__entry->first_soft_ack,
1574+
__entry->call_ackr_first,
1575+
__entry->prev_pkt,
1576+
__entry->call_ackr_prev)
1577+
);
1578+
15471579
#endif /* _TRACE_RXRPC_H */
15481580

15491581
/* This part must be outside protection */

net/rxrpc/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ rxrpc-y := \
2525
peer_event.o \
2626
peer_object.o \
2727
recvmsg.o \
28+
rtt.o \
2829
security.o \
2930
sendmsg.o \
3031
skbuff.o \

net/rxrpc/ar-internal.h

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include <linux/atomic.h>
99
#include <linux/seqlock.h>
10+
#include <linux/win_minmax.h>
1011
#include <net/net_namespace.h>
1112
#include <net/netns/generic.h>
1213
#include <net/sock.h>
@@ -311,11 +312,14 @@ struct rxrpc_peer {
311312
#define RXRPC_RTT_CACHE_SIZE 32
312313
spinlock_t rtt_input_lock; /* RTT lock for input routine */
313314
ktime_t rtt_last_req; /* Time of last RTT request */
314-
u64 rtt; /* Current RTT estimate (in nS) */
315-
u64 rtt_sum; /* Sum of cache contents */
316-
u64 rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */
317-
u8 rtt_cursor; /* next entry at which to insert */
318-
u8 rtt_usage; /* amount of cache actually used */
315+
unsigned int rtt_count; /* Number of samples we've got */
316+
317+
u32 srtt_us; /* smoothed round trip time << 3 in usecs */
318+
u32 mdev_us; /* medium deviation */
319+
u32 mdev_max_us; /* maximal mdev for the last rtt period */
320+
u32 rttvar_us; /* smoothed mdev_max */
321+
u32 rto_j; /* Retransmission timeout in jiffies */
322+
u8 backoff; /* Backoff timeout */
319323

320324
u8 cong_cwnd; /* Congestion window size */
321325
};
@@ -1041,7 +1045,6 @@ extern unsigned long rxrpc_idle_ack_delay;
10411045
extern unsigned int rxrpc_rx_window_size;
10421046
extern unsigned int rxrpc_rx_mtu;
10431047
extern unsigned int rxrpc_rx_jumbo_max;
1044-
extern unsigned long rxrpc_resend_timeout;
10451048

10461049
extern const s8 rxrpc_ack_priority[];
10471050

@@ -1069,8 +1072,6 @@ void rxrpc_send_keepalive(struct rxrpc_peer *);
10691072
* peer_event.c
10701073
*/
10711074
void rxrpc_error_report(struct sock *);
1072-
void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace,
1073-
rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t);
10741075
void rxrpc_peer_keepalive_worker(struct work_struct *);
10751076

10761077
/*
@@ -1102,6 +1103,14 @@ extern const struct seq_operations rxrpc_peer_seq_ops;
11021103
void rxrpc_notify_socket(struct rxrpc_call *);
11031104
int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int);
11041105

1106+
/*
1107+
* rtt.c
1108+
*/
1109+
void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace,
1110+
rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t);
1111+
unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *, bool);
1112+
void rxrpc_peer_init_rtt(struct rxrpc_peer *);
1113+
11051114
/*
11061115
* rxkad.c
11071116
*/

net/rxrpc/call_accept.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb)
248248
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
249249
ktime_t now = skb->tstamp;
250250

251-
if (call->peer->rtt_usage < 3 ||
251+
if (call->peer->rtt_count < 3 ||
252252
ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now))
253253
rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial,
254254
true, true,

net/rxrpc/call_event.c

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,8 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
111111
} else {
112112
unsigned long now = jiffies, ack_at;
113113

114-
if (call->peer->rtt_usage > 0)
115-
ack_at = nsecs_to_jiffies(call->peer->rtt);
114+
if (call->peer->srtt_us != 0)
115+
ack_at = usecs_to_jiffies(call->peer->srtt_us >> 3);
116116
else
117117
ack_at = expiry;
118118

@@ -157,24 +157,18 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
157157
static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
158158
{
159159
struct sk_buff *skb;
160-
unsigned long resend_at;
160+
unsigned long resend_at, rto_j;
161161
rxrpc_seq_t cursor, seq, top;
162-
ktime_t now, max_age, oldest, ack_ts, timeout, min_timeo;
162+
ktime_t now, max_age, oldest, ack_ts;
163163
int ix;
164164
u8 annotation, anno_type, retrans = 0, unacked = 0;
165165

166166
_enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
167167

168-
if (call->peer->rtt_usage > 1)
169-
timeout = ns_to_ktime(call->peer->rtt * 3 / 2);
170-
else
171-
timeout = ms_to_ktime(rxrpc_resend_timeout);
172-
min_timeo = ns_to_ktime((1000000000 / HZ) * 4);
173-
if (ktime_before(timeout, min_timeo))
174-
timeout = min_timeo;
168+
rto_j = call->peer->rto_j;
175169

176170
now = ktime_get_real();
177-
max_age = ktime_sub(now, timeout);
171+
max_age = ktime_sub(now, jiffies_to_usecs(rto_j));
178172

179173
spin_lock_bh(&call->lock);
180174

@@ -219,7 +213,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
219213
}
220214

221215
resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest)));
222-
resend_at += jiffies + rxrpc_resend_timeout;
216+
resend_at += jiffies + rto_j;
223217
WRITE_ONCE(call->resend_at, resend_at);
224218

225219
if (unacked)
@@ -234,7 +228,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
234228
rxrpc_timer_set_for_resend);
235229
spin_unlock_bh(&call->lock);
236230
ack_ts = ktime_sub(now, call->acks_latest_ts);
237-
if (ktime_to_ns(ack_ts) < call->peer->rtt)
231+
if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3))
238232
goto out;
239233
rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false,
240234
rxrpc_propose_ack_ping_for_lost_ack);

net/rxrpc/input.c

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,11 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
9191
/* We analyse the number of packets that get ACK'd per RTT
9292
* period and increase the window if we managed to fill it.
9393
*/
94-
if (call->peer->rtt_usage == 0)
94+
if (call->peer->rtt_count == 0)
9595
goto out;
9696
if (ktime_before(skb->tstamp,
97-
ktime_add_ns(call->cong_tstamp,
98-
call->peer->rtt)))
97+
ktime_add_us(call->cong_tstamp,
98+
call->peer->srtt_us >> 3)))
9999
goto out_no_clear_ca;
100100
change = rxrpc_cong_rtt_window_end;
101101
call->cong_tstamp = skb->tstamp;
@@ -802,6 +802,30 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
802802
}
803803
}
804804

805+
/*
806+
* Return true if the ACK is valid - ie. it doesn't appear to have regressed
807+
* with respect to the ack state conveyed by preceding ACKs.
808+
*/
809+
static bool rxrpc_is_ack_valid(struct rxrpc_call *call,
810+
rxrpc_seq_t first_pkt, rxrpc_seq_t prev_pkt)
811+
{
812+
rxrpc_seq_t base = READ_ONCE(call->ackr_first_seq);
813+
814+
if (after(first_pkt, base))
815+
return true; /* The window advanced */
816+
817+
if (before(first_pkt, base))
818+
return false; /* firstPacket regressed */
819+
820+
if (after_eq(prev_pkt, call->ackr_prev_seq))
821+
return true; /* previousPacket hasn't regressed. */
822+
823+
/* Some rx implementations put a serial number in previousPacket. */
824+
if (after_eq(prev_pkt, base + call->tx_winsize))
825+
return false;
826+
return true;
827+
}
828+
805829
/*
806830
* Process an ACK packet.
807831
*
@@ -865,9 +889,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
865889
}
866890

867891
/* Discard any out-of-order or duplicate ACKs (outside lock). */
868-
if (before(first_soft_ack, call->ackr_first_seq) ||
869-
before(prev_pkt, call->ackr_prev_seq))
892+
if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) {
893+
trace_rxrpc_rx_discard_ack(call->debug_id, sp->hdr.serial,
894+
first_soft_ack, call->ackr_first_seq,
895+
prev_pkt, call->ackr_prev_seq);
870896
return;
897+
}
871898

872899
buf.info.rxMTU = 0;
873900
ioffset = offset + nr_acks + 3;
@@ -878,9 +905,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
878905
spin_lock(&call->input_lock);
879906

880907
/* Discard any out-of-order or duplicate ACKs (inside lock). */
881-
if (before(first_soft_ack, call->ackr_first_seq) ||
882-
before(prev_pkt, call->ackr_prev_seq))
908+
if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) {
909+
trace_rxrpc_rx_discard_ack(call->debug_id, sp->hdr.serial,
910+
first_soft_ack, call->ackr_first_seq,
911+
prev_pkt, call->ackr_prev_seq);
883912
goto out;
913+
}
884914
call->acks_latest_ts = skb->tstamp;
885915

886916
call->ackr_first_seq = first_soft_ack;

net/rxrpc/misc.c

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,6 @@ unsigned int rxrpc_rx_mtu = 5692;
6363
*/
6464
unsigned int rxrpc_rx_jumbo_max = 4;
6565

66-
/*
67-
* Time till packet resend (in milliseconds).
68-
*/
69-
unsigned long rxrpc_resend_timeout = 4 * HZ;
70-
7166
const s8 rxrpc_ack_priority[] = {
7267
[0] = 0,
7368
[RXRPC_ACK_DELAY] = 1,

0 commit comments

Comments
 (0)