Skip to content

Commit 06baf9b

Browse files
committed
Merge branch 'tcp-receiver-changes'
Eric Dumazet says:

====================
tcp: receiver changes

Before accepting an incoming packet:

- Make sure to not accept a packet beyond advertized RWIN.
  If not, increment a new SNMP counter (LINUX_MIB_BEYOND_WINDOW)

- ooo packets should update rcv_mss and tp->scaling_ratio.

- Make sure to not accept packet beyond sk_rcvbuf limit.

This series includes three associated packetdrill tests.
====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
2 parents a86eb2a + 906893c commit 06baf9b

File tree

9 files changed

+152
-14
lines changed

9 files changed

+152
-14
lines changed

Documentation/networking/net_cachelines/snmp.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ unsigned_long LINUX_MIB_TIMEWAITRECYCLED
3636
unsigned_long LINUX_MIB_TIMEWAITKILLED
3737
unsigned_long LINUX_MIB_PAWSACTIVEREJECTED
3838
unsigned_long LINUX_MIB_PAWSESTABREJECTED
39+
unsigned_long LINUX_MIB_BEYOND_WINDOW
3940
unsigned_long LINUX_MIB_TSECR_REJECTED
4041
unsigned_long LINUX_MIB_PAWS_OLD_ACK
4142
unsigned_long LINUX_MIB_PAWS_TW_REJECTED

include/net/dropreason-core.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
FN(TCP_LISTEN_OVERFLOW) \
4646
FN(TCP_OLD_SEQUENCE) \
4747
FN(TCP_INVALID_SEQUENCE) \
48+
FN(TCP_INVALID_END_SEQUENCE) \
4849
FN(TCP_INVALID_ACK_SEQUENCE) \
4950
FN(TCP_RESET) \
5051
FN(TCP_INVALID_SYN) \
@@ -303,8 +304,14 @@ enum skb_drop_reason {
303304
SKB_DROP_REASON_TCP_LISTEN_OVERFLOW,
304305
/** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */
305306
SKB_DROP_REASON_TCP_OLD_SEQUENCE,
306-
/** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */
307+
/** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field. */
307308
SKB_DROP_REASON_TCP_INVALID_SEQUENCE,
309+
/**
310+
* @SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE:
311+
* Not acceptable END_SEQ field.
312+
* Corresponds to LINUX_MIB_BEYOND_WINDOW.
313+
*/
314+
SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE,
308315
/**
309316
* @SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE: Not acceptable ACK SEQ
310317
* field because ack sequence is not in the window between snd_una

include/net/sock.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1553,7 +1553,7 @@ __sk_rmem_schedule(struct sock *sk, int size, bool pfmemalloc)
15531553
}
15541554

15551555
static inline bool
1556-
sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
1556+
sk_rmem_schedule(struct sock *sk, const struct sk_buff *skb, int size)
15571557
{
15581558
return __sk_rmem_schedule(sk, size, skb_pfmemalloc(skb));
15591559
}

include/uapi/linux/snmp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ enum
186186
LINUX_MIB_TIMEWAITKILLED, /* TimeWaitKilled */
187187
LINUX_MIB_PAWSACTIVEREJECTED, /* PAWSActiveRejected */
188188
LINUX_MIB_PAWSESTABREJECTED, /* PAWSEstabRejected */
189+
LINUX_MIB_BEYOND_WINDOW, /* BeyondWindow */
189190
LINUX_MIB_TSECRREJECTED, /* TSEcrRejected */
190191
LINUX_MIB_PAWS_OLD_ACK, /* PAWSOldAck */
191192
LINUX_MIB_PAWS_TW_REJECTED, /* PAWSTimewait */

net/ipv4/proc.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ static const struct snmp_mib snmp4_net_list[] = {
189189
SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED),
190190
SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED),
191191
SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED),
192+
SNMP_MIB_ITEM("BeyondWindow", LINUX_MIB_BEYOND_WINDOW),
192193
SNMP_MIB_ITEM("TSEcrRejected", LINUX_MIB_TSECRREJECTED),
193194
SNMP_MIB_ITEM("PAWSOldAck", LINUX_MIB_PAWS_OLD_ACK),
194195
SNMP_MIB_ITEM("PAWSTimewait", LINUX_MIB_PAWS_TW_REJECTED),

net/ipv4/tcp_input.c

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4391,14 +4391,22 @@ static enum skb_drop_reason tcp_disordered_ack_check(const struct sock *sk,
43914391
* (borrowed from freebsd)
43924392
*/
43934393

4394-
static enum skb_drop_reason tcp_sequence(const struct tcp_sock *tp,
4394+
static enum skb_drop_reason tcp_sequence(const struct sock *sk,
43954395
u32 seq, u32 end_seq)
43964396
{
4397+
const struct tcp_sock *tp = tcp_sk(sk);
4398+
43974399
if (before(end_seq, tp->rcv_wup))
43984400
return SKB_DROP_REASON_TCP_OLD_SEQUENCE;
43994401

4400-
if (after(seq, tp->rcv_nxt + tcp_receive_window(tp)))
4401-
return SKB_DROP_REASON_TCP_INVALID_SEQUENCE;
4402+
if (after(end_seq, tp->rcv_nxt + tcp_receive_window(tp))) {
4403+
if (after(seq, tp->rcv_nxt + tcp_receive_window(tp)))
4404+
return SKB_DROP_REASON_TCP_INVALID_SEQUENCE;
4405+
4406+
/* Only accept this packet if receive queue is empty. */
4407+
if (skb_queue_len(&sk->sk_receive_queue))
4408+
return SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE;
4409+
}
44024410

44034411
return SKB_NOT_DROPPED_YET;
44044412
}
@@ -4880,10 +4888,20 @@ static void tcp_ofo_queue(struct sock *sk)
48804888
static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb);
48814889
static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb);
48824890

4883-
static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
4891+
/* Check if this incoming skb can be added to socket receive queues
4892+
* while satisfying sk->sk_rcvbuf limit.
4893+
*/
4894+
static bool tcp_can_ingest(const struct sock *sk, const struct sk_buff *skb)
4895+
{
4896+
unsigned int new_mem = atomic_read(&sk->sk_rmem_alloc) + skb->truesize;
4897+
4898+
return new_mem <= sk->sk_rcvbuf;
4899+
}
4900+
4901+
static int tcp_try_rmem_schedule(struct sock *sk, const struct sk_buff *skb,
48844902
unsigned int size)
48854903
{
4886-
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
4904+
if (!tcp_can_ingest(sk, skb) ||
48874905
!sk_rmem_schedule(sk, skb, size)) {
48884906

48894907
if (tcp_prune_queue(sk, skb) < 0)
@@ -4915,6 +4933,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
49154933
return;
49164934
}
49174935

4936+
tcp_measure_rcv_mss(sk, skb);
49184937
/* Disable header prediction. */
49194938
tp->pred_flags = 0;
49204939
inet_csk_schedule_ack(sk);
@@ -5498,7 +5517,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb)
54985517
tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE);
54995518
tp->ooo_last_skb = rb_to_skb(prev);
55005519
if (!prev || goal <= 0) {
5501-
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
5520+
if (tcp_can_ingest(sk, skb) &&
55025521
!tcp_under_memory_pressure(sk))
55035522
break;
55045523
goal = sk->sk_rcvbuf >> 3;
@@ -5532,12 +5551,12 @@ static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb)
55325551

55335552
NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED);
55345553

5535-
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
5554+
if (!tcp_can_ingest(sk, in_skb))
55365555
tcp_clamp_window(sk);
55375556
else if (tcp_under_memory_pressure(sk))
55385557
tcp_adjust_rcv_ssthresh(sk);
55395558

5540-
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
5559+
if (tcp_can_ingest(sk, in_skb))
55415560
return 0;
55425561

55435562
tcp_collapse_ofo_queue(sk);
@@ -5547,15 +5566,15 @@ static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb)
55475566
NULL,
55485567
tp->copied_seq, tp->rcv_nxt);
55495568

5550-
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
5569+
if (tcp_can_ingest(sk, in_skb))
55515570
return 0;
55525571

55535572
/* Collapsing did not help, destructive actions follow.
55545573
* This must not ever occur. */
55555574

55565575
tcp_prune_ofo_queue(sk, in_skb);
55575576

5558-
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
5577+
if (tcp_can_ingest(sk, in_skb))
55595578
return 0;
55605579

55615580
/* If we are really being abused, tell the caller to silently
@@ -5881,7 +5900,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
58815900

58825901
step1:
58835902
/* Step 1: check sequence number */
5884-
reason = tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
5903+
reason = tcp_sequence(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
58855904
if (reason) {
58865905
/* RFC793, page 37: "In all states except SYN-SENT, all reset
58875906
* (RST) segments are validated by checking their SEQ-fields."
@@ -5892,6 +5911,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
58925911
if (!th->rst) {
58935912
if (th->syn)
58945913
goto syn_challenge;
5914+
NET_INC_STATS(sock_net(sk), LINUX_MIB_BEYOND_WINDOW);
58955915
if (!tcp_oow_rate_limited(sock_net(sk), skb,
58965916
LINUX_MIB_TCPACKSKIPPEDSEQ,
58975917
&tp->last_oow_ack_time))
@@ -6110,6 +6130,10 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
61106130
if (tcp_checksum_complete(skb))
61116131
goto csum_error;
61126132

6133+
if (after(TCP_SKB_CB(skb)->end_seq,
6134+
tp->rcv_nxt + tcp_receive_window(tp)))
6135+
goto validate;
6136+
61136137
if ((int)skb->truesize > sk->sk_forward_alloc)
61146138
goto step5;
61156139

@@ -6165,7 +6189,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
61656189
/*
61666190
* Standard slow path.
61676191
*/
6168-
6192+
validate:
61696193
if (!tcp_validate_incoming(sk, skb, th, 1))
61706194
return;
61716195

tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
--mss=1000
4+
5+
`./defaults.sh
6+
sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"`
7+
8+
+0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
9+
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
10+
+0 bind(3, ..., ...) = 0
11+
+0 listen(3, 1) = 0
12+
13+
+0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
14+
+0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10>
15+
+.1 < . 1:1(0) ack 1 win 257
16+
17+
+0 accept(3, ..., ...) = 4
18+
19+
+0 < . 2001:11001(9000) ack 1 win 257
20+
+0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:11001>
21+
22+
// check that ooo packet properly updates tcpi_rcv_mss
23+
+0 %{ assert tcpi_rcv_mss == 1000, tcpi_rcv_mss }%
24+
25+
+0 < . 11001:21001(10000) ack 1 win 257
26+
+0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:21001>
27+
tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
--mss=1000
4+
5+
`./defaults.sh`
6+
7+
0 `nstat -n`
8+
9+
// Establish a connection.
10+
+0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
11+
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
12+
+0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [10000], 4) = 0
13+
+0 bind(3, ..., ...) = 0
14+
+0 listen(3, 1) = 0
15+
16+
+0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
17+
+0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 0>
18+
+.1 < . 1:1(0) ack 1 win 257
19+
20+
+0 accept(3, ..., ...) = 4
21+
22+
+0 < P. 1:4001(4000) ack 1 win 257
23+
+0 > . 1:1(0) ack 4001 win 5000
24+
25+
// packet in sequence : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
26+
+0 < P. 4001:54001(50000) ack 1 win 257
27+
+0 > . 1:1(0) ack 4001 win 5000
28+
29+
// ooo packet. : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
30+
+1 < P. 5001:55001(50000) ack 1 win 257
31+
+0 > . 1:1(0) ack 4001 win 5000
32+
33+
// SKB_DROP_REASON_TCP_INVALID_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
34+
+0 < P. 70001:80001(10000) ack 1 win 257
35+
+0 > . 1:1(0) ack 4001 win 5000
36+
37+
+0 read(4, ..., 100000) = 4000
38+
39+
// If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd
40+
+0 < P. 4001:54001(50000) ack 1 win 257
41+
+.040 > . 1:1(0) ack 54001 win 0
42+
43+
// Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times.
44+
+0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "`
tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
--mss=1000
4+
5+
`./defaults.sh`
6+
7+
0 `nstat -n`
8+
9+
// Establish a connection.
10+
+0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
11+
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
12+
+0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0
13+
+0 bind(3, ..., ...) = 0
14+
+0 listen(3, 1) = 0
15+
16+
+0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
17+
+0 > S. 0:0(0) ack 1 win 18980 <mss 1460,nop,wscale 0>
18+
+.1 < . 1:1(0) ack 1 win 257
19+
20+
+0 accept(3, ..., ...) = 4
21+
22+
+0 < P. 1:20001(20000) ack 1 win 257
23+
+.04 > . 1:1(0) ack 20001 win 18000
24+
25+
+0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [12000], 4) = 0
26+
+0 < P. 20001:80001(60000) ack 1 win 257
27+
+0 > . 1:1(0) ack 20001 win 18000
28+
29+
+0 read(4, ..., 20000) = 20000
30+
// A too big packet is accepted if the receive queue is empty
31+
+0 < P. 20001:80001(60000) ack 1 win 257
32+
+0 > . 1:1(0) ack 80001 win 0
33+

0 commit comments

Comments
 (0)