Skip to content

Commit 3afb106

Browse files
committed
Merge branch 'tcp-move-few-fields-for-data-locality'
Eric Dumazet says:

====================
tcp: move few fields for data locality

After recent additions (PSP and AccECN), I wanted to do another round on field locations to increase data locality.

This series manages to shrink TCP and TCPv6 objects by 128 bytes, but more importantly it should reduce the number of cache lines touched in TCP fast paths.

There is more to come.

v2: removed the tcp CACHELINE_ASSERT_GROUP_SIZE checks after a kernel build bot reported an error.
====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 4238cbf + 649091e commit 3afb106

File tree

7 files changed

+31
-39
lines changed

7 files changed

+31
-39
lines changed

Documentation/networking/net_cachelines/tcp_sock.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ u64 bytes_acked read_w
2626
u32 dsack_dups
2727
u32 snd_una read_mostly read_write tcp_wnd_end,tcp_urg_mode,tcp_minshall_check,tcp_cwnd_validate(tx);tcp_ack,tcp_may_update_window,tcp_clean_rtx_queue(write),tcp_ack_tstamp(rx)
2828
u32 snd_sml read_write tcp_minshall_check,tcp_minshall_update
29-
u32 rcv_tstamp read_mostly tcp_ack
30-
void * tcp_clean_acked read_mostly tcp_ack
29+
u32 rcv_tstamp read_write read_write tcp_ack
30+
void * tcp_clean_acked read_mostly tcp_ack
3131
u32 lsndtime read_write tcp_slow_start_after_idle_check,tcp_event_data_sent
3232
u32 last_oow_ack_time
3333
u32 compressed_ack_rcv_nxt
@@ -57,7 +57,7 @@ u8:1 is_sack_reneg read_m
5757
u8:2 fastopen_client_fail
5858
u8:4 nonagle read_write tcp_skb_entail,tcp_push_pending_frames
5959
u8:1 thin_lto
60-
u8:1 recvmsg_inq
60+
u8:1 recvmsg_inq read_mostly tcp_recvmsg
6161
u8:1 repair read_mostly tcp_write_xmit
6262
u8:1 frto
6363
u8 repair_queue

include/linux/tcp.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,9 @@ struct tcp_sock {
215215
u16 gso_segs; /* Max number of segs per GSO packet */
216216
/* from STCP, retrans queue hinting */
217217
struct sk_buff *retransmit_skb_hint;
218+
#if defined(CONFIG_TLS_DEVICE)
219+
void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq);
220+
#endif
218221
__cacheline_group_end(tcp_sock_read_tx);
219222

220223
/* TXRX read-mostly hotpath cache lines */
@@ -232,28 +235,24 @@ struct tcp_sock {
232235
repair : 1,
233236
tcp_usec_ts : 1, /* TSval values in usec */
234237
is_sack_reneg:1, /* in recovery from loss with SACK reneg? */
235-
is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
238+
is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
239+
recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */
236240
__cacheline_group_end(tcp_sock_read_txrx);
237241

238242
/* RX read-mostly hotpath cache lines */
239243
__cacheline_group_begin(tcp_sock_read_rx);
240244
u32 copied_seq; /* Head of yet unread data */
241-
u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
242245
u32 snd_wl1; /* Sequence for window update */
243246
u32 tlp_high_seq; /* snd_nxt at the time of TLP */
244247
u32 rttvar_us; /* smoothed mdev_max */
245248
u32 retrans_out; /* Retransmitted packets out */
246249
u16 advmss; /* Advertised MSS */
247250
u16 urg_data; /* Saved octet of OOB data and control flags */
248251
u32 lost; /* Total data packets lost incl. rexmits */
252+
u32 snd_ssthresh; /* Slow start size threshold */
249253
struct minmax rtt_min;
250254
/* OOO segments go in this rbtree. Socket lock must be held. */
251255
struct rb_root out_of_order_queue;
252-
#if defined(CONFIG_TLS_DEVICE)
253-
void (*tcp_clean_acked)(struct sock *sk, u32 acked_seq);
254-
#endif
255-
u32 snd_ssthresh; /* Slow start size threshold */
256-
u8 recvmsg_inq : 1;/* Indicate # of bytes in queue upon recvmsg */
257256
__cacheline_group_end(tcp_sock_read_rx);
258257

259258
/* TX read-write hotpath cache lines */
@@ -319,6 +318,7 @@ struct tcp_sock {
319318
*/
320319
u32 app_limited; /* limited until "delivered" reaches this val */
321320
u32 rcv_wnd; /* Current receiver window */
321+
u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */
322322
/*
323323
* Options received (usually on last packet, some only on SYN packets).
324324
*/
@@ -448,6 +448,9 @@ struct tcp_sock {
448448
* the first SYN. */
449449
u32 undo_marker; /* snd_una upon a new recovery episode. */
450450
int undo_retrans; /* number of undoable retransmissions. */
451+
u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG
452+
* while socket was owned by user.
453+
*/
451454
u64 bytes_retrans; /* RFC4898 tcpEStatsPerfOctetsRetrans
452455
* Total data bytes retransmitted
453456
*/
@@ -494,9 +497,6 @@ struct tcp_sock {
494497
u32 probe_seq_end;
495498
} mtu_probe;
496499
u32 plb_rehash; /* PLB-triggered rehash attempts */
497-
u32 mtu_info; /* We received an ICMP_FRAG_NEEDED / ICMPV6_PKT_TOOBIG
498-
* while socket was owned by user.
499-
*/
500500
#if IS_ENABLED(CONFIG_MPTCP)
501501
bool is_mptcp;
502502
#endif

include/net/request_sock.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,8 @@ struct fastopen_queue {
185185
struct request_sock_queue {
186186
spinlock_t rskq_lock;
187187
u8 rskq_defer_accept;
188+
u8 synflood_warned;
188189

189-
u32 synflood_warned;
190190
atomic_t qlen;
191191
atomic_t young;
192192

include/net/sock.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@ struct sock {
467467
__cacheline_group_begin(sock_write_tx);
468468
int sk_write_pending;
469469
atomic_t sk_omem_alloc;
470-
int sk_sndbuf;
470+
int sk_err_soft;
471471

472472
int sk_wmem_queued;
473473
refcount_t sk_wmem_alloc;
@@ -492,6 +492,9 @@ struct sock {
492492
long sk_sndtimeo;
493493
u32 sk_priority;
494494
u32 sk_mark;
495+
kuid_t sk_uid;
496+
u16 sk_protocol;
497+
u16 sk_type;
495498
struct dst_entry __rcu *sk_dst_cache;
496499
netdev_features_t sk_route_caps;
497500
#ifdef CONFIG_SOCK_VALIDATE_XMIT
@@ -504,6 +507,7 @@ struct sock {
504507
unsigned int sk_gso_max_size;
505508
gfp_t sk_allocation;
506509
u32 sk_txhash;
510+
int sk_sndbuf;
507511
u8 sk_pacing_shift;
508512
bool sk_use_task_frag;
509513
__cacheline_group_end(sock_read_tx);
@@ -517,15 +521,11 @@ struct sock {
517521
sk_no_check_tx : 1,
518522
sk_no_check_rx : 1;
519523
u8 sk_shutdown;
520-
u16 sk_type;
521-
u16 sk_protocol;
522524
unsigned long sk_lingertime;
523525
struct proto *sk_prot_creator;
524526
rwlock_t sk_callback_lock;
525-
int sk_err_soft;
526527
u32 sk_ack_backlog;
527528
u32 sk_max_ack_backlog;
528-
kuid_t sk_uid;
529529
unsigned long sk_ino;
530530
spinlock_t sk_peer_lock;
531531
int sk_bind_phc;

net/core/sock.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4452,7 +4452,7 @@ static int __init sock_struct_check(void)
44524452

44534453
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc);
44544454
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc);
4455-
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_sndbuf);
4455+
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_err_soft);
44564456
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_queued);
44574457
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_alloc);
44584458
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tsq_flags);
@@ -4471,12 +4471,15 @@ static int __init sock_struct_check(void)
44714471
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_sndtimeo);
44724472
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_priority);
44734473
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_mark);
4474+
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_uid);
4475+
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_protocol);
44744476
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_dst_cache);
44754477
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_route_caps);
44764478
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_type);
44774479
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_size);
44784480
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_allocation);
44794481
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_txhash);
4482+
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_sndbuf);
44804483
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_segs);
44814484
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_pacing_shift);
44824485
CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_use_task_frag);

net/ipv4/tcp.c

Lines changed: 4 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5101,7 +5101,9 @@ static void __init tcp_struct_check(void)
51015101
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, notsent_lowat);
51025102
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, gso_segs);
51035103
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, retransmit_skb_hint);
5104-
CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_tx, 32);
5104+
#if IS_ENABLED(CONFIG_TLS_DEVICE)
5105+
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, tcp_clean_acked);
5106+
#endif
51055107

51065108
/* TXRX read-mostly hotpath cache lines */
51075109
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, tsoffset);
@@ -5112,11 +5114,9 @@ static void __init tcp_struct_check(void)
51125114
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, lost_out);
51135115
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, sacked_out);
51145116
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, scaling_ratio);
5115-
CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 32);
51165117

51175118
/* RX read-mostly hotpath cache lines */
51185119
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, copied_seq);
5119-
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rcv_tstamp);
51205120
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_wl1);
51215121
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, tlp_high_seq);
51225122
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rttvar_us);
@@ -5127,12 +5127,6 @@ static void __init tcp_struct_check(void)
51275127
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rtt_min);
51285128
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, out_of_order_queue);
51295129
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_ssthresh);
5130-
#if IS_ENABLED(CONFIG_TLS_DEVICE)
5131-
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, tcp_clean_acked);
5132-
CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 77);
5133-
#else
5134-
CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 69);
5135-
#endif
51365130

51375131
/* TX read-write hotpath cache lines */
51385132
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, segs_out);
@@ -5151,7 +5145,6 @@ static void __init tcp_struct_check(void)
51515145
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tsorted_sent_queue);
51525146
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, highest_sack);
51535147
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, ecn_flags);
5154-
CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 97);
51555148

51565149
/* TXRX read-write hotpath cache lines */
51575150
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, pred_flags);
@@ -5170,13 +5163,9 @@ static void __init tcp_struct_check(void)
51705163
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ecn_bytes);
51715164
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited);
51725165
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd);
5166+
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_tstamp);
51735167
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt);
51745168

5175-
/* 32bit arches with 8byte alignment on u64 fields might need padding
5176-
* before tcp_clock_cache.
5177-
*/
5178-
CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 107 + 4);
5179-
51805169
/* RX read-write hotpath cache lines */
51815170
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received);
51825171
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, segs_in);
@@ -5193,7 +5182,6 @@ static void __init tcp_struct_check(void)
51935182
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_acked);
51945183
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_est);
51955184
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcvq_space);
5196-
CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_rx, 112);
51975185
}
51985186

51995187
void __init tcp_init(void)

net/ipv4/tcp_input.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4085,7 +4085,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
40854085
/* We passed data and got it acked, remove any soft error
40864086
* log. Something worked...
40874087
*/
4088-
WRITE_ONCE(sk->sk_err_soft, 0);
4088+
if (READ_ONCE(sk->sk_err_soft))
4089+
WRITE_ONCE(sk->sk_err_soft, 0);
40894090
WRITE_ONCE(icsk->icsk_probes_out, 0);
40904091
tp->rcv_tstamp = tcp_jiffies32;
40914092
if (!prior_packets)
@@ -7281,8 +7282,8 @@ static bool tcp_syn_flood_action(struct sock *sk, const char *proto)
72817282
#endif
72827283
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
72837284

7284-
if (!READ_ONCE(queue->synflood_warned) && syncookies != 2 &&
7285-
xchg(&queue->synflood_warned, 1) == 0) {
7285+
if (syncookies != 2 && !READ_ONCE(queue->synflood_warned)) {
7286+
WRITE_ONCE(queue->synflood_warned, 1);
72867287
if (IS_ENABLED(CONFIG_IPV6) && sk->sk_family == AF_INET6) {
72877288
net_info_ratelimited("%s: Possible SYN flooding on port [%pI6c]:%u. %s.\n",
72887289
proto, inet6_rcv_saddr(sk),

0 commit comments

Comments
 (0)