Skip to content

Commit 1c17f43

Browse files
q2ven authored and kuba-moo committed
ipv6: Move ipv6_fl_list from ipv6_pinfo to inet_sock.
In {tcp6,udp6,raw6}_sock, struct ipv6_pinfo is always placed at the beginning of a new cache line because

  1. __alignof__(struct tcp_sock) is 64 due to ____cacheline_aligned of __cacheline_group_begin(tcp_sock_write_tx)
  2. __alignof__(struct udp_sock) is 64 due to ____cacheline_aligned of struct numa_drop_counters
  3. in raw6_sock, struct numa_drop_counters is placed before struct ipv6_pinfo.

struct ipv6_pinfo is 136 bytes, but the last cache line is only used by ipv6_fl_list:

  $ pahole -C ipv6_pinfo vmlinux
  struct ipv6_pinfo {
  ...
          /* --- cacheline 2 boundary (128 bytes) --- */
          struct ipv6_fl_socklist *  ipv6_fl_list;         /*   128     8 */

          /* size: 136, cachelines: 3, members: 23 */

Let's move ipv6_fl_list from struct ipv6_pinfo to struct inet_sock to save a full cache line for {tcp6,udp6,raw6}_sock.

Now, struct ipv6_pinfo is 128 bytes, and {tcp6,udp6,raw6}_sock have 64 bytes less, while {tcp,udp,raw}_sock retain the same size.

Before:

  # grep -E "^(RAW|UDP[^L\-]|TCP)" /proc/slabinfo | awk '{print $1, "\t", $4}'
  RAWv6    1408
  UDPv6    1472
  TCPv6    2560
  RAW      1152
  UDP      1280
  TCP      2368

After:

  # grep -E "^(RAW|UDP[^L\-]|TCP)" /proc/slabinfo | awk '{print $1, "\t", $4}'
  RAWv6    1344
  UDPv6    1408
  TCPv6    2496
  RAW      1152
  UDP      1280
  TCP      2368

Also, ipv6_fl_list and inet_flags (SNDFLOW bit) are placed in the same cache line.

  $ pahole -C inet_sock vmlinux
  ...
          /* --- cacheline 11 boundary (704 bytes) was 56 bytes ago --- */
          struct ipv6_pinfo *        pinet6;               /*   760     8 */
          /* --- cacheline 12 boundary (768 bytes) --- */
          struct ipv6_fl_socklist *  ipv6_fl_list;         /*   768     8 */
          unsigned long              inet_flags;           /*   776     8 */

Doc churn is due to the insufficient Type column (only 1 space short).

Suggested-by: Eric Dumazet <[email protected]>
Signed-off-by: Kuniyuki Iwashima <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 0746da0 commit 1c17f43

File tree

7 files changed

+76
-74
lines changed

7 files changed

+76
-74
lines changed

Documentation/networking/net_cachelines/inet_sock.rst

Lines changed: 40 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -5,42 +5,43 @@
55
inet_sock struct fast path usage breakdown
66
==========================================
77

8-
======================= ===================== =================== =================== ======================================================================================================
9-
Type Name fastpath_tx_access fastpath_rx_access comment
10-
======================= ===================== =================== =================== ======================================================================================================
11-
struct sock sk read_mostly read_mostly tcp_init_buffer_space,tcp_init_transfer,tcp_finish_connect,tcp_connect,tcp_send_rcvq,tcp_send_syn_data
12-
struct ipv6_pinfo* pinet6
13-
be16 inet_sport read_mostly __tcp_transmit_skb
14-
be32 inet_daddr read_mostly ip_select_ident_segs
15-
be32 inet_rcv_saddr
16-
be16 inet_dport read_mostly __tcp_transmit_skb
17-
u16 inet_num
18-
be32 inet_saddr
19-
s16 uc_ttl read_mostly __ip_queue_xmit/ip_select_ttl
20-
u16 cmsg_flags
21-
struct ip_options_rcu* inet_opt read_mostly __ip_queue_xmit
22-
u16 inet_id read_mostly ip_select_ident_segs
23-
u8 tos read_mostly ip_queue_xmit
24-
u8 min_ttl
25-
u8 mc_ttl
26-
u8 pmtudisc
27-
u8:1 recverr
28-
u8:1 is_icsk
29-
u8:1 freebind
30-
u8:1 hdrincl
31-
u8:1 mc_loop
32-
u8:1 transparent
33-
u8:1 mc_all
34-
u8:1 nodefrag
35-
u8:1 bind_address_no_port
36-
u8:1 recverr_rfc4884
37-
u8:1 defer_connect read_mostly tcp_sendmsg_fastopen
38-
u8 rcv_tos
39-
u8 convert_csum
40-
int uc_index
41-
int mc_index
42-
be32 mc_addr
43-
struct ip_mc_socklist* mc_list
44-
struct inet_cork_full cork read_mostly __tcp_transmit_skb
45-
struct local_port_range
46-
======================= ===================== =================== =================== ======================================================================================================
8+
======================== ===================== =================== =================== ======================================================================================================
9+
Type Name fastpath_tx_access fastpath_rx_access comment
10+
======================== ===================== =================== =================== ======================================================================================================
11+
struct sock sk read_mostly read_mostly tcp_init_buffer_space,tcp_init_transfer,tcp_finish_connect,tcp_connect,tcp_send_rcvq,tcp_send_syn_data
12+
struct ipv6_pinfo* pinet6
13+
struct ipv6_fl_socklist* ipv6_fl_list read_mostly tcp_v6_connect,__ip6_datagram_connect,udpv6_sendmsg,rawv6_sendmsg
14+
be16 inet_sport read_mostly __tcp_transmit_skb
15+
be32 inet_daddr read_mostly ip_select_ident_segs
16+
be32 inet_rcv_saddr
17+
be16 inet_dport read_mostly __tcp_transmit_skb
18+
u16 inet_num
19+
be32 inet_saddr
20+
s16 uc_ttl read_mostly __ip_queue_xmit/ip_select_ttl
21+
u16 cmsg_flags
22+
struct ip_options_rcu* inet_opt read_mostly __ip_queue_xmit
23+
u16 inet_id read_mostly ip_select_ident_segs
24+
u8 tos read_mostly ip_queue_xmit
25+
u8 min_ttl
26+
u8 mc_ttl
27+
u8 pmtudisc
28+
u8:1 recverr
29+
u8:1 is_icsk
30+
u8:1 freebind
31+
u8:1 hdrincl
32+
u8:1 mc_loop
33+
u8:1 transparent
34+
u8:1 mc_all
35+
u8:1 nodefrag
36+
u8:1 bind_address_no_port
37+
u8:1 recverr_rfc4884
38+
u8:1 defer_connect read_mostly tcp_sendmsg_fastopen
39+
u8 rcv_tos
40+
u8 convert_csum
41+
int uc_index
42+
int mc_index
43+
be32 mc_addr
44+
struct ip_mc_socklist* mc_list
45+
struct inet_cork_full cork read_mostly __tcp_transmit_skb
46+
struct local_port_range
47+
======================== ===================== =================== =================== ======================================================================================================

drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1199,12 +1199,12 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
11991199
struct ipv6_pinfo *newnp = inet6_sk(newsk);
12001200
struct ipv6_pinfo *np = inet6_sk(lsk);
12011201

1202-
inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1202+
newinet->pinet6 = &newtcp6sk->inet6;
1203+
newinet->ipv6_fl_list = NULL;
12031204
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
12041205
newsk->sk_v6_daddr = treq->ir_v6_rmt_addr;
12051206
newsk->sk_v6_rcv_saddr = treq->ir_v6_loc_addr;
12061207
inet6_sk(newsk)->saddr = treq->ir_v6_loc_addr;
1207-
newnp->ipv6_fl_list = NULL;
12081208
newnp->pktoptions = NULL;
12091209
newsk->sk_bound_dev_if = treq->ir_iif;
12101210
newinet->inet_opt = NULL;

include/linux/ipv6.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,6 @@ struct ipv6_pinfo {
271271

272272
struct ipv6_mc_socklist __rcu *ipv6_mc_list;
273273
struct ipv6_ac_socklist *ipv6_ac_list;
274-
struct ipv6_fl_socklist __rcu *ipv6_fl_list;
275274
};
276275

277276
/* We currently use available bits from inet_sk(sk)->inet_flags,

include/net/inet_sock.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ struct inet_sock {
214214
struct sock sk;
215215
#if IS_ENABLED(CONFIG_IPV6)
216216
struct ipv6_pinfo *pinet6;
217+
struct ipv6_fl_socklist __rcu *ipv6_fl_list;
217218
#endif
218219
/* Socket demultiplex comparisons on incoming packets. */
219220
#define inet_daddr sk.__sk_common.skc_daddr

net/ipv6/ip6_flowlabel.c

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ EXPORT_SYMBOL(ipv6_flowlabel_exclusive);
6666
fl != NULL; \
6767
fl = rcu_dereference(fl->next))
6868

69-
#define for_each_sk_fl_rcu(np, sfl) \
70-
for (sfl = rcu_dereference(np->ipv6_fl_list); \
69+
#define for_each_sk_fl_rcu(sk, sfl) \
70+
for (sfl = rcu_dereference(inet_sk(sk)->ipv6_fl_list); \
7171
sfl != NULL; \
7272
sfl = rcu_dereference(sfl->next))
7373

@@ -262,12 +262,11 @@ static struct ip6_flowlabel *fl_intern(struct net *net,
262262
struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label)
263263
{
264264
struct ipv6_fl_socklist *sfl;
265-
struct ipv6_pinfo *np = inet6_sk(sk);
266265

267266
label &= IPV6_FLOWLABEL_MASK;
268267

269268
rcu_read_lock();
270-
for_each_sk_fl_rcu(np, sfl) {
269+
for_each_sk_fl_rcu(sk, sfl) {
271270
struct ip6_flowlabel *fl = sfl->fl;
272271

273272
if (fl->label == label && atomic_inc_not_zero(&fl->users)) {
@@ -283,16 +282,16 @@ EXPORT_SYMBOL_GPL(__fl6_sock_lookup);
283282

284283
void fl6_free_socklist(struct sock *sk)
285284
{
286-
struct ipv6_pinfo *np = inet6_sk(sk);
285+
struct inet_sock *inet = inet_sk(sk);
287286
struct ipv6_fl_socklist *sfl;
288287

289-
if (!rcu_access_pointer(np->ipv6_fl_list))
288+
if (!rcu_access_pointer(inet->ipv6_fl_list))
290289
return;
291290

292291
spin_lock_bh(&ip6_sk_fl_lock);
293-
while ((sfl = rcu_dereference_protected(np->ipv6_fl_list,
292+
while ((sfl = rcu_dereference_protected(inet->ipv6_fl_list,
294293
lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
295-
np->ipv6_fl_list = sfl->next;
294+
inet->ipv6_fl_list = sfl->next;
296295
spin_unlock_bh(&ip6_sk_fl_lock);
297296

298297
fl_release(sfl->fl);
@@ -470,16 +469,15 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
470469

471470
static int mem_check(struct sock *sk)
472471
{
473-
struct ipv6_pinfo *np = inet6_sk(sk);
474-
struct ipv6_fl_socklist *sfl;
475472
int room = FL_MAX_SIZE - atomic_read(&fl_size);
473+
struct ipv6_fl_socklist *sfl;
476474
int count = 0;
477475

478476
if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
479477
return 0;
480478

481479
rcu_read_lock();
482-
for_each_sk_fl_rcu(np, sfl)
480+
for_each_sk_fl_rcu(sk, sfl)
483481
count++;
484482
rcu_read_unlock();
485483

@@ -492,13 +490,15 @@ static int mem_check(struct sock *sk)
492490
return 0;
493491
}
494492

495-
static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
496-
struct ip6_flowlabel *fl)
493+
static inline void fl_link(struct sock *sk, struct ipv6_fl_socklist *sfl,
494+
struct ip6_flowlabel *fl)
497495
{
496+
struct inet_sock *inet = inet_sk(sk);
497+
498498
spin_lock_bh(&ip6_sk_fl_lock);
499499
sfl->fl = fl;
500-
sfl->next = np->ipv6_fl_list;
501-
rcu_assign_pointer(np->ipv6_fl_list, sfl);
500+
sfl->next = inet->ipv6_fl_list;
501+
rcu_assign_pointer(inet->ipv6_fl_list, sfl);
502502
spin_unlock_bh(&ip6_sk_fl_lock);
503503
}
504504

@@ -520,7 +520,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
520520

521521
rcu_read_lock();
522522

523-
for_each_sk_fl_rcu(np, sfl) {
523+
for_each_sk_fl_rcu(sk, sfl) {
524524
if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) {
525525
spin_lock_bh(&ip6_fl_lock);
526526
freq->flr_label = sfl->fl->label;
@@ -559,7 +559,7 @@ static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq)
559559
}
560560

561561
spin_lock_bh(&ip6_sk_fl_lock);
562-
for (sflp = &np->ipv6_fl_list;
562+
for (sflp = &inet_sk(sk)->ipv6_fl_list;
563563
(sfl = socklist_dereference(*sflp)) != NULL;
564564
sflp = &sfl->next) {
565565
if (sfl->fl->label == freq->flr_label)
@@ -579,13 +579,12 @@ static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq)
579579

580580
static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq)
581581
{
582-
struct ipv6_pinfo *np = inet6_sk(sk);
583582
struct net *net = sock_net(sk);
584583
struct ipv6_fl_socklist *sfl;
585584
int err;
586585

587586
rcu_read_lock();
588-
for_each_sk_fl_rcu(np, sfl) {
587+
for_each_sk_fl_rcu(sk, sfl) {
589588
if (sfl->fl->label == freq->flr_label) {
590589
err = fl6_renew(sfl->fl, freq->flr_linger,
591590
freq->flr_expires);
@@ -614,7 +613,6 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
614613
{
615614
struct ipv6_fl_socklist *sfl, *sfl1 = NULL;
616615
struct ip6_flowlabel *fl, *fl1 = NULL;
617-
struct ipv6_pinfo *np = inet6_sk(sk);
618616
struct net *net = sock_net(sk);
619617
int err;
620618

@@ -645,7 +643,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
645643
if (freq->flr_label) {
646644
err = -EEXIST;
647645
rcu_read_lock();
648-
for_each_sk_fl_rcu(np, sfl) {
646+
for_each_sk_fl_rcu(sk, sfl) {
649647
if (sfl->fl->label == freq->flr_label) {
650648
if (freq->flr_flags & IPV6_FL_F_EXCL) {
651649
rcu_read_unlock();
@@ -682,7 +680,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
682680
fl1->linger = fl->linger;
683681
if ((long)(fl->expires - fl1->expires) > 0)
684682
fl1->expires = fl->expires;
685-
fl_link(np, sfl1, fl1);
683+
fl_link(sk, sfl1, fl1);
686684
fl_free(fl);
687685
return 0;
688686

@@ -716,7 +714,7 @@ static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
716714
}
717715
}
718716

719-
fl_link(np, sfl1, fl);
717+
fl_link(sk, sfl1, fl);
720718
return 0;
721719
done:
722720
fl_free(fl);

net/ipv6/tcp_ipv6.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1386,7 +1386,9 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
13861386
if (!newsk)
13871387
return NULL;
13881388

1389-
inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1389+
newinet = inet_sk(newsk);
1390+
newinet->pinet6 = tcp_inet6_sk(newsk);
1391+
newinet->ipv6_fl_list = NULL;
13901392

13911393
newnp = tcp_inet6_sk(newsk);
13921394
newtp = tcp_sk(newsk);
@@ -1405,7 +1407,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
14051407

14061408
newnp->ipv6_mc_list = NULL;
14071409
newnp->ipv6_ac_list = NULL;
1408-
newnp->ipv6_fl_list = NULL;
14091410
newnp->pktoptions = NULL;
14101411
newnp->opt = NULL;
14111412
newnp->mcast_oif = inet_iif(skb);
@@ -1453,10 +1454,12 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
14531454
newsk->sk_gso_type = SKB_GSO_TCPV6;
14541455
inet6_sk_rx_dst_set(newsk, skb);
14551456

1456-
inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1457+
newinet = inet_sk(newsk);
1458+
newinet->pinet6 = tcp_inet6_sk(newsk);
1459+
newinet->ipv6_fl_list = NULL;
1460+
newinet->inet_opt = NULL;
14571461

14581462
newtp = tcp_sk(newsk);
1459-
newinet = inet_sk(newsk);
14601463
newnp = tcp_inet6_sk(newsk);
14611464

14621465
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
@@ -1469,10 +1472,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
14691472
14701473
First: no IPv4 options.
14711474
*/
1472-
newinet->inet_opt = NULL;
14731475
newnp->ipv6_mc_list = NULL;
14741476
newnp->ipv6_ac_list = NULL;
1475-
newnp->ipv6_fl_list = NULL;
14761477

14771478
/* Clone RX bits */
14781479
newnp->rxopt.all = np->rxopt.all;

net/sctp/ipv6.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -782,9 +782,10 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
782782
struct sctp_association *asoc,
783783
bool kern)
784784
{
785-
struct sock *newsk;
786785
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
787786
struct sctp6_sock *newsctp6sk;
787+
struct inet_sock *newinet;
788+
struct sock *newsk;
788789

789790
newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot, kern);
790791
if (!newsk)
@@ -796,7 +797,9 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
796797
sock_reset_flag(sk, SOCK_ZAPPED);
797798

798799
newsctp6sk = (struct sctp6_sock *)newsk;
799-
inet_sk(newsk)->pinet6 = &newsctp6sk->inet6;
800+
newinet = inet_sk(newsk);
801+
newinet->pinet6 = &newsctp6sk->inet6;
802+
newinet->ipv6_fl_list = NULL;
800803

801804
sctp_sk(newsk)->v4mapped = sctp_sk(sk)->v4mapped;
802805

@@ -805,7 +808,6 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
805808
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
806809
newnp->ipv6_mc_list = NULL;
807810
newnp->ipv6_ac_list = NULL;
808-
newnp->ipv6_fl_list = NULL;
809811

810812
sctp_v6_copy_ip_options(sk, newsk);
811813

0 commit comments

Comments
 (0)