Skip to content

Commit 95fa788

Browse files
edumazet authored and kuba-moo committed
inet_diag: avoid cache line misses in inet_diag_bc_sk()
inet_diag_bc_sk() pulls five cache lines per socket, while most filters only need the first two.

Add three booleans to struct inet_diag_dump_data, that are selectively set if a filter needs specific socket fields:

- mark_needed      /* INET_DIAG_BC_MARK_COND present. */
- cgroup_needed    /* INET_DIAG_BC_CGROUP_COND present. */
- userlocks_needed /* INET_DIAG_BC_AUTO present. */

This removes millions of cache-line misses per ss invocation when simple filters are specified on busy servers.

offsetof(struct sock, sk_userlocks) = 0xf3
offsetof(struct sock, sk_mark)      = 0x20c
offsetof(struct sock, sk_cgrp_data) = 0x298

Signed-off-by: Eric Dumazet <[email protected]>
Reviewed-by: Kuniyuki Iwashima <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 9529320 commit 95fa788

File tree

2 files changed

+36
-21
lines changed

2 files changed

+36
-21
lines changed

include/linux/inet_diag.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,11 @@ struct inet_diag_dump_data {
3838
#define inet_diag_nla_bpf_stgs req_nlas[INET_DIAG_REQ_SK_BPF_STORAGES]
3939

4040
struct bpf_sk_storage_diag *bpf_stg_diag;
41+
bool mark_needed; /* INET_DIAG_BC_MARK_COND present. */
42+
#ifdef CONFIG_SOCK_CGROUP_DATA
43+
bool cgroup_needed; /* INET_DIAG_BC_CGROUP_COND present. */
44+
#endif
45+
bool userlocks_needed; /* INET_DIAG_BC_AUTO present. */
4146
};
4247

4348
struct inet_connection_sock;

net/ipv4/inet_diag.c

Lines changed: 31 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -605,18 +605,22 @@ int inet_diag_bc_sk(const struct inet_diag_dump_data *cb_data, struct sock *sk)
605605
entry.sport = READ_ONCE(inet->inet_num);
606606
entry.dport = ntohs(READ_ONCE(inet->inet_dport));
607607
entry.ifindex = READ_ONCE(sk->sk_bound_dev_if);
608-
entry.userlocks = sk_fullsock(sk) ? READ_ONCE(sk->sk_userlocks) : 0;
609-
if (sk_fullsock(sk))
610-
entry.mark = READ_ONCE(sk->sk_mark);
611-
else if (sk->sk_state == TCP_NEW_SYN_RECV)
612-
entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
613-
else if (sk->sk_state == TCP_TIME_WAIT)
614-
entry.mark = inet_twsk(sk)->tw_mark;
615-
else
616-
entry.mark = 0;
608+
if (cb_data->userlocks_needed)
609+
entry.userlocks = sk_fullsock(sk) ? READ_ONCE(sk->sk_userlocks) : 0;
610+
if (cb_data->mark_needed) {
611+
if (sk_fullsock(sk))
612+
entry.mark = READ_ONCE(sk->sk_mark);
613+
else if (sk->sk_state == TCP_NEW_SYN_RECV)
614+
entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
615+
else if (sk->sk_state == TCP_TIME_WAIT)
616+
entry.mark = inet_twsk(sk)->tw_mark;
617+
else
618+
entry.mark = 0;
619+
}
617620
#ifdef CONFIG_SOCK_CGROUP_DATA
618-
entry.cgroup_id = sk_fullsock(sk) ?
619-
cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0;
621+
if (cb_data->cgroup_needed)
622+
entry.cgroup_id = sk_fullsock(sk) ?
623+
cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0;
620624
#endif
621625

622626
return inet_diag_bc_run(bc, &entry);
@@ -716,16 +720,21 @@ static bool valid_cgroupcond(const struct inet_diag_bc_op *op, int len,
716720
}
717721
#endif
718722

719-
static int inet_diag_bc_audit(const struct nlattr *attr,
723+
static int inet_diag_bc_audit(struct inet_diag_dump_data *cb_data,
720724
const struct sk_buff *skb)
721725
{
722-
bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
726+
const struct nlattr *attr = cb_data->inet_diag_nla_bc;
723727
const void *bytecode, *bc;
724728
int bytecode_len, len;
729+
bool net_admin;
730+
731+
if (!attr)
732+
return 0;
725733

726-
if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op))
734+
if (nla_len(attr) < sizeof(struct inet_diag_bc_op))
727735
return -EINVAL;
728736

737+
net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
729738
bytecode = bc = nla_data(attr);
730739
len = bytecode_len = nla_len(attr);
731740

@@ -757,14 +766,18 @@ static int inet_diag_bc_audit(const struct nlattr *attr,
757766
return -EPERM;
758767
if (!valid_markcond(bc, len, &min_len))
759768
return -EINVAL;
769+
cb_data->mark_needed = true;
760770
break;
761771
#ifdef CONFIG_SOCK_CGROUP_DATA
762772
case INET_DIAG_BC_CGROUP_COND:
763773
if (!valid_cgroupcond(bc, len, &min_len))
764774
return -EINVAL;
775+
cb_data->cgroup_needed = true;
765776
break;
766777
#endif
767778
case INET_DIAG_BC_AUTO:
779+
cb_data->userlocks_needed = true;
780+
fallthrough;
768781
case INET_DIAG_BC_JMP:
769782
case INET_DIAG_BC_NOP:
770783
break;
@@ -841,13 +854,10 @@ static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen)
841854
kfree(cb_data);
842855
return err;
843856
}
844-
nla = cb_data->inet_diag_nla_bc;
845-
if (nla) {
846-
err = inet_diag_bc_audit(nla, skb);
847-
if (err) {
848-
kfree(cb_data);
849-
return err;
850-
}
857+
err = inet_diag_bc_audit(cb_data, skb);
858+
if (err) {
859+
kfree(cb_data);
860+
return err;
851861
}
852862

853863
nla = cb_data->inet_diag_nla_bpf_stgs;

0 commit comments

Comments
 (0)