Skip to content

Commit e71aa5a

Browse files
committed
Merge branch 'inet_diag-make-dumps-faster-with-simple-filters'
Eric Dumazet says:

====================
inet_diag: make dumps faster with simple filters

inet_diag_bc_sk() pulls five cache lines per socket,
while most filters only need the two first ones.

We can change it to only pull needed cache lines,
to make things like "ss -temoi src :21456" much faster.

First patches (1-3) are annotating data-races as a first step.
====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 34c21e9 + 95fa788 commit e71aa5a

File tree

7 files changed

+70
-71
lines changed

7 files changed

+70
-71
lines changed

include/linux/inet_diag.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,11 @@ struct inet_diag_dump_data {
3838
#define inet_diag_nla_bpf_stgs req_nlas[INET_DIAG_REQ_SK_BPF_STORAGES]
3939

4040
struct bpf_sk_storage_diag *bpf_stg_diag;
41+
bool mark_needed; /* INET_DIAG_BC_MARK_COND present. */
42+
#ifdef CONFIG_SOCK_CGROUP_DATA
43+
bool cgroup_needed; /* INET_DIAG_BC_CGROUP_COND present. */
44+
#endif
45+
bool userlocks_needed; /* INET_DIAG_BC_AUTO present. */
4146
};
4247

4348
struct inet_connection_sock;
@@ -46,7 +51,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
4651
const struct inet_diag_req_v2 *req,
4752
u16 nlmsg_flags, bool net_admin);
4853

49-
int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
54+
int inet_diag_bc_sk(const struct inet_diag_dump_data *cb_data, struct sock *sk);
5055

5156
void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk);
5257

net/ipv4/inet_diag.c

Lines changed: 48 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -71,25 +71,25 @@ static void inet_diag_unlock_handler(const struct inet_diag_handler *handler)
7171

7272
void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk)
7373
{
74-
r->idiag_family = sk->sk_family;
74+
r->idiag_family = READ_ONCE(sk->sk_family);
7575

76-
r->id.idiag_sport = htons(sk->sk_num);
77-
r->id.idiag_dport = sk->sk_dport;
78-
r->id.idiag_if = sk->sk_bound_dev_if;
76+
r->id.idiag_sport = htons(READ_ONCE(sk->sk_num));
77+
r->id.idiag_dport = READ_ONCE(sk->sk_dport);
78+
r->id.idiag_if = READ_ONCE(sk->sk_bound_dev_if);
7979
sock_diag_save_cookie(sk, r->id.idiag_cookie);
8080

8181
#if IS_ENABLED(CONFIG_IPV6)
82-
if (sk->sk_family == AF_INET6) {
83-
*(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr;
84-
*(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr;
82+
if (r->idiag_family == AF_INET6) {
83+
data_race(*(struct in6_addr *)r->id.idiag_src = sk->sk_v6_rcv_saddr);
84+
data_race(*(struct in6_addr *)r->id.idiag_dst = sk->sk_v6_daddr);
8585
} else
8686
#endif
8787
{
8888
memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
8989
memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
9090

91-
r->id.idiag_src[0] = sk->sk_rcv_saddr;
92-
r->id.idiag_dst[0] = sk->sk_daddr;
91+
r->id.idiag_src[0] = READ_ONCE(sk->sk_rcv_saddr);
92+
r->id.idiag_dst[0] = READ_ONCE(sk->sk_daddr);
9393
}
9494
}
9595
EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill);
@@ -580,7 +580,7 @@ static void entry_fill_addrs(struct inet_diag_entry *entry,
580580
const struct sock *sk)
581581
{
582582
#if IS_ENABLED(CONFIG_IPV6)
583-
if (sk->sk_family == AF_INET6) {
583+
if (entry->family == AF_INET6) {
584584
entry->saddr = sk->sk_v6_rcv_saddr.s6_addr32;
585585
entry->daddr = sk->sk_v6_daddr.s6_addr32;
586586
} else
@@ -591,31 +591,36 @@ static void entry_fill_addrs(struct inet_diag_entry *entry,
591591
}
592592
}
593593

594-
int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
594+
int inet_diag_bc_sk(const struct inet_diag_dump_data *cb_data, struct sock *sk)
595595
{
596-
struct inet_sock *inet = inet_sk(sk);
596+
const struct nlattr *bc = cb_data->inet_diag_nla_bc;
597+
const struct inet_sock *inet = inet_sk(sk);
597598
struct inet_diag_entry entry;
598599

599600
if (!bc)
600601
return 1;
601602

602-
entry.family = sk->sk_family;
603+
entry.family = READ_ONCE(sk->sk_family);
603604
entry_fill_addrs(&entry, sk);
604-
entry.sport = inet->inet_num;
605-
entry.dport = ntohs(inet->inet_dport);
606-
entry.ifindex = sk->sk_bound_dev_if;
607-
entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
608-
if (sk_fullsock(sk))
609-
entry.mark = READ_ONCE(sk->sk_mark);
610-
else if (sk->sk_state == TCP_NEW_SYN_RECV)
611-
entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
612-
else if (sk->sk_state == TCP_TIME_WAIT)
613-
entry.mark = inet_twsk(sk)->tw_mark;
614-
else
615-
entry.mark = 0;
605+
entry.sport = READ_ONCE(inet->inet_num);
606+
entry.dport = ntohs(READ_ONCE(inet->inet_dport));
607+
entry.ifindex = READ_ONCE(sk->sk_bound_dev_if);
608+
if (cb_data->userlocks_needed)
609+
entry.userlocks = sk_fullsock(sk) ? READ_ONCE(sk->sk_userlocks) : 0;
610+
if (cb_data->mark_needed) {
611+
if (sk_fullsock(sk))
612+
entry.mark = READ_ONCE(sk->sk_mark);
613+
else if (sk->sk_state == TCP_NEW_SYN_RECV)
614+
entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
615+
else if (sk->sk_state == TCP_TIME_WAIT)
616+
entry.mark = inet_twsk(sk)->tw_mark;
617+
else
618+
entry.mark = 0;
619+
}
616620
#ifdef CONFIG_SOCK_CGROUP_DATA
617-
entry.cgroup_id = sk_fullsock(sk) ?
618-
cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0;
621+
if (cb_data->cgroup_needed)
622+
entry.cgroup_id = sk_fullsock(sk) ?
623+
cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0;
619624
#endif
620625

621626
return inet_diag_bc_run(bc, &entry);
@@ -715,16 +720,21 @@ static bool valid_cgroupcond(const struct inet_diag_bc_op *op, int len,
715720
}
716721
#endif
717722

718-
static int inet_diag_bc_audit(const struct nlattr *attr,
723+
static int inet_diag_bc_audit(struct inet_diag_dump_data *cb_data,
719724
const struct sk_buff *skb)
720725
{
721-
bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
726+
const struct nlattr *attr = cb_data->inet_diag_nla_bc;
722727
const void *bytecode, *bc;
723728
int bytecode_len, len;
729+
bool net_admin;
724730

725-
if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op))
731+
if (!attr)
732+
return 0;
733+
734+
if (nla_len(attr) < sizeof(struct inet_diag_bc_op))
726735
return -EINVAL;
727736

737+
net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
728738
bytecode = bc = nla_data(attr);
729739
len = bytecode_len = nla_len(attr);
730740

@@ -756,14 +766,18 @@ static int inet_diag_bc_audit(const struct nlattr *attr,
756766
return -EPERM;
757767
if (!valid_markcond(bc, len, &min_len))
758768
return -EINVAL;
769+
cb_data->mark_needed = true;
759770
break;
760771
#ifdef CONFIG_SOCK_CGROUP_DATA
761772
case INET_DIAG_BC_CGROUP_COND:
762773
if (!valid_cgroupcond(bc, len, &min_len))
763774
return -EINVAL;
775+
cb_data->cgroup_needed = true;
764776
break;
765777
#endif
766778
case INET_DIAG_BC_AUTO:
779+
cb_data->userlocks_needed = true;
780+
fallthrough;
767781
case INET_DIAG_BC_JMP:
768782
case INET_DIAG_BC_NOP:
769783
break;
@@ -840,13 +854,10 @@ static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen)
840854
kfree(cb_data);
841855
return err;
842856
}
843-
nla = cb_data->inet_diag_nla_bc;
844-
if (nla) {
845-
err = inet_diag_bc_audit(nla, skb);
846-
if (err) {
847-
kfree(cb_data);
848-
return err;
849-
}
857+
err = inet_diag_bc_audit(cb_data, skb);
858+
if (err) {
859+
kfree(cb_data);
860+
return err;
850861
}
851862

852863
nla = cb_data->inet_diag_nla_bpf_stgs;

net/ipv4/raw_diag.c

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,9 @@ static int raw_diag_dump_one(struct netlink_callback *cb,
126126
static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
127127
struct netlink_callback *cb,
128128
const struct inet_diag_req_v2 *r,
129-
struct nlattr *bc, bool net_admin)
129+
bool net_admin)
130130
{
131-
if (!inet_diag_bc_sk(bc, sk))
131+
if (!inet_diag_bc_sk(cb->data, sk))
132132
return 0;
133133

134134
return inet_sk_diag_fill(sk, NULL, skb, cb, r, NLM_F_MULTI, net_admin);
@@ -140,17 +140,13 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
140140
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
141141
struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
142142
struct net *net = sock_net(skb->sk);
143-
struct inet_diag_dump_data *cb_data;
144143
int num, s_num, slot, s_slot;
145144
struct hlist_head *hlist;
146145
struct sock *sk = NULL;
147-
struct nlattr *bc;
148146

149147
if (IS_ERR(hashinfo))
150148
return;
151149

152-
cb_data = cb->data;
153-
bc = cb_data->inet_diag_nla_bc;
154150
s_slot = cb->args[0];
155151
num = s_num = cb->args[1];
156152

@@ -174,7 +170,7 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
174170
if (r->id.idiag_dport != inet->inet_dport &&
175171
r->id.idiag_dport)
176172
goto next;
177-
if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0)
173+
if (sk_diag_dump(sk, skb, cb, r, net_admin) < 0)
178174
goto out_unlock;
179175
next:
180176
num++;

net/ipv4/tcp_diag.c

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -248,12 +248,12 @@ static int tcp_req_diag_fill(struct sock *sk, struct sk_buff *skb,
248248
inet_diag_msg_common_fill(r, sk);
249249
r->idiag_state = TCP_SYN_RECV;
250250
r->idiag_timer = 1;
251-
r->idiag_retrans = reqsk->num_retrans;
251+
r->idiag_retrans = READ_ONCE(reqsk->num_retrans);
252252

253253
BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
254254
offsetof(struct sock, sk_cookie));
255255

256-
tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies;
256+
tmo = READ_ONCE(inet_reqsk(sk)->rsk_timer.expires) - jiffies;
257257
r->idiag_expires = jiffies_delta_to_msecs(tmo);
258258
r->idiag_rqueue = 0;
259259
r->idiag_wqueue = 0;
@@ -320,11 +320,9 @@ static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
320320
u32 idiag_states = r->idiag_states;
321321
struct inet_hashinfo *hashinfo;
322322
int i, num, s_i, s_num;
323-
struct nlattr *bc;
324323
struct sock *sk;
325324

326325
hashinfo = net->ipv4.tcp_death_row.hashinfo;
327-
bc = cb_data->inet_diag_nla_bc;
328326
if (idiag_states & TCPF_SYN_RECV)
329327
idiag_states |= TCPF_NEW_SYN_RECV;
330328
s_i = cb->args[1];
@@ -365,7 +363,7 @@ static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
365363
r->id.idiag_sport)
366364
goto next_listen;
367365

368-
if (!inet_diag_bc_sk(bc, sk))
366+
if (!inet_diag_bc_sk(cb_data, sk))
369367
goto next_listen;
370368

371369
if (inet_sk_diag_fill(sk, inet_csk(sk), skb,
@@ -432,7 +430,7 @@ static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
432430
r->sdiag_family != sk->sk_family)
433431
goto next_bind;
434432

435-
if (!inet_diag_bc_sk(bc, sk))
433+
if (!inet_diag_bc_sk(cb_data, sk))
436434
goto next_bind;
437435

438436
sock_hold(sk);
@@ -519,7 +517,7 @@ static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
519517
goto next_normal;
520518
twsk_build_assert();
521519

522-
if (!inet_diag_bc_sk(bc, sk))
520+
if (!inet_diag_bc_sk(cb_data, sk))
523521
goto next_normal;
524522

525523
if (!refcount_inc_not_zero(&sk->sk_refcnt))

net/ipv4/tcp_output.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4438,7 +4438,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
44384438
tcp_sk_rw(sk)->total_retrans++;
44394439
}
44404440
trace_tcp_retransmit_synack(sk, req);
4441-
req->num_retrans++;
4441+
WRITE_ONCE(req->num_retrans, req->num_retrans + 1);
44424442
}
44434443
return res;
44444444
}

net/ipv4/udp_diag.c

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@
1616
static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
1717
struct netlink_callback *cb,
1818
const struct inet_diag_req_v2 *req,
19-
struct nlattr *bc, bool net_admin)
19+
bool net_admin)
2020
{
21-
if (!inet_diag_bc_sk(bc, sk))
21+
if (!inet_diag_bc_sk(cb->data, sk))
2222
return 0;
2323

2424
return inet_sk_diag_fill(sk, NULL, skb, cb, req, NLM_F_MULTI,
@@ -92,12 +92,8 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb,
9292
{
9393
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
9494
struct net *net = sock_net(skb->sk);
95-
struct inet_diag_dump_data *cb_data;
9695
int num, s_num, slot, s_slot;
97-
struct nlattr *bc;
9896

99-
cb_data = cb->data;
100-
bc = cb_data->inet_diag_nla_bc;
10197
s_slot = cb->args[0];
10298
num = s_num = cb->args[1];
10399

@@ -130,7 +126,7 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb,
130126
r->id.idiag_dport)
131127
goto next;
132128

133-
if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) {
129+
if (sk_diag_dump(sk, skb, cb, r, net_admin) < 0) {
134130
spin_unlock_bh(&hslot->lock);
135131
goto done;
136132
}

net/mptcp/mptcp_diag.c

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@
1515
static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
1616
struct netlink_callback *cb,
1717
const struct inet_diag_req_v2 *req,
18-
struct nlattr *bc, bool net_admin)
18+
bool net_admin)
1919
{
20-
if (!inet_diag_bc_sk(bc, sk))
20+
if (!inet_diag_bc_sk(cb->data, sk))
2121
return 0;
2222

2323
return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, req, NLM_F_MULTI,
@@ -76,9 +76,7 @@ static void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callba
7676
const struct inet_diag_req_v2 *r,
7777
bool net_admin)
7878
{
79-
struct inet_diag_dump_data *cb_data = cb->data;
8079
struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
81-
struct nlattr *bc = cb_data->inet_diag_nla_bc;
8280
struct net *net = sock_net(skb->sk);
8381
struct inet_hashinfo *hinfo;
8482
int i;
@@ -121,7 +119,7 @@ static void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callba
121119
if (!refcount_inc_not_zero(&sk->sk_refcnt))
122120
goto next_listen;
123121

124-
ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin);
122+
ret = sk_diag_dump(sk, skb, cb, r, net_admin);
125123

126124
sock_put(sk);
127125

@@ -154,15 +152,10 @@ static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
154152
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
155153
struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx;
156154
struct net *net = sock_net(skb->sk);
157-
struct inet_diag_dump_data *cb_data;
158155
struct mptcp_sock *msk;
159-
struct nlattr *bc;
160156

161157
BUILD_BUG_ON(sizeof(cb->ctx) < sizeof(*diag_ctx));
162158

163-
cb_data = cb->data;
164-
bc = cb_data->inet_diag_nla_bc;
165-
166159
while ((msk = mptcp_token_iter_next(net, &diag_ctx->s_slot,
167160
&diag_ctx->s_num)) != NULL) {
168161
struct inet_sock *inet = (struct inet_sock *)msk;
@@ -181,7 +174,7 @@ static void mptcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
181174
r->id.idiag_dport)
182175
goto next;
183176

184-
ret = sk_diag_dump(sk, skb, cb, r, bc, net_admin);
177+
ret = sk_diag_dump(sk, skb, cb, r, net_admin);
185178
next:
186179
sock_put(sk);
187180
if (ret < 0) {

0 commit comments

Comments
 (0)