Skip to content

Commit 5d6b58c

Browse files
edumazetkuba-moo
authored andcommitted
net: lockless sock_i_ino()
Followup of commit c51da3f ("net: remove sock_i_uid()") A recent syzbot report was the trigger for this change. Over the years, we had many problems caused by the read_lock[_bh](&sk->sk_callback_lock) in sock_i_uid(). We could fix smc_diag_dump_proto() or make a more radical move: Instead of waiting for new syzbot reports, cache the socket inode number in sk->sk_ino, so that we no longer need to acquire sk->sk_callback_lock in sock_i_ino(). This makes socket dumps faster (one less cache line miss, and two atomic ops avoided). Prior art: commit 25a9c8a ("netlink: Add __sock_i_ino() for __netlink_diag_dump().") commit 4f9bf2a ("tcp: Don't acquire inet_listen_hashbucket::lock with disabled BH.") commit efc3dbc ("rds: Make rds_sock_lock BH rather than IRQ safe.") Fixes: d2d6422 ("x86: Allow to enable PREEMPT_RT.") Reported-by: [email protected] Closes: https://lore.kernel.org/netdev/[email protected]/T/#u Signed-off-by: Eric Dumazet <[email protected]> Reviewed-by: Kuniyuki Iwashima <[email protected]> Reviewed-by: Sebastian Andrzej Siewior <[email protected]> Link: https://patch.msgid.link/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
1 parent b4ada06 commit 5d6b58c

File tree

4 files changed

+14
-28
lines changed

4 files changed

+14
-28
lines changed

include/net/sock.h

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ struct sk_filter;
285285
* @sk_ack_backlog: current listen backlog
286286
* @sk_max_ack_backlog: listen backlog set in listen()
287287
* @sk_uid: user id of owner
288+
* @sk_ino: inode number (zero if orphaned)
288289
* @sk_prefer_busy_poll: prefer busypolling over softirq processing
289290
* @sk_busy_poll_budget: napi processing budget when busypolling
290291
* @sk_priority: %SO_PRIORITY setting
@@ -518,6 +519,7 @@ struct sock {
518519
u32 sk_ack_backlog;
519520
u32 sk_max_ack_backlog;
520521
kuid_t sk_uid;
522+
unsigned long sk_ino;
521523
spinlock_t sk_peer_lock;
522524
int sk_bind_phc;
523525
struct pid *sk_peer_pid;
@@ -2056,6 +2058,10 @@ static inline int sk_rx_queue_get(const struct sock *sk)
20562058
static inline void sk_set_socket(struct sock *sk, struct socket *sock)
20572059
{
20582060
sk->sk_socket = sock;
2061+
if (sock) {
2062+
WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid);
2063+
WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino);
2064+
}
20592065
}
20602066

20612067
static inline wait_queue_head_t *sk_sleep(struct sock *sk)
@@ -2077,6 +2083,7 @@ static inline void sock_orphan(struct sock *sk)
20772083
sk_set_socket(sk, NULL);
20782084
sk->sk_wq = NULL;
20792085
/* Note: sk_uid is unchanged. */
2086+
WRITE_ONCE(sk->sk_ino, 0);
20802087
write_unlock_bh(&sk->sk_callback_lock);
20812088
}
20822089

@@ -2087,20 +2094,22 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
20872094
rcu_assign_pointer(sk->sk_wq, &parent->wq);
20882095
parent->sk = sk;
20892096
sk_set_socket(sk, parent);
2090-
WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid);
20912097
security_sock_graft(sk, parent);
20922098
write_unlock_bh(&sk->sk_callback_lock);
20932099
}
20942100

2101+
static inline unsigned long sock_i_ino(const struct sock *sk)
2102+
{
2103+
/* Paired with WRITE_ONCE() in sock_graft() and sock_orphan() */
2104+
return READ_ONCE(sk->sk_ino);
2105+
}
2106+
20952107
static inline kuid_t sk_uid(const struct sock *sk)
20962108
{
20972109
/* Paired with WRITE_ONCE() in sockfs_setattr() */
20982110
return READ_ONCE(sk->sk_uid);
20992111
}
21002112

2101-
unsigned long __sock_i_ino(struct sock *sk);
2102-
unsigned long sock_i_ino(struct sock *sk);
2103-
21042113
static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
21052114
{
21062115
return sk ? sk_uid(sk) : make_kuid(net->user_ns, 0);

net/core/sock.c

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2780,28 +2780,6 @@ void sock_pfree(struct sk_buff *skb)
27802780
EXPORT_SYMBOL(sock_pfree);
27812781
#endif /* CONFIG_INET */
27822782

2783-
unsigned long __sock_i_ino(struct sock *sk)
2784-
{
2785-
unsigned long ino;
2786-
2787-
read_lock(&sk->sk_callback_lock);
2788-
ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
2789-
read_unlock(&sk->sk_callback_lock);
2790-
return ino;
2791-
}
2792-
EXPORT_SYMBOL(__sock_i_ino);
2793-
2794-
unsigned long sock_i_ino(struct sock *sk)
2795-
{
2796-
unsigned long ino;
2797-
2798-
local_bh_disable();
2799-
ino = __sock_i_ino(sk);
2800-
local_bh_enable();
2801-
return ino;
2802-
}
2803-
EXPORT_SYMBOL(sock_i_ino);
2804-
28052783
/*
28062784
* Allocate a skb from the socket's send buffer.
28072785
*/

net/mptcp/protocol.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3554,7 +3554,6 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent)
35543554
write_lock_bh(&sk->sk_callback_lock);
35553555
rcu_assign_pointer(sk->sk_wq, &parent->wq);
35563556
sk_set_socket(sk, parent);
3557-
WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid);
35583557
write_unlock_bh(&sk->sk_callback_lock);
35593558
}
35603559

net/netlink/diag.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
168168
NETLINK_CB(cb->skb).portid,
169169
cb->nlh->nlmsg_seq,
170170
NLM_F_MULTI,
171-
__sock_i_ino(sk)) < 0) {
171+
sock_i_ino(sk)) < 0) {
172172
ret = 1;
173173
break;
174174
}

0 commit comments

Comments
 (0)