Skip to content

Commit b8ec80b

Browse files
Xuanqiang Luokuba-moo
authored andcommitted
inet: Avoid ehash lookup race in inet_twsk_hashdance_schedule()
Since ehash lookups are lockless, if another CPU is converting sk to tw concurrently, fetching the newly inserted tw with tw->tw_refcnt == 0 cause lookup failure. The call trace map is drawn as follows: CPU 0 CPU 1 ----- ----- inet_twsk_hashdance_schedule() spin_lock() inet_twsk_add_node_rcu(tw, ...) __inet_lookup_established() (find tw, failure due to tw_refcnt = 0) __sk_nulls_del_node_init_rcu(sk) refcount_set(&tw->tw_refcnt, 3) spin_unlock() By replacing sk with tw atomically via hlist_nulls_replace_init_rcu() after setting tw_refcnt, we ensure that tw is either fully initialized or not visible to other CPUs, eliminating the race. It's worth noting that we held lock_sock() before the replacement, so there's no need to check if sk is hashed. Thanks to Kuniyuki Iwashima! Fixes: 3ab5aee ("net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls") Reviewed-by: Kuniyuki Iwashima <[email protected]> Reviewed-by: Jiayuan Chen <[email protected]> Signed-off-by: Xuanqiang Luo <[email protected]> Reviewed-by: Eric Dumazet <[email protected]> Link: https://patch.msgid.link/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 1532ed0 commit b8ec80b

File tree

1 file changed

+12
-23
lines changed

1 file changed

+12
-23
lines changed

net/ipv4/inet_timewait_sock.c

Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,6 @@ void inet_twsk_put(struct inet_timewait_sock *tw)
8888
}
8989
EXPORT_SYMBOL_GPL(inet_twsk_put);
9090

91-
static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
92-
struct hlist_nulls_head *list)
93-
{
94-
hlist_nulls_add_head_rcu(&tw->tw_node, list);
95-
}
96-
9791
static void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo)
9892
{
9993
__inet_twsk_schedule(tw, timeo, false);
@@ -113,13 +107,12 @@ void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw,
113107
{
114108
const struct inet_sock *inet = inet_sk(sk);
115109
const struct inet_connection_sock *icsk = inet_csk(sk);
116-
struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
117110
spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
118111
struct inet_bind_hashbucket *bhead, *bhead2;
119112

120-
/* Step 1: Put TW into bind hash. Original socket stays there too.
121-
Note, that any socket with inet->num != 0 MUST be bound in
122-
binding cache, even if it is closed.
113+
/* Put TW into bind hash. Original socket stays there too.
114+
* Note, that any socket with inet->num != 0 MUST be bound in
115+
* binding cache, even if it is closed.
123116
*/
124117
bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
125118
hashinfo->bhash_size)];
@@ -141,19 +134,6 @@ void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw,
141134

142135
spin_lock(lock);
143136

144-
/* Step 2: Hash TW into tcp ehash chain */
145-
inet_twsk_add_node_rcu(tw, &ehead->chain);
146-
147-
/* Step 3: Remove SK from hash chain */
148-
if (__sk_nulls_del_node_init_rcu(sk))
149-
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
150-
151-
152-
/* Ensure above writes are committed into memory before updating the
153-
* refcount.
154-
* Provides ordering vs later refcount_inc().
155-
*/
156-
smp_wmb();
157137
/* tw_refcnt is set to 3 because we have :
158138
* - one reference for bhash chain.
159139
* - one reference for ehash chain.
@@ -163,6 +143,15 @@ void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw,
163143
*/
164144
refcount_set(&tw->tw_refcnt, 3);
165145

146+
/* Ensure tw_refcnt has been set before tw is published.
147+
* smp_wmb() provides the necessary memory barrier to enforce this
148+
* ordering.
149+
*/
150+
smp_wmb();
151+
152+
hlist_nulls_replace_init_rcu(&sk->sk_nulls_node, &tw->tw_node);
153+
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
154+
166155
inet_twsk_schedule(tw, timeo);
167156

168157
spin_unlock(lock);

0 commit comments

Comments
 (0)