Skip to content

Commit 633f5b6

Browse files
committed
Merge branch 'net-initialize-fastreuse-on-inet_inherit_port'
Tim Froidcoeur says: ==================== net: initialize fastreuse on inet_inherit_port In the case of TPROXY, bind_conflict optimizations for SO_REUSEADDR or SO_REUSEPORT are broken, possibly resulting in O(n) instead of O(1) bind behaviour or in the incorrect reuse of a bind. The kernel keeps track, for each bind_bucket, of whether all sockets in the bind_bucket support SO_REUSEADDR or SO_REUSEPORT, using two fastreuse flags. These flags allow skipping the costly bind_conflict check when possible (meaning when all sockets have the proper SO_REUSE option). For every socket added to a bind_bucket, these flags need to be updated. As soon as a socket that does not support reuse is added, the flag is set to false and will never go back to true, unless the bind_bucket is deleted. Note that there is no mechanism to re-evaluate these flags when a socket is removed (this might make sense when removing a socket that would not allow reuse; this leaves room for a future patch). For this optimization to work, it is mandatory that these flags are properly initialized and updated. When a child socket is created from a listen socket in __inet_inherit_port, the TPROXY case could create a new bind bucket without properly initializing these flags, thus preventing the optimization from working. Alternatively, a socket not allowing reuse could be added to an existing bind bucket without updating the flags, causing bind_conflict to never be called as it should. Patch 1/2 refactors the fastreuse update code in inet_csk_get_port into a small helper function, making the actual fix tiny and easier to understand. Patch 2/2 calls this new helper when __inet_inherit_port decides to create a new bind_bucket or use a different bind_bucket than the one of the listen socket. v4: - rebase on latest linux/net master branch v3: - remove company disclaimer from automatic signature v2: - remove unnecessary cast ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents 1b8ef14 + d76f335 commit 633f5b6

File tree

3 files changed

+58
-44
lines changed

3 files changed

+58
-44
lines changed

include/net/inet_connection_sock.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,10 @@ void inet_csk_listen_stop(struct sock *sk);
304304

305305
void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr);
306306

307+
/* update the fast reuse flag when adding a socket */
308+
void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
309+
struct sock *sk);
310+
307311
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
308312

309313
#define TCP_PINGPONG_THRESH 3

net/ipv4/inet_connection_sock.c

Lines changed: 53 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,57 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb,
296296
ipv6_only_sock(sk), true, false);
297297
}
298298

299+
void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
300+
struct sock *sk)
301+
{
302+
kuid_t uid = sock_i_uid(sk);
303+
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
304+
305+
if (hlist_empty(&tb->owners)) {
306+
tb->fastreuse = reuse;
307+
if (sk->sk_reuseport) {
308+
tb->fastreuseport = FASTREUSEPORT_ANY;
309+
tb->fastuid = uid;
310+
tb->fast_rcv_saddr = sk->sk_rcv_saddr;
311+
tb->fast_ipv6_only = ipv6_only_sock(sk);
312+
tb->fast_sk_family = sk->sk_family;
313+
#if IS_ENABLED(CONFIG_IPV6)
314+
tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
315+
#endif
316+
} else {
317+
tb->fastreuseport = 0;
318+
}
319+
} else {
320+
if (!reuse)
321+
tb->fastreuse = 0;
322+
if (sk->sk_reuseport) {
323+
/* We didn't match or we don't have fastreuseport set on
324+
* the tb, but we have sk_reuseport set on this socket
325+
* and we know that there are no bind conflicts with
326+
* this socket in this tb, so reset our tb's reuseport
327+
* settings so that any subsequent sockets that match
328+
* our current socket will be put on the fast path.
329+
*
330+
* If we reset we need to set FASTREUSEPORT_STRICT so we
331+
* do extra checking for all subsequent sk_reuseport
332+
* socks.
333+
*/
334+
if (!sk_reuseport_match(tb, sk)) {
335+
tb->fastreuseport = FASTREUSEPORT_STRICT;
336+
tb->fastuid = uid;
337+
tb->fast_rcv_saddr = sk->sk_rcv_saddr;
338+
tb->fast_ipv6_only = ipv6_only_sock(sk);
339+
tb->fast_sk_family = sk->sk_family;
340+
#if IS_ENABLED(CONFIG_IPV6)
341+
tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
342+
#endif
343+
}
344+
} else {
345+
tb->fastreuseport = 0;
346+
}
347+
}
348+
}
349+
299350
/* Obtain a reference to a local port for the given sock,
300351
* if snum is zero it means select any available local port.
301352
* We try to allocate an odd port (and leave even ports for connect())
@@ -308,7 +359,6 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
308359
struct inet_bind_hashbucket *head;
309360
struct net *net = sock_net(sk);
310361
struct inet_bind_bucket *tb = NULL;
311-
kuid_t uid = sock_i_uid(sk);
312362
int l3mdev;
313363

314364
l3mdev = inet_sk_bound_l3mdev(sk);
@@ -345,49 +395,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
345395
goto fail_unlock;
346396
}
347397
success:
348-
if (hlist_empty(&tb->owners)) {
349-
tb->fastreuse = reuse;
350-
if (sk->sk_reuseport) {
351-
tb->fastreuseport = FASTREUSEPORT_ANY;
352-
tb->fastuid = uid;
353-
tb->fast_rcv_saddr = sk->sk_rcv_saddr;
354-
tb->fast_ipv6_only = ipv6_only_sock(sk);
355-
tb->fast_sk_family = sk->sk_family;
356-
#if IS_ENABLED(CONFIG_IPV6)
357-
tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
358-
#endif
359-
} else {
360-
tb->fastreuseport = 0;
361-
}
362-
} else {
363-
if (!reuse)
364-
tb->fastreuse = 0;
365-
if (sk->sk_reuseport) {
366-
/* We didn't match or we don't have fastreuseport set on
367-
* the tb, but we have sk_reuseport set on this socket
368-
* and we know that there are no bind conflicts with
369-
* this socket in this tb, so reset our tb's reuseport
370-
* settings so that any subsequent sockets that match
371-
* our current socket will be put on the fast path.
372-
*
373-
* If we reset we need to set FASTREUSEPORT_STRICT so we
374-
* do extra checking for all subsequent sk_reuseport
375-
* socks.
376-
*/
377-
if (!sk_reuseport_match(tb, sk)) {
378-
tb->fastreuseport = FASTREUSEPORT_STRICT;
379-
tb->fastuid = uid;
380-
tb->fast_rcv_saddr = sk->sk_rcv_saddr;
381-
tb->fast_ipv6_only = ipv6_only_sock(sk);
382-
tb->fast_sk_family = sk->sk_family;
383-
#if IS_ENABLED(CONFIG_IPV6)
384-
tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
385-
#endif
386-
}
387-
} else {
388-
tb->fastreuseport = 0;
389-
}
390-
}
398+
inet_csk_update_fastreuse(tb, sk);
399+
391400
if (!inet_csk(sk)->icsk_bind_hash)
392401
inet_bind_hash(sk, tb, port);
393402
WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);

net/ipv4/inet_hashtables.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ int __inet_inherit_port(const struct sock *sk, struct sock *child)
163163
return -ENOMEM;
164164
}
165165
}
166+
inet_csk_update_fastreuse(tb, child);
166167
}
167168
inet_bind_hash(child, tb, port);
168169
spin_unlock(&head->lock);

0 commit comments

Comments
 (0)