Skip to content

Commit 5f12303

Browse files
committed
Merge branch 'tcp-refactor-bhash2'
Kuniyuki Iwashima says:

====================
tcp: Refactor bhash2 and remove sk_bind2_node.

This series refactors the code around bhash2 and removes some bhash2-specific fields: sock.sk_bind2_node and inet_timewait_sock.tw_bind2_node.

  patch 1      : optimise bind() for non-wildcard v4-mapped-v6 address
  patch 2 - 4  : optimise bind() conflict tests
  patch 5 - 12 : Link bhash2 to bhash and unlink sk from bhash2 to remove sk_bind2_node

Patch 8 will trigger a false-positive error from checkpatch.

v2: resend of https://lore.kernel.org/netdev/[email protected]/
  * Rebase on latest net-next
  * Patch 11
    * Add change in inet_diag_dump_icsk() for recent bhash dump patch

v1: https://lore.kernel.org/netdev/[email protected]/
====================

Signed-off-by: David S. Miller <[email protected]>
2 parents 67f440c + 8191792 commit 5f12303

File tree

8 files changed

+92
-146
lines changed

8 files changed

+92
-146
lines changed

include/net/inet_hashtables.h

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -88,30 +88,25 @@ struct inet_bind_bucket {
8888
unsigned short fast_sk_family;
8989
bool fast_ipv6_only;
9090
struct hlist_node node;
91-
struct hlist_head owners;
91+
struct hlist_head bhash2;
9292
};
9393

9494
struct inet_bind2_bucket {
9595
possible_net_t ib_net;
9696
int l3mdev;
9797
unsigned short port;
9898
#if IS_ENABLED(CONFIG_IPV6)
99-
unsigned short family;
100-
#endif
101-
union {
102-
#if IS_ENABLED(CONFIG_IPV6)
103-
struct in6_addr v6_rcv_saddr;
99+
unsigned short addr_type;
100+
struct in6_addr v6_rcv_saddr;
101+
#define rcv_saddr v6_rcv_saddr.s6_addr32[3]
102+
#else
103+
__be32 rcv_saddr;
104104
#endif
105-
__be32 rcv_saddr;
106-
};
107105
/* Node in the bhash2 inet_bind_hashbucket chain */
108106
struct hlist_node node;
107+
struct hlist_node bhash_node;
109108
/* List of sockets hashed to this bucket */
110109
struct hlist_head owners;
111-
/* bhash has twsk in owners, but bhash2 has twsk in
112-
* deathrow not to add a member in struct sock_common.
113-
*/
114-
struct hlist_head deathrow;
115110
};
116111

117112
static inline struct net *ib_net(const struct inet_bind_bucket *ib)
@@ -241,7 +236,7 @@ bool inet_bind_bucket_match(const struct inet_bind_bucket *tb,
241236
struct inet_bind2_bucket *
242237
inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net,
243238
struct inet_bind_hashbucket *head,
244-
unsigned short port, int l3mdev,
239+
struct inet_bind_bucket *tb,
245240
const struct sock *sk);
246241

247242
void inet_bind2_bucket_destroy(struct kmem_cache *cachep,

include/net/inet_timewait_sock.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,13 +75,9 @@ struct inet_timewait_sock {
7575
struct timer_list tw_timer;
7676
struct inet_bind_bucket *tw_tb;
7777
struct inet_bind2_bucket *tw_tb2;
78-
struct hlist_node tw_bind2_node;
7978
};
8079
#define tw_tclass tw_tos
8180

82-
#define twsk_for_each_bound_bhash2(__tw, list) \
83-
hlist_for_each_entry(__tw, list, tw_bind2_node)
84-
8581
static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
8682
{
8783
return (struct inet_timewait_sock *)sk;

include/net/ipv6.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -784,11 +784,6 @@ static inline bool ipv6_addr_v4mapped(const struct in6_addr *a)
784784
cpu_to_be32(0x0000ffff))) == 0UL;
785785
}
786786

787-
static inline bool ipv6_addr_v4mapped_any(const struct in6_addr *a)
788-
{
789-
return ipv6_addr_v4mapped(a) && ipv4_is_zeronet(a->s6_addr32[3]);
790-
}
791-
792787
static inline bool ipv6_addr_v4mapped_loopback(const struct in6_addr *a)
793788
{
794789
return ipv6_addr_v4mapped(a) && ipv4_is_loopback(a->s6_addr32[3]);

include/net/sock.h

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,6 @@ struct sk_filter;
352352
* @sk_txtime_report_errors: set report errors mode for SO_TXTIME
353353
* @sk_txtime_unused: unused txtime flags
354354
* @ns_tracker: tracker for netns reference
355-
* @sk_bind2_node: bind node in the bhash2 table
356355
*/
357356
struct sock {
358357
/*
@@ -544,7 +543,6 @@ struct sock {
544543
#endif
545544
struct rcu_head sk_rcu;
546545
netns_tracker ns_tracker;
547-
struct hlist_node sk_bind2_node;
548546
};
549547

550548
enum sk_pacing {
@@ -873,16 +871,6 @@ static inline void sk_add_bind_node(struct sock *sk,
873871
hlist_add_head(&sk->sk_bind_node, list);
874872
}
875873

876-
static inline void __sk_del_bind2_node(struct sock *sk)
877-
{
878-
__hlist_del(&sk->sk_bind2_node);
879-
}
880-
881-
static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)
882-
{
883-
hlist_add_head(&sk->sk_bind2_node, list);
884-
}
885-
886874
#define sk_for_each(__sk, list) \
887875
hlist_for_each_entry(__sk, list, sk_node)
888876
#define sk_for_each_rcu(__sk, list) \
@@ -900,8 +888,6 @@ static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)
900888
hlist_for_each_entry_safe(__sk, tmp, list, sk_node)
901889
#define sk_for_each_bound(__sk, list) \
902890
hlist_for_each_entry(__sk, list, sk_bind_node)
903-
#define sk_for_each_bound_bhash2(__sk, list) \
904-
hlist_for_each_entry(__sk, list, sk_bind2_node)
905891

906892
/**
907893
* sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset

net/ipv4/inet_connection_sock.c

Lines changed: 34 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,11 @@ static bool inet_use_bhash2_on_bind(const struct sock *sk)
159159
if (sk->sk_family == AF_INET6) {
160160
int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
161161

162-
return addr_type != IPV6_ADDR_ANY &&
163-
addr_type != IPV6_ADDR_MAPPED;
162+
if (addr_type == IPV6_ADDR_ANY)
163+
return false;
164+
165+
if (addr_type != IPV6_ADDR_MAPPED)
166+
return true;
164167
}
165168
#endif
166169
return sk->sk_rcv_saddr != htonl(INADDR_ANY);
@@ -213,18 +216,9 @@ static bool inet_bhash2_conflict(const struct sock *sk,
213216
bool relax, bool reuseport_cb_ok,
214217
bool reuseport_ok)
215218
{
216-
struct inet_timewait_sock *tw2;
217219
struct sock *sk2;
218220

219-
sk_for_each_bound_bhash2(sk2, &tb2->owners) {
220-
if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax,
221-
reuseport_cb_ok, reuseport_ok))
222-
return true;
223-
}
224-
225-
twsk_for_each_bound_bhash2(tw2, &tb2->deathrow) {
226-
sk2 = (struct sock *)tw2;
227-
221+
sk_for_each_bound(sk2, &tb2->owners) {
228222
if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax,
229223
reuseport_cb_ok, reuseport_ok))
230224
return true;
@@ -233,48 +227,50 @@ static bool inet_bhash2_conflict(const struct sock *sk,
233227
return false;
234228
}
235229

230+
/* Iterate over every socket reachable from a bhash bucket.
 *
 * @__sk:  struct sock * cursor; points at the current socket in the body
 * @__tb2: struct inet_bind2_bucket * cursor used for the outer walk
 * @__tb:  struct inet_bind_bucket * whose ->bhash2 list is walked
 *
 * The outer iteration visits each inet_bind2_bucket linked on @__tb->bhash2
 * via its bhash_node; the inner one visits each socket on that bucket's
 * ->owners list.  Because this expands to two nested iterations, a "break"
 * in the body is expected to leave only the inner one; use "return" or
 * "goto" to stop the whole walk.
 *
 * The inner iteration must use the caller-supplied @__sk cursor, not a
 * hard-coded identifier, so the macro works whatever the caller's
 * variable is named.
 */
#define sk_for_each_bound_bhash(__sk, __tb2, __tb)			\
	hlist_for_each_entry(__tb2, &(__tb)->bhash2, bhash_node)	\
		sk_for_each_bound(__sk, &(__tb2)->owners)
233+
236234
/* This should be called only when the tb and tb2 hashbuckets' locks are held */
237235
static int inet_csk_bind_conflict(const struct sock *sk,
238236
const struct inet_bind_bucket *tb,
239237
const struct inet_bind2_bucket *tb2, /* may be null */
240238
bool relax, bool reuseport_ok)
241239
{
242-
bool reuseport_cb_ok;
243-
struct sock_reuseport *reuseport_cb;
244240
kuid_t uid = sock_i_uid((struct sock *)sk);
241+
struct sock_reuseport *reuseport_cb;
242+
bool reuseport_cb_ok;
243+
struct sock *sk2;
245244

246245
rcu_read_lock();
247246
reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
248247
/* paired with WRITE_ONCE() in __reuseport_(add|detach)_closed_sock */
249248
reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks);
250249
rcu_read_unlock();
251250

252-
/*
253-
* Unlike other sk lookup places we do not check
251+
/* Conflicts with an existing IPV6_ADDR_ANY (if ipv6) or INADDR_ANY (if
252+
* ipv4) should have been checked already. We need to do these two
253+
* checks separately because their spinlocks have to be acquired/released
254+
* independently of each other, to prevent possible deadlocks
255+
*/
256+
if (inet_use_bhash2_on_bind(sk))
257+
return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax,
258+
reuseport_cb_ok, reuseport_ok);
259+
260+
/* Unlike other sk lookup places we do not check
254261
* for sk_net here, since _all_ the socks listed
255262
* in tb->owners and tb2->owners list belong
256263
* to the same net - the one this bucket belongs to.
257264
*/
265+
sk_for_each_bound_bhash(sk2, tb2, tb) {
266+
if (!inet_bind_conflict(sk, sk2, uid, relax, reuseport_cb_ok, reuseport_ok))
267+
continue;
258268

259-
if (!inet_use_bhash2_on_bind(sk)) {
260-
struct sock *sk2;
261-
262-
sk_for_each_bound(sk2, &tb->owners)
263-
if (inet_bind_conflict(sk, sk2, uid, relax,
264-
reuseport_cb_ok, reuseport_ok) &&
265-
inet_rcv_saddr_equal(sk, sk2, true))
266-
return true;
267-
268-
return false;
269+
if (inet_rcv_saddr_equal(sk, sk2, true))
270+
return true;
269271
}
270272

271-
/* Conflicts with an existing IPV6_ADDR_ANY (if ipv6) or INADDR_ANY (if
272-
* ipv4) should have been checked already. We need to do these two
273-
* checks separately because their spinlocks have to be acquired/released
274-
* independently of each other, to prevent possible deadlocks
275-
*/
276-
return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
277-
reuseport_ok);
273+
return false;
278274
}
279275

280276
/* Determine if there is a bind conflict with an existing IPV6_ADDR_ANY (if ipv6) or
@@ -457,7 +453,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
457453
kuid_t uid = sock_i_uid(sk);
458454
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
459455

460-
if (hlist_empty(&tb->owners)) {
456+
if (hlist_empty(&tb->bhash2)) {
461457
tb->fastreuse = reuse;
462458
if (sk->sk_reuseport) {
463459
tb->fastreuseport = FASTREUSEPORT_ANY;
@@ -549,7 +545,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
549545
}
550546

551547
if (!found_port) {
552-
if (!hlist_empty(&tb->owners)) {
548+
if (!hlist_empty(&tb->bhash2)) {
553549
if (sk->sk_reuse == SK_FORCE_REUSE ||
554550
(tb->fastreuse > 0 && reuse) ||
555551
sk_reuseport_match(tb, sk))
@@ -569,7 +565,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
569565

570566
if (!tb2) {
571567
tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep,
572-
net, head2, port, l3mdev, sk);
568+
net, head2, tb, sk);
573569
if (!tb2)
574570
goto fail_unlock;
575571
bhash2_created = true;
@@ -591,11 +587,10 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
591587

592588
fail_unlock:
593589
if (ret) {
590+
if (bhash2_created)
591+
inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, tb2);
594592
if (bhash_created)
595593
inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
596-
if (bhash2_created)
597-
inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,
598-
tb2);
599594
}
600595
if (head2_lock_acquired)
601596
spin_unlock(&head2->lock);

net/ipv4/inet_diag.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1104,7 +1104,7 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
11041104
if (!net_eq(ib2_net(tb2), net))
11051105
continue;
11061106

1107-
sk_for_each_bound_bhash2(sk, &tb2->owners) {
1107+
sk_for_each_bound(sk, &tb2->owners) {
11081108
struct inet_sock *inet = inet_sk(sk);
11091109

11101110
if (num < s_num)

0 commit comments

Comments
 (0)