Skip to content

Commit 0283636

Browse files
qsnklassert
authored andcommitted
espintcp: remove encap socket caching to avoid reference leak
The current scheme for caching the encap socket can lead to reference leaks when we try to delete the netns. The reference chain is: xfrm_state -> enacp_sk -> netns Since the encap socket is a userspace socket, it holds a reference on the netns. If we delete the espintcp state (through flush or individual delete) before removing the netns, the reference on the socket is dropped and the netns is correctly deleted. Otherwise, the netns may not be reachable anymore (if all processes within the ns have terminated), so we cannot delete the xfrm state to drop its reference on the socket. This patch results in a small (~2% in my tests) performance regression. A GC-type mechanism could be added for the socket cache, to clear references if the state hasn't been used "recently", but it's a lot more complex than just not caching the socket. Fixes: e27cca9 ("xfrm: add espintcp (RFC 8229)") Signed-off-by: Sabrina Dubroca <[email protected]> Reviewed-by: Simon Horman <[email protected]> Signed-off-by: Steffen Klassert <[email protected]>
1 parent 63c1f19 commit 0283636

File tree

4 files changed

+8
-94
lines changed

4 files changed

+8
-94
lines changed

include/net/xfrm.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,6 @@ struct xfrm_state {
236236

237237
/* Data for encapsulator */
238238
struct xfrm_encap_tmpl *encap;
239-
struct sock __rcu *encap_sk;
240239

241240
/* NAT keepalive */
242241
u32 nat_keepalive_interval; /* seconds */

net/ipv4/esp4.c

Lines changed: 4 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -120,47 +120,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
120120
}
121121

122122
#ifdef CONFIG_INET_ESPINTCP
123-
struct esp_tcp_sk {
124-
struct sock *sk;
125-
struct rcu_head rcu;
126-
};
127-
128-
static void esp_free_tcp_sk(struct rcu_head *head)
129-
{
130-
struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
131-
132-
sock_put(esk->sk);
133-
kfree(esk);
134-
}
135-
136123
static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
137124
{
138125
struct xfrm_encap_tmpl *encap = x->encap;
139126
struct net *net = xs_net(x);
140-
struct esp_tcp_sk *esk;
141127
__be16 sport, dport;
142-
struct sock *nsk;
143128
struct sock *sk;
144129

145-
sk = rcu_dereference(x->encap_sk);
146-
if (sk && sk->sk_state == TCP_ESTABLISHED)
147-
return sk;
148-
149130
spin_lock_bh(&x->lock);
150131
sport = encap->encap_sport;
151132
dport = encap->encap_dport;
152-
nsk = rcu_dereference_protected(x->encap_sk,
153-
lockdep_is_held(&x->lock));
154-
if (sk && sk == nsk) {
155-
esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
156-
if (!esk) {
157-
spin_unlock_bh(&x->lock);
158-
return ERR_PTR(-ENOMEM);
159-
}
160-
RCU_INIT_POINTER(x->encap_sk, NULL);
161-
esk->sk = sk;
162-
call_rcu(&esk->rcu, esp_free_tcp_sk);
163-
}
164133
spin_unlock_bh(&x->lock);
165134

166135
sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, x->id.daddr.a4,
@@ -173,20 +142,6 @@ static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
173142
return ERR_PTR(-EINVAL);
174143
}
175144

176-
spin_lock_bh(&x->lock);
177-
nsk = rcu_dereference_protected(x->encap_sk,
178-
lockdep_is_held(&x->lock));
179-
if (encap->encap_sport != sport ||
180-
encap->encap_dport != dport) {
181-
sock_put(sk);
182-
sk = nsk ?: ERR_PTR(-EREMCHG);
183-
} else if (sk == nsk) {
184-
sock_put(sk);
185-
} else {
186-
rcu_assign_pointer(x->encap_sk, sk);
187-
}
188-
spin_unlock_bh(&x->lock);
189-
190145
return sk;
191146
}
192147

@@ -211,6 +166,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
211166
err = espintcp_push_skb(sk, skb);
212167
bh_unlock_sock(sk);
213168

169+
sock_put(sk);
170+
214171
out:
215172
rcu_read_unlock();
216173
return err;
@@ -394,6 +351,8 @@ static struct ip_esp_hdr *esp_output_tcp_encap(struct xfrm_state *x,
394351
if (IS_ERR(sk))
395352
return ERR_CAST(sk);
396353

354+
sock_put(sk);
355+
397356
*lenp = htons(len);
398357
esph = (struct ip_esp_hdr *)(lenp + 1);
399358

net/ipv6/esp6.c

Lines changed: 4 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -137,47 +137,16 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
137137
}
138138

139139
#ifdef CONFIG_INET6_ESPINTCP
140-
struct esp_tcp_sk {
141-
struct sock *sk;
142-
struct rcu_head rcu;
143-
};
144-
145-
static void esp_free_tcp_sk(struct rcu_head *head)
146-
{
147-
struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
148-
149-
sock_put(esk->sk);
150-
kfree(esk);
151-
}
152-
153140
static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
154141
{
155142
struct xfrm_encap_tmpl *encap = x->encap;
156143
struct net *net = xs_net(x);
157-
struct esp_tcp_sk *esk;
158144
__be16 sport, dport;
159-
struct sock *nsk;
160145
struct sock *sk;
161146

162-
sk = rcu_dereference(x->encap_sk);
163-
if (sk && sk->sk_state == TCP_ESTABLISHED)
164-
return sk;
165-
166147
spin_lock_bh(&x->lock);
167148
sport = encap->encap_sport;
168149
dport = encap->encap_dport;
169-
nsk = rcu_dereference_protected(x->encap_sk,
170-
lockdep_is_held(&x->lock));
171-
if (sk && sk == nsk) {
172-
esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
173-
if (!esk) {
174-
spin_unlock_bh(&x->lock);
175-
return ERR_PTR(-ENOMEM);
176-
}
177-
RCU_INIT_POINTER(x->encap_sk, NULL);
178-
esk->sk = sk;
179-
call_rcu(&esk->rcu, esp_free_tcp_sk);
180-
}
181150
spin_unlock_bh(&x->lock);
182151

183152
sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &x->id.daddr.in6,
@@ -190,20 +159,6 @@ static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
190159
return ERR_PTR(-EINVAL);
191160
}
192161

193-
spin_lock_bh(&x->lock);
194-
nsk = rcu_dereference_protected(x->encap_sk,
195-
lockdep_is_held(&x->lock));
196-
if (encap->encap_sport != sport ||
197-
encap->encap_dport != dport) {
198-
sock_put(sk);
199-
sk = nsk ?: ERR_PTR(-EREMCHG);
200-
} else if (sk == nsk) {
201-
sock_put(sk);
202-
} else {
203-
rcu_assign_pointer(x->encap_sk, sk);
204-
}
205-
spin_unlock_bh(&x->lock);
206-
207162
return sk;
208163
}
209164

@@ -228,6 +183,8 @@ static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
228183
err = espintcp_push_skb(sk, skb);
229184
bh_unlock_sock(sk);
230185

186+
sock_put(sk);
187+
231188
out:
232189
rcu_read_unlock();
233190
return err;
@@ -424,6 +381,8 @@ static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x,
424381
if (IS_ERR(sk))
425382
return ERR_CAST(sk);
426383

384+
sock_put(sk);
385+
427386
*lenp = htons(len);
428387
esph = (struct ip_esp_hdr *)(lenp + 1);
429388

net/xfrm/xfrm_state.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -838,9 +838,6 @@ int __xfrm_state_delete(struct xfrm_state *x)
838838
xfrm_nat_keepalive_state_updated(x);
839839
spin_unlock(&net->xfrm.xfrm_state_lock);
840840

841-
if (x->encap_sk)
842-
sock_put(rcu_dereference_raw(x->encap_sk));
843-
844841
xfrm_dev_state_delete(x);
845842

846843
/* All xfrm_state objects are created by xfrm_state_alloc.

0 commit comments

Comments
 (0)