Skip to content

Commit e377240

Browse files
committed
Merge branch 'xfrm: Support GRO decapsulation for ESP in UDP encapsulation'
Antony Antony says: ============ I have added how to enable this feature, and more description to the second patch. Here is a copy of that. xfrm: Support GRO for IPv4 & IPv6 ESP in UDP encapsulation This patchset enables the GRO codepath for ESP in UDP encapsulated packets. Decapsulation happens at L2 and saves a full round through the stack for each packet. This is also needed to support HW offload for ESP in UDP encapsulation. Enabling this would improve performance for the ESP in UDP datapath, i.e. IPsec with NAT in between. Our initial tests show a 20% improvement. By default GRO for ESP-in-UDP is disabled for UDP sockets. To enable this feature for an ESP socket, the following two options need to be set: 1. enable ESP-in-UDP: (this is already set by an IKE daemon). int type = UDP_ENCAP_ESPINUDP; setsockopt(fd, SOL_UDP, UDP_ENCAP, &type, sizeof(type)); 2. To enable GRO for ESP in UDP socket: type = true; setsockopt(fd, SOL_UDP, UDP_GRO, &type, sizeof(type)); Enabling ESP-in-UDP has the side effect of preventing the Linux stack from seeing ESP packets at L3 (when ESP OFFLOAD is disabled), as packets are immediately decapsulated from UDP and decrypted. This change may affect nftables rules that match on ESP packets at L3. Also tcpdump won't see the ESP packet. Developers/admins are advised to review and adapt any nftables rules accordingly before enabling this feature to prevent potential rule breakage. Also, tcpdump will not see ESP packets from an ESP in UDP flow when this is enabled. Initially, a quick test showed a performance improvement of about 20% on the receiver, when using iperf with a TCP flow over ESP in UDP. ============ Signed-off-by: Steffen Klassert <[email protected]>
2 parents 1d495f1 + 221ddb7 commit e377240

File tree

10 files changed

+192
-44
lines changed

10 files changed

+192
-44
lines changed

include/net/gro.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ struct napi_gro_cb {
4141
/* Number of segments aggregated. */
4242
u16 count;
4343

44-
/* Used in ipv6_gro_receive() and foo-over-udp */
44+
/* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
4545
u16 proto;
4646

4747
/* Used in napi_gro_cb::free */

include/net/ipv6_stubs.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ struct ipv6_stub {
6060
#if IS_ENABLED(CONFIG_XFRM)
6161
void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu);
6262
int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
63+
struct sk_buff *(*xfrm6_gro_udp_encap_rcv)(struct sock *sk,
64+
struct list_head *head,
65+
struct sk_buff *skb);
6366
int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi,
6467
int encap_type);
6568
#endif

include/net/xfrm.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1710,6 +1710,10 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
17101710
void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
17111711
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
17121712
int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
1713+
struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
1714+
struct sk_buff *skb);
1715+
struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
1716+
struct sk_buff *skb);
17131717
int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
17141718
int optlen);
17151719
#else

net/ipv4/esp4_offload.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
3333
int offset = skb_gro_offset(skb);
3434
struct xfrm_offload *xo;
3535
struct xfrm_state *x;
36+
int encap_type = 0;
3637
__be32 seq;
3738
__be32 spi;
3839

@@ -70,14 +71,17 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
7071

7172
xo->flags |= XFRM_GRO;
7273

74+
if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
75+
encap_type = UDP_ENCAP_ESPINUDP;
76+
7377
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
7478
XFRM_SPI_SKB_CB(skb)->family = AF_INET;
7579
XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
7680
XFRM_SPI_SKB_CB(skb)->seq = seq;
7781

7882
/* We don't need to handle errors from xfrm_input, it does all
7983
* the error handling and frees the resources on error. */
80-
xfrm_input(skb, IPPROTO_ESP, spi, -2);
84+
xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
8185

8286
return ERR_PTR(-EINPROGRESS);
8387
out_reset:

net/ipv4/udp.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2625,6 +2625,19 @@ void udp_destroy_sock(struct sock *sk)
26252625
}
26262626
}
26272627

2628+
static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
2629+
struct sock *sk)
2630+
{
2631+
#ifdef CONFIG_XFRM
2632+
if (udp_test_bit(GRO_ENABLED, sk) && encap_type == UDP_ENCAP_ESPINUDP) {
2633+
if (family == AF_INET)
2634+
WRITE_ONCE(udp_sk(sk)->gro_receive, xfrm4_gro_udp_encap_rcv);
2635+
else if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6)
2636+
WRITE_ONCE(udp_sk(sk)->gro_receive, ipv6_stub->xfrm6_gro_udp_encap_rcv);
2637+
}
2638+
#endif
2639+
}
2640+
26282641
/*
26292642
* Socket option code for UDP
26302643
*/
@@ -2674,6 +2687,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
26742687
case 0:
26752688
#ifdef CONFIG_XFRM
26762689
case UDP_ENCAP_ESPINUDP:
2690+
set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, sk);
2691+
fallthrough;
26772692
case UDP_ENCAP_ESPINUDP_NON_IKE:
26782693
#if IS_ENABLED(CONFIG_IPV6)
26792694
if (sk->sk_family == AF_INET6)
@@ -2716,6 +2731,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
27162731
udp_tunnel_encap_enable(sk);
27172732
udp_assign_bit(GRO_ENABLED, sk, valbool);
27182733
udp_assign_bit(ACCEPT_L4, sk, valbool);
2734+
set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, sk);
27192735
break;
27202736

27212737
/*

net/ipv4/xfrm4_input.c

Lines changed: 76 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#include <linux/netfilter_ipv4.h>
1818
#include <net/ip.h>
1919
#include <net/xfrm.h>
20+
#include <net/protocol.h>
21+
#include <net/gro.h>
2022

2123
static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
2224
struct sk_buff *skb)
@@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
7274
return 0;
7375
}
7476

75-
/* If it's a keepalive packet, then just eat it.
76-
* If it's an encapsulated packet, then pass it to the
77-
* IPsec xfrm input.
78-
* Returns 0 if skb passed to xfrm or was dropped.
79-
* Returns >0 if skb should be passed to UDP.
80-
* Returns <0 if skb should be resubmitted (-ret is protocol)
81-
*/
82-
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
77+
static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
8378
{
8479
struct udp_sock *up = udp_sk(sk);
8580
struct udphdr *uh;
@@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
110105
case UDP_ENCAP_ESPINUDP:
111106
/* Check if this is a keepalive packet. If so, eat it. */
112107
if (len == 1 && udpdata[0] == 0xff) {
113-
goto drop;
108+
return -EINVAL;
114109
} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
115110
/* ESP Packet without Non-ESP header */
116111
len = sizeof(struct udphdr);
@@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
121116
case UDP_ENCAP_ESPINUDP_NON_IKE:
122117
/* Check if this is a keepalive packet. If so, eat it. */
123118
if (len == 1 && udpdata[0] == 0xff) {
124-
goto drop;
119+
return -EINVAL;
125120
} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
126121
udpdata32[0] == 0 && udpdata32[1] == 0) {
127122

@@ -139,33 +134,96 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
139134
* protocol to ESP, and then call into the transform receiver.
140135
*/
141136
if (skb_unclone(skb, GFP_ATOMIC))
142-
goto drop;
137+
return -EINVAL;
143138

144139
/* Now we can update and verify the packet length... */
145140
iph = ip_hdr(skb);
146141
iphlen = iph->ihl << 2;
147142
iph->tot_len = htons(ntohs(iph->tot_len) - len);
148143
if (skb->len < iphlen + len) {
149144
/* packet is too small!?! */
150-
goto drop;
145+
return -EINVAL;
151146
}
152147

153148
/* pull the data buffer up to the ESP header and set the
154149
* transport header to point to ESP. Keep UDP on the stack
155150
* for later.
156151
*/
157-
__skb_pull(skb, len);
158-
skb_reset_transport_header(skb);
152+
if (pull) {
153+
__skb_pull(skb, len);
154+
skb_reset_transport_header(skb);
155+
} else {
156+
skb_set_transport_header(skb, len);
157+
}
159158

160159
/* process ESP */
161-
return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
162-
163-
drop:
164-
kfree_skb(skb);
165160
return 0;
166161
}
167162
EXPORT_SYMBOL(xfrm4_udp_encap_rcv);
168163

164+
/* If it's a keepalive packet, then just eat it.
165+
* If it's an encapsulated packet, then pass it to the
166+
* IPsec xfrm input.
167+
* Returns 0 if skb passed to xfrm or was dropped.
168+
* Returns >0 if skb should be passed to UDP.
169+
* Returns <0 if skb should be resubmitted (-ret is protocol)
170+
*/
171+
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
172+
{
173+
int ret;
174+
175+
ret = __xfrm4_udp_encap_rcv(sk, skb, true);
176+
if (!ret)
177+
return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0,
178+
udp_sk(sk)->encap_type);
179+
180+
if (ret < 0) {
181+
kfree_skb(skb);
182+
return 0;
183+
}
184+
185+
return ret;
186+
}
187+
188+
struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
189+
struct sk_buff *skb)
190+
{
191+
int offset = skb_gro_offset(skb);
192+
const struct net_offload *ops;
193+
struct sk_buff *pp = NULL;
194+
int ret;
195+
196+
offset = offset - sizeof(struct udphdr);
197+
198+
if (!pskb_pull(skb, offset))
199+
return NULL;
200+
201+
rcu_read_lock();
202+
ops = rcu_dereference(inet_offloads[IPPROTO_ESP]);
203+
if (!ops || !ops->callbacks.gro_receive)
204+
goto out;
205+
206+
ret = __xfrm4_udp_encap_rcv(sk, skb, false);
207+
if (ret)
208+
goto out;
209+
210+
skb_push(skb, offset);
211+
NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
212+
213+
pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
214+
rcu_read_unlock();
215+
216+
return pp;
217+
218+
out:
219+
rcu_read_unlock();
220+
skb_push(skb, offset);
221+
NAPI_GRO_CB(skb)->same_flow = 0;
222+
NAPI_GRO_CB(skb)->flush = 1;
223+
224+
return NULL;
225+
}
226+
169227
int xfrm4_rcv(struct sk_buff *skb)
170228
{
171229
return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);

net/ipv6/af_inet6.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,6 +1049,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
10491049
#if IS_ENABLED(CONFIG_XFRM)
10501050
.xfrm6_local_rxpmtu = xfrm6_local_rxpmtu,
10511051
.xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv,
1052+
.xfrm6_gro_udp_encap_rcv = xfrm6_gro_udp_encap_rcv,
10521053
.xfrm6_rcv_encap = xfrm6_rcv_encap,
10531054
#endif
10541055
.nd_tbl = &nd_tbl,

net/ipv6/esp6_offload.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@ static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
3434
int off = sizeof(struct ipv6hdr);
3535
struct ipv6_opt_hdr *exthdr;
3636

37-
if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP))
37+
/* ESP or ESPINUDP */
38+
if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP ||
39+
ipv6_hdr->nexthdr == NEXTHDR_UDP))
3840
return offsetof(struct ipv6hdr, nexthdr);
3941

4042
while (off < nhlen) {
@@ -54,10 +56,14 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
5456
int offset = skb_gro_offset(skb);
5557
struct xfrm_offload *xo;
5658
struct xfrm_state *x;
59+
int encap_type = 0;
5760
__be32 seq;
5861
__be32 spi;
5962
int nhoff;
6063

64+
if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
65+
encap_type = UDP_ENCAP_ESPINUDP;
66+
6167
if (!pskb_pull(skb, offset))
6268
return NULL;
6369

@@ -104,7 +110,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
104110

105111
/* We don't need to handle errors from xfrm_input, it does all
106112
* the error handling and frees the resources on error. */
107-
xfrm_input(skb, IPPROTO_ESP, spi, -2);
113+
xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
108114

109115
return ERR_PTR(-EINPROGRESS);
110116
out_reset:

0 commit comments

Comments
 (0)