Skip to content

Commit 8c4e479

Browse files
author
Paolo Abeni
committed
Merge branch 'add-tcp-fraglist-gro-support'
Felix Fietkau says: ==================== Add TCP fraglist GRO support When forwarding TCP after GRO, software segmentation is very expensive, especially when the checksum needs to be recalculated. One case where that's currently unavoidable is when routing packets over PPPoE. Performance improves significantly when using fraglist GRO implemented in the same way as for UDP. When NETIF_F_GRO_FRAGLIST is enabled, perform a lookup for an established socket in the same netns as the receiving device. While this may not cover all relevant use cases in multi-netns configurations, it should be good enough for most configurations that need this. Here's a measurement of running 2 TCP streams through a MediaTek MT7622 device (2-core Cortex-A53), which runs NAT with flow offload enabled from one ethernet port to PPPoE on another ethernet port + cake qdisc set to 1Gbps. rx-gro-list off: 630 Mbit/s, CPU 35% idle rx-gro-list on: 770 Mbit/s, CPU 40% idle Changes since v4: - add likely() to prefer the non-fraglist path in check Changes since v3: - optimize __tcpv4_gso_segment_csum - add unlikely() - reorder dev_net/skb_gro_network_header calls after NETIF_F_GRO_FRAGLIST check - add support for ipv6 nat - drop redundant pskb_may_pull check Changes since v2: - create tcp_gro_header_pull helper function to pull tcp header only once - optimize __tcpv4_gso_segment_list_csum, drop obsolete flags check Changes since v1: - revert bogus tcp flags overwrite on segmentation - fix kbuild issue with !CONFIG_IPV6 - only perform socket lookup for the first skb in the GRO train Changes since RFC: - split up patches - handle TCP flags mutations ==================== Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Paolo Abeni <[email protected]>
2 parents b1de3c0 + c9d1d23 commit 8c4e479

File tree

6 files changed

+325
-69
lines changed

6 files changed

+325
-69
lines changed

include/net/gro.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,7 @@ static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb,
438438
}
439439

440440
int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
441+
int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb);
441442

442443
/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
443444
static inline void gro_normal_list(struct napi_struct *napi)

include/net/tcp.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2191,7 +2191,10 @@ void tcp_v4_destroy_sock(struct sock *sk);
21912191

21922192
struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
21932193
netdev_features_t features);
2194-
struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
2194+
struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb);
2195+
struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th);
2196+
struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
2197+
struct tcphdr *th);
21952198
INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
21962199
INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
21972200
INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));

net/core/gro.c

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,33 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
231231
return 0;
232232
}
233233

234+
/* Aggregate @skb into the fraglist-GRO head @p: instead of merging
 * payload data, chain @skb onto @p's frag_list so it can later be
 * resegmented cheaply by skb_segment_list().
 *
 * Returns 0 on success, -E2BIG when the aggregate would reach 64k.
 */
int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
{
	/* IP total length is 16 bit — never aggregate to 64k or beyond */
	if (unlikely(p->len + skb->len >= 65536))
		return -E2BIG;

	/* First chained segment starts the frag_list; later ones append
	 * after the previously chained skb tracked in NAPI_GRO_CB->last.
	 */
	if (NAPI_GRO_CB(p)->last == p)
		skb_shinfo(p)->frag_list = skb;
	else
		NAPI_GRO_CB(p)->last->next = skb;

	/* Advance past the bytes GRO has already processed (headers) */
	skb_pull(skb, skb_gro_offset(skb));

	NAPI_GRO_CB(p)->last = skb;
	NAPI_GRO_CB(p)->count++;
	p->data_len += skb->len;

	/* sk ownership - if any - completely transferred to the aggregated packet */
	skb->destructor = NULL;
	skb->sk = NULL;
	p->truesize += skb->truesize;
	p->len += skb->len;

	NAPI_GRO_CB(skb)->same_flow = 1;

	return 0;
}
260+
234261

235262
static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
236263
{

net/ipv4/tcp_offload.c

Lines changed: 178 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,70 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
2828
}
2929
}
3030

31+
/* Rewrite one IPv4 address / TCP port pair in @seg and patch the TCP
 * and IP checksums incrementally.  No-op when both values already match.
 */
static void __tcpv4_gso_segment_csum(struct sk_buff *seg,
				     __be32 *oldip, __be32 newip,
				     __be16 *oldport, __be16 newport)
{
	struct tcphdr *th;
	struct iphdr *iph;

	if (*oldip == newip && *oldport == newport)
		return;

	th = tcp_hdr(seg);
	iph = ip_hdr(seg);

	/* TCP checksum covers both the pseudo-header (address, hence
	 * pseudohdr=true) and the port field itself.
	 */
	inet_proto_csum_replace4(&th->check, seg, *oldip, newip, true);
	inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false);
	*oldport = newport;

	/* IP header checksum only covers the address */
	csum_replace4(&iph->check, *oldip, newip);
	*oldip = newip;
}
51+
52+
/* After resegmenting a fraglist GSO skb, trailing segments may still
 * carry addresses/ports that differ from the head segment (e.g. when
 * the head was rewritten by NAT after aggregation).  Propagate the head
 * segment's IPv4 addresses and TCP ports to every following segment,
 * fixing the checksums along the way.
 */
static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs)
{
	const struct tcphdr *th;
	const struct iphdr *iph;
	struct sk_buff *seg;
	struct tcphdr *th2;
	struct iphdr *iph2;

	seg = segs;
	th = tcp_hdr(seg);
	iph = ip_hdr(seg);
	th2 = tcp_hdr(seg->next);
	iph2 = ip_hdr(seg->next);

	/* Fast path: second segment already matches the head — source and
	 * dest port are adjacent, so one 32-bit XOR compares both at once.
	 */
	if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) &&
	    iph->daddr == iph2->daddr && iph->saddr == iph2->saddr)
		return segs;

	while ((seg = seg->next)) {
		th2 = tcp_hdr(seg);
		iph2 = ip_hdr(seg);

		__tcpv4_gso_segment_csum(seg,
					 &iph2->saddr, iph->saddr,
					 &th2->source, th->source);
		__tcpv4_gso_segment_csum(seg,
					 &iph2->daddr, iph->daddr,
					 &th2->dest, th->dest);
	}

	return segs;
}
84+
85+
/* Segment a SKB_GSO_FRAGLIST TCPv4 skb back into its chained frames,
 * then sync addresses/ports/checksums of the trailing segments with the
 * (possibly rewritten) head segment.
 */
static struct sk_buff *__tcp4_gso_segment_list(struct sk_buff *skb,
					       netdev_features_t features)
{
	skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
	if (IS_ERR(skb))
		return skb;

	return __tcpv4_gso_segment_list_csum(skb);
}
94+
3195
static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
3296
netdev_features_t features)
3397
{
@@ -37,6 +101,9 @@ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
37101
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
38102
return ERR_PTR(-EINVAL);
39103

104+
if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
105+
return __tcp4_gso_segment_list(skb, features);
106+
40107
if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
41108
const struct iphdr *iph = ip_hdr(skb);
42109
struct tcphdr *th = tcp_hdr(skb);
@@ -178,61 +245,76 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
178245
return segs;
179246
}
180247

181-
struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
248+
/* Find the entry in the GRO list @head belonging to the same TCP flow
 * as @th.  Entries whose ports differ are marked !same_flow so later
 * flow checks skip them.  Returns NULL when no entry matches.
 */
struct sk_buff *tcp_gro_lookup(struct list_head *head, struct tcphdr *th)
{
	struct tcphdr *th2;
	struct sk_buff *p;

	list_for_each_entry(p, head, list) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		th2 = tcp_hdr(p);
		/* source and dest port are adjacent in the TCP header:
		 * compare both with a single 32-bit XOR.
		 */
		if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}

		return p;
	}

	return NULL;
}
268+
269+
/* Validate the TCP header at the current GRO offset and advance the GRO
 * cursor past it (including options), so callers can pull the header
 * exactly once.  Returns the header pointer, or NULL when the header
 * cannot be pulled or advertises a bogus length (doff smaller than the
 * fixed header).
 */
struct tcphdr *tcp_gro_pull_header(struct sk_buff *skb)
{
	unsigned int thlen, hlen, off;
	struct tcphdr *th;

	off = skb_gro_offset(skb);
	hlen = off + sizeof(*th);
	th = skb_gro_header(skb, hlen, off);
	if (unlikely(!th))
		return NULL;

	thlen = th->doff * 4;
	if (thlen < sizeof(*th))
		return NULL;

	/* Re-pull with the full header length once doff is known; fall
	 * back to the slow path when the data is not linearly available.
	 */
	hlen = off + thlen;
	if (!skb_gro_may_pull(skb, hlen)) {
		th = skb_gro_header_slow(skb, hlen, off);
		if (unlikely(!th))
			return NULL;
	}

	skb_gro_pull(skb, thlen);

	return th;
}
221295

222-
th2 = tcp_hdr(p);
296+
struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
297+
struct tcphdr *th)
298+
{
299+
unsigned int thlen = th->doff * 4;
300+
struct sk_buff *pp = NULL;
301+
struct sk_buff *p;
302+
struct tcphdr *th2;
303+
unsigned int len;
304+
__be32 flags;
305+
unsigned int mss = 1;
306+
int flush = 1;
307+
int i;
223308

224-
if (*(u32 *)&th->source ^ *(u32 *)&th2->source) {
225-
NAPI_GRO_CB(p)->same_flow = 0;
226-
continue;
227-
}
309+
len = skb_gro_len(skb);
310+
flags = tcp_flag_word(th);
228311

229-
goto found;
230-
}
231-
p = NULL;
232-
goto out_check_final;
312+
p = tcp_gro_lookup(head, th);
313+
if (!p)
314+
goto out_check_final;
233315

234-
found:
235316
/* Include the IP ID check below from the inner most IP hdr */
317+
th2 = tcp_hdr(p);
236318
flush = NAPI_GRO_CB(p)->flush;
237319
flush |= (__force int)(flags & TCP_FLAG_CWR);
238320
flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
@@ -267,6 +349,18 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
267349
flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
268350
flush |= skb_cmp_decrypted(p, skb);
269351

352+
if (unlikely(NAPI_GRO_CB(p)->is_flist)) {
353+
flush |= (__force int)(flags ^ tcp_flag_word(th2));
354+
flush |= skb->ip_summed != p->ip_summed;
355+
flush |= skb->csum_level != p->csum_level;
356+
flush |= NAPI_GRO_CB(p)->count >= 64;
357+
358+
if (flush || skb_gro_receive_list(p, skb))
359+
mss = 1;
360+
361+
goto out_check_final;
362+
}
363+
270364
if (flush || skb_gro_receive(p, skb)) {
271365
mss = 1;
272366
goto out_check_final;
@@ -288,7 +382,6 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
288382
if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
289383
pp = p;
290384

291-
out:
292385
NAPI_GRO_CB(skb)->flush |= (flush != 0);
293386

294387
return pp;
@@ -314,25 +407,74 @@ void tcp_gro_complete(struct sk_buff *skb)
314407
}
315408
EXPORT_SYMBOL(tcp_gro_complete);
316409

410+
/* Decide whether this flow should use fraglist GRO.  Only acts when the
 * receiving device has NETIF_F_GRO_FRAGLIST enabled: an existing GRO
 * entry for the same flow passes its decision on; otherwise look up an
 * established socket in the receiving device's netns and choose fraglist
 * GRO only when none exists (the packet is presumably being forwarded
 * rather than locally delivered).
 */
static void tcp4_check_fraglist_gro(struct list_head *head, struct sk_buff *skb,
				    struct tcphdr *th)
{
	const struct iphdr *iph;
	struct sk_buff *p;
	struct sock *sk;
	struct net *net;
	int iif, sdif;

	if (likely(!(skb->dev->features & NETIF_F_GRO_FRAGLIST)))
		return;

	/* Only the first skb of a flow pays for the socket lookup; later
	 * skbs inherit the cached is_flist decision.
	 */
	p = tcp_gro_lookup(head, th);
	if (p) {
		NAPI_GRO_CB(skb)->is_flist = NAPI_GRO_CB(p)->is_flist;
		return;
	}

	inet_get_iif_sdif(skb, &iif, &sdif);
	iph = skb_gro_network_header(skb);
	net = dev_net(skb->dev);
	sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
				       iph->saddr, th->source,
				       iph->daddr, ntohs(th->dest),
				       iif, sdif);
	NAPI_GRO_CB(skb)->is_flist = !sk;
	if (sk)
		sock_put(sk);
}
439+
317440
/* IPv4 TCP GRO receive: validate the checksum, pull the TCP header once
 * via tcp_gro_pull_header(), pick regular vs fraglist GRO, then hand off
 * to the address-family-independent tcp_gro_receive().
 */
INDIRECT_CALLABLE_SCOPE
struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
{
	struct tcphdr *th;

	/* Don't bother verifying checksum if we're going to flush anyway. */
	if (!NAPI_GRO_CB(skb)->flush &&
	    skb_gro_checksum_validate(skb, IPPROTO_TCP,
				      inet_gro_compute_pseudo))
		goto flush;

	th = tcp_gro_pull_header(skb);
	if (!th)
		goto flush;

	/* Decide regular vs fraglist GRO before any aggregation happens */
	tcp4_check_fraglist_gro(head, skb, th);

	return tcp_gro_receive(head, skb, th);

flush:
	NAPI_GRO_CB(skb)->flush = 1;
	return NULL;
}
330463

331464
INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
332465
{
333466
const struct iphdr *iph = ip_hdr(skb);
334467
struct tcphdr *th = tcp_hdr(skb);
335468

469+
if (unlikely(NAPI_GRO_CB(skb)->is_flist)) {
470+
skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV4;
471+
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
472+
473+
__skb_incr_checksum_unnecessary(skb);
474+
475+
return 0;
476+
}
477+
336478
th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
337479
iph->daddr, 0);
338480

net/ipv4/udp_offload.c

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -433,33 +433,6 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
433433
return segs;
434434
}
435435

436-
static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
437-
{
438-
if (unlikely(p->len + skb->len >= 65536))
439-
return -E2BIG;
440-
441-
if (NAPI_GRO_CB(p)->last == p)
442-
skb_shinfo(p)->frag_list = skb;
443-
else
444-
NAPI_GRO_CB(p)->last->next = skb;
445-
446-
skb_pull(skb, skb_gro_offset(skb));
447-
448-
NAPI_GRO_CB(p)->last = skb;
449-
NAPI_GRO_CB(p)->count++;
450-
p->data_len += skb->len;
451-
452-
/* sk ownership - if any - completely transferred to the aggregated packet */
453-
skb->destructor = NULL;
454-
skb->sk = NULL;
455-
p->truesize += skb->truesize;
456-
p->len += skb->len;
457-
458-
NAPI_GRO_CB(skb)->same_flow = 1;
459-
460-
return 0;
461-
}
462-
463436

464437
#define UDP_GRO_CNT_MAX 64
465438
static struct sk_buff *udp_gro_receive_segment(struct list_head *head,

0 commit comments

Comments
 (0)