Skip to content

Commit 5eddb24

Browse files
xli98 authored and davem330 committed
gro: add support of (hw)gro packets to gro stack
Current GRO stack only supports incoming packets containing one frame/MSS.

This patch changes GRO to accept packets that are already GRO.

HW-GRO (aka RSC for some vendors) is very often limited in presence of interleaved packets. Linux SW GRO stack can complete the job and provide larger GRO packets, thus reducing rate of ACK packets and cpu overhead.

This also means BIG TCP can still be used, even if HW-GRO/RSC was able to cook ~64 KB GRO packets.

v2: fix logic in tcp_gro_receive()
    Only support TCP for the moment (Paolo)

Co-Developed-by: Eric Dumazet <[email protected]>
Signed-off-by: Eric Dumazet <[email protected]>
Signed-off-by: Coco Li <[email protected]>
Acked-by: Paolo Abeni <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 197060c commit 5eddb24

File tree

2 files changed

+29
-6
lines changed

2 files changed

+29
-6
lines changed

net/core/gro.c

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -160,6 +160,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
160160
unsigned int gro_max_size;
161161
unsigned int new_truesize;
162162
struct sk_buff *lp;
163+
int segs;
163164

164165
/* pairs with WRITE_ONCE() in netif_set_gro_max_size() */
165166
gro_max_size = READ_ONCE(p->dev->gro_max_size);
@@ -175,6 +176,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
175176
return -E2BIG;
176177
}
177178

179+
segs = NAPI_GRO_CB(skb)->count;
178180
lp = NAPI_GRO_CB(p)->last;
179181
pinfo = skb_shinfo(lp);
180182

@@ -265,7 +267,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
265267
lp = p;
266268

267269
done:
268-
NAPI_GRO_CB(p)->count++;
270+
NAPI_GRO_CB(p)->count += segs;
269271
p->data_len += len;
270272
p->truesize += delta_truesize;
271273
p->len += len;
@@ -496,8 +498,15 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
496498
BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed),
497499
sizeof(u32))); /* Avoid slow unaligned acc */
498500
*(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0;
499-
NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
501+
NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb);
500502
NAPI_GRO_CB(skb)->is_atomic = 1;
503+
NAPI_GRO_CB(skb)->count = 1;
504+
if (unlikely(skb_is_gso(skb))) {
505+
NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs;
506+
/* Only support TCP at the moment. */
507+
if (!skb_is_gso_tcp(skb))
508+
NAPI_GRO_CB(skb)->flush = 1;
509+
}
501510

502511
/* Setup for GRO checksum validation */
503512
switch (skb->ip_summed) {
@@ -545,10 +554,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
545554
else
546555
gro_list->count++;
547556

548-
NAPI_GRO_CB(skb)->count = 1;
549557
NAPI_GRO_CB(skb)->age = jiffies;
550558
NAPI_GRO_CB(skb)->last = skb;
551-
skb_shinfo(skb)->gso_size = skb_gro_len(skb);
559+
if (!skb_is_gso(skb))
560+
skb_shinfo(skb)->gso_size = skb_gro_len(skb);
552561
list_add(&skb->list, &gro_list->list);
553562
ret = GRO_HELD;
554563

@@ -660,6 +669,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
660669

661670
skb->encapsulation = 0;
662671
skb_shinfo(skb)->gso_type = 0;
672+
skb_shinfo(skb)->gso_size = 0;
663673
if (unlikely(skb->slow_gro)) {
664674
skb_orphan(skb);
665675
skb_ext_reset(skb);

net/ipv4/tcp_offload.c

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -255,7 +255,15 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
255255

256256
mss = skb_shinfo(p)->gso_size;
257257

258-
flush |= (len - 1) >= mss;
258+
/* If skb is a GRO packet, make sure its gso_size matches prior packet mss.
259+
* If it is a single frame, do not aggregate it if its length
260+
* is bigger than our mss.
261+
*/
262+
if (unlikely(skb_is_gso(skb)))
263+
flush |= (mss != skb_shinfo(skb)->gso_size);
264+
else
265+
flush |= (len - 1) >= mss;
266+
259267
flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
260268
#ifdef CONFIG_TLS_DEVICE
261269
flush |= p->decrypted ^ skb->decrypted;
@@ -269,7 +277,12 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
269277
tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
270278

271279
out_check_final:
272-
flush = len < mss;
280+
/* Force a flush if last segment is smaller than mss. */
281+
if (unlikely(skb_is_gso(skb)))
282+
flush = len != NAPI_GRO_CB(skb)->count * skb_shinfo(skb)->gso_size;
283+
else
284+
flush = len < mss;
285+
273286
flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
274287
TCP_FLAG_RST | TCP_FLAG_SYN |
275288
TCP_FLAG_FIN));

0 commit comments

Comments
 (0)