Skip to content

Commit ceb9a09

Browse files
jwhitedraggi
authored and committed
tun: reduce redundant checksumming in tcpGRO()
IPv4 header and pseudo header checksums were being computed on every merge operation. Additionally, virtioNetHdr was being written at the same time. This delays those operations until after all coalescing has occurred. Reviewed-by: Adrian Dewhurst <[email protected]> Signed-off-by: Jordan Whited <[email protected]>
1 parent 915962d commit ceb9a09

File tree

1 file changed

+99
-63
lines changed

1 file changed

+99
-63
lines changed

tun/tcp_offload_linux.go

Lines changed: 99 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -269,11 +269,11 @@ func tcpChecksumValid(pkt []byte, iphLen uint8, isV6 bool) bool {
269269
type coalesceResult int
270270

271271
const (
272-
coalesceInsufficientCap coalesceResult = 0
273-
coalescePSHEnding coalesceResult = 1
274-
coalesceItemInvalidCSum coalesceResult = 2
275-
coalescePktInvalidCSum coalesceResult = 3
276-
coalesceSuccess coalesceResult = 4
272+
coalesceInsufficientCap coalesceResult = iota
273+
coalescePSHEnding
274+
coalesceItemInvalidCSum
275+
coalescePktInvalidCSum
276+
coalesceSuccess
277277
)
278278

279279
// coalesceTCPPackets attempts to coalesce pkt with the packet described by
@@ -339,42 +339,6 @@ func coalesceTCPPackets(mode canCoalesce, pkt []byte, pktBuffsIndex int, gsoSize
339339
if gsoSize > item.gsoSize {
340340
item.gsoSize = gsoSize
341341
}
342-
hdr := virtioNetHdr{
343-
flags: unix.VIRTIO_NET_HDR_F_NEEDS_CSUM, // this turns into CHECKSUM_PARTIAL in the skb
344-
hdrLen: uint16(headersLen),
345-
gsoSize: uint16(item.gsoSize),
346-
csumStart: uint16(item.iphLen),
347-
csumOffset: 16,
348-
}
349-
350-
// Recalculate the total len (IPv4) or payload len (IPv6). Recalculate the
351-
// (IPv4) header checksum.
352-
if isV6 {
353-
hdr.gsoType = unix.VIRTIO_NET_HDR_GSO_TCPV6
354-
binary.BigEndian.PutUint16(pktHead[4:], uint16(coalescedLen)-uint16(item.iphLen)) // set new payload len
355-
} else {
356-
hdr.gsoType = unix.VIRTIO_NET_HDR_GSO_TCPV4
357-
pktHead[10], pktHead[11] = 0, 0 // clear checksum field
358-
binary.BigEndian.PutUint16(pktHead[2:], uint16(coalescedLen)) // set new total length
359-
iphCSum := ^checksum(pktHead[:item.iphLen], 0) // compute checksum
360-
binary.BigEndian.PutUint16(pktHead[10:], iphCSum) // set checksum field
361-
}
362-
hdr.encode(bufs[item.bufsIndex][bufsOffset-virtioNetHdrLen:])
363-
364-
// Calculate the pseudo header checksum and place it at the TCP checksum
365-
// offset. Downstream checksum offloading will combine this with computation
366-
// of the tcp header and payload checksum.
367-
addrLen := 4
368-
addrOffset := ipv4SrcAddrOffset
369-
if isV6 {
370-
addrLen = 16
371-
addrOffset = ipv6SrcAddrOffset
372-
}
373-
srcAddrAt := bufsOffset + addrOffset
374-
srcAddr := bufs[item.bufsIndex][srcAddrAt : srcAddrAt+addrLen]
375-
dstAddr := bufs[item.bufsIndex][srcAddrAt+addrLen : srcAddrAt+addrLen*2]
376-
psum := pseudoHeaderChecksumNoFold(unix.IPPROTO_TCP, srcAddr, dstAddr, uint16(coalescedLen-int(item.iphLen)))
377-
binary.BigEndian.PutUint16(pktHead[hdr.csumStart+hdr.csumOffset:], checksum([]byte{}, psum))
378342

379343
item.numMerged++
380344
return coalesceSuccess
@@ -390,58 +354,67 @@ const (
390354
maxUint16 = 1<<16 - 1
391355
)
392356

357+
// tcpGROResult describes the outcome of evaluating a single packet in
// tcpGRO.
type tcpGROResult int

const (
	// tcpGROResultNoop indicates that no action was taken for the evaluated
	// packet.
	tcpGROResultNoop tcpGROResult = iota

	// tcpGROResultTableInsert indicates that the evaluated packet was
	// inserted into the GRO table.
	tcpGROResultTableInsert

	// tcpGROResultCoalesced indicates that the evaluated packet was
	// coalesced with another packet already tracked in the GRO table.
	tcpGROResultCoalesced
)
364+
393365
// tcpGRO evaluates the TCP packet at pktI in bufs for coalescing with
394-
// existing packets tracked in table. It will return false when pktI is not
395-
// coalesced, otherwise true. This indicates to the caller if bufs[pktI]
396-
// should be written to the Device.
397-
func tcpGRO(bufs [][]byte, offset int, pktI int, table *tcpGROTable, isV6 bool) (pktCoalesced bool) {
366+
// existing packets tracked in table. It returns a tcpGROResultNoop when no
367+
// action was taken, tcpGROResultTableInsert when the evaluated packet was
368+
// inserted into table, and tcpGROResultCoalesced when the evaluated packet was
369+
// coalesced with another packet in table.
370+
func tcpGRO(bufs [][]byte, offset int, pktI int, table *tcpGROTable, isV6 bool) tcpGROResult {
398371
pkt := bufs[pktI][offset:]
399372
if len(pkt) > maxUint16 {
400373
// A valid IPv4 or IPv6 packet will never exceed this.
401-
return false
374+
return tcpGROResultNoop
402375
}
403376
iphLen := int((pkt[0] & 0x0F) * 4)
404377
if isV6 {
405378
iphLen = 40
406379
ipv6HPayloadLen := int(binary.BigEndian.Uint16(pkt[4:]))
407380
if ipv6HPayloadLen != len(pkt)-iphLen {
408-
return false
381+
return tcpGROResultNoop
409382
}
410383
} else {
411384
totalLen := int(binary.BigEndian.Uint16(pkt[2:]))
412385
if totalLen != len(pkt) {
413-
return false
386+
return tcpGROResultNoop
414387
}
415388
}
416389
if len(pkt) < iphLen {
417-
return false
390+
return tcpGROResultNoop
418391
}
419392
tcphLen := int((pkt[iphLen+12] >> 4) * 4)
420393
if tcphLen < 20 || tcphLen > 60 {
421-
return false
394+
return tcpGROResultNoop
422395
}
423396
if len(pkt) < iphLen+tcphLen {
424-
return false
397+
return tcpGROResultNoop
425398
}
426399
if !isV6 {
427400
if pkt[6]&ipv4FlagMoreFragments != 0 || pkt[6]<<3 != 0 || pkt[7] != 0 {
428401
// no GRO support for fragmented segments for now
429-
return false
402+
return tcpGROResultNoop
430403
}
431404
}
432405
tcpFlags := pkt[iphLen+tcpFlagsOffset]
433406
var pshSet bool
434407
// not a candidate if any non-ACK flags (except PSH+ACK) are set
435408
if tcpFlags != tcpFlagACK {
436409
if pkt[iphLen+tcpFlagsOffset] != tcpFlagACK|tcpFlagPSH {
437-
return false
410+
return tcpGROResultNoop
438411
}
439412
pshSet = true
440413
}
441414
gsoSize := uint16(len(pkt) - tcphLen - iphLen)
442415
// not a candidate if payload len is 0
443416
if gsoSize < 1 {
444-
return false
417+
return tcpGROResultNoop
445418
}
446419
seq := binary.BigEndian.Uint32(pkt[iphLen+4:])
447420
srcAddrOffset := ipv4SrcAddrOffset
@@ -452,7 +425,7 @@ func tcpGRO(bufs [][]byte, offset int, pktI int, table *tcpGROTable, isV6 bool)
452425
}
453426
items, existing := table.lookupOrInsert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, tcphLen, pktI)
454427
if !existing {
455-
return false
428+
return tcpGROResultNoop
456429
}
457430
for i := len(items) - 1; i >= 0; i-- {
458431
// In the best case of packets arriving in order iterating in reverse is
@@ -470,20 +443,20 @@ func tcpGRO(bufs [][]byte, offset int, pktI int, table *tcpGROTable, isV6 bool)
470443
switch result {
471444
case coalesceSuccess:
472445
table.updateAt(item, i)
473-
return true
446+
return tcpGROResultCoalesced
474447
case coalesceItemInvalidCSum:
475448
// delete the item with an invalid csum
476449
table.deleteAt(item.key, i)
477450
case coalescePktInvalidCSum:
478451
// no point in inserting an item that we can't coalesce
479-
return false
452+
return tcpGROResultNoop
480453
default:
481454
}
482455
}
483456
}
484457
// failed to coalesce with any other packets; store the item in the flow
485458
table.insert(pkt, srcAddrOffset, srcAddrOffset+addrLen, iphLen, tcphLen, pktI)
486-
return false
459+
return tcpGROResultTableInsert
487460
}
488461

489462
func isTCP4NoIPOptions(b []byte) bool {
@@ -515,6 +488,64 @@ func isTCP6NoEH(b []byte) bool {
515488
return true
516489
}
517490

491+
// applyCoalesceAccounting updates bufs to account for coalescing based on the
492+
// metadata found in table.
493+
func applyCoalesceAccounting(bufs [][]byte, offset int, table *tcpGROTable, isV6 bool) error {
494+
for _, items := range table.itemsByFlow {
495+
for _, item := range items {
496+
if item.numMerged > 0 {
497+
hdr := virtioNetHdr{
498+
flags: unix.VIRTIO_NET_HDR_F_NEEDS_CSUM, // this turns into CHECKSUM_PARTIAL in the skb
499+
hdrLen: uint16(item.iphLen + item.tcphLen),
500+
gsoSize: item.gsoSize,
501+
csumStart: uint16(item.iphLen),
502+
csumOffset: 16,
503+
}
504+
pkt := bufs[item.bufsIndex][offset:]
505+
506+
// Recalculate the total len (IPv4) or payload len (IPv6).
507+
// Recalculate the (IPv4) header checksum.
508+
if isV6 {
509+
hdr.gsoType = unix.VIRTIO_NET_HDR_GSO_TCPV6
510+
binary.BigEndian.PutUint16(pkt[4:], uint16(len(pkt))-uint16(item.iphLen)) // set new IPv6 header payload len
511+
} else {
512+
hdr.gsoType = unix.VIRTIO_NET_HDR_GSO_TCPV4
513+
pkt[10], pkt[11] = 0, 0
514+
binary.BigEndian.PutUint16(pkt[2:], uint16(len(pkt))) // set new total length
515+
iphCSum := ^checksum(pkt[:item.iphLen], 0) // compute IPv4 header checksum
516+
binary.BigEndian.PutUint16(pkt[10:], iphCSum) // set IPv4 header checksum field
517+
}
518+
err := hdr.encode(bufs[item.bufsIndex][offset-virtioNetHdrLen:])
519+
if err != nil {
520+
return err
521+
}
522+
523+
// Calculate the pseudo header checksum and place it at the TCP
524+
// checksum offset. Downstream checksum offloading will combine
525+
// this with computation of the tcp header and payload checksum.
526+
addrLen := 4
527+
addrOffset := ipv4SrcAddrOffset
528+
if isV6 {
529+
addrLen = 16
530+
addrOffset = ipv6SrcAddrOffset
531+
}
532+
srcAddrAt := offset + addrOffset
533+
srcAddr := bufs[item.bufsIndex][srcAddrAt : srcAddrAt+addrLen]
534+
dstAddr := bufs[item.bufsIndex][srcAddrAt+addrLen : srcAddrAt+addrLen*2]
535+
psum := pseudoHeaderChecksumNoFold(unix.IPPROTO_TCP, srcAddr, dstAddr, uint16(len(pkt)-int(item.iphLen)))
536+
binary.BigEndian.PutUint16(pkt[hdr.csumStart+hdr.csumOffset:], checksum([]byte{}, psum))
537+
} else {
538+
hdr := virtioNetHdr{}
539+
err := hdr.encode(bufs[item.bufsIndex][offset-virtioNetHdrLen:])
540+
if err != nil {
541+
return err
542+
}
543+
}
544+
}
545+
}
546+
return nil
547+
}
548+
518549
// handleGRO evaluates bufs for GRO, and writes the indices of the resulting
519550
// packets into toWrite. toWrite, tcp4Table, and tcp6Table should initially be
520551
// empty (but non-nil), and are passed in to save allocs as the caller may reset
@@ -524,23 +555,28 @@ func handleGRO(bufs [][]byte, offset int, tcp4Table, tcp6Table *tcpGROTable, toW
524555
if offset < virtioNetHdrLen || offset > len(bufs[i])-1 {
525556
return errors.New("invalid offset")
526557
}
527-
var coalesced bool
558+
var result tcpGROResult
528559
switch {
529560
case isTCP4NoIPOptions(bufs[i][offset:]): // ipv4 packets w/IP options do not coalesce
530-
coalesced = tcpGRO(bufs, offset, i, tcp4Table, false)
561+
result = tcpGRO(bufs, offset, i, tcp4Table, false)
531562
case isTCP6NoEH(bufs[i][offset:]): // ipv6 packets w/extension headers do not coalesce
532-
coalesced = tcpGRO(bufs, offset, i, tcp6Table, true)
563+
result = tcpGRO(bufs, offset, i, tcp6Table, true)
533564
}
534-
if !coalesced {
565+
switch result {
566+
case tcpGROResultNoop:
535567
hdr := virtioNetHdr{}
536568
err := hdr.encode(bufs[i][offset-virtioNetHdrLen:])
537569
if err != nil {
538570
return err
539571
}
572+
fallthrough
573+
case tcpGROResultTableInsert:
540574
*toWrite = append(*toWrite, i)
541575
}
542576
}
543-
return nil
577+
err4 := applyCoalesceAccounting(bufs, offset, tcp4Table, false)
578+
err6 := applyCoalesceAccounting(bufs, offset, tcp6Table, true)
579+
return errors.Join(err4, err6)
544580
}
545581

546582
// tcpTSO splits packets from in into outBuffs, writing the size of each

0 commit comments

Comments
 (0)