Skip to content

Commit 915962d

Browse files
jwhitedraggi
authored and committed
tun: unwind summing loop in checksumNoFold()
$ benchstat old.txt new.txt
goos: linux
goarch: amd64
pkg: golang.zx2c4.com/wireguard/tun
cpu: 12th Gen Intel(R) Core(TM) i5-12400
                   │   old.txt    │               new.txt               │
                   │    sec/op    │   sec/op     vs base                │
Checksum/64-12       10.670n ± 2%   4.769n ± 0%  -55.30% (p=0.000 n=10)
Checksum/128-12      19.665n ± 2%   8.032n ± 0%  -59.16% (p=0.000 n=10)
Checksum/256-12       37.68n ± 1%   16.06n ± 0%  -57.37% (p=0.000 n=10)
Checksum/512-12       76.61n ± 3%   32.13n ± 0%  -58.06% (p=0.000 n=10)
Checksum/1024-12     160.55n ± 4%   64.25n ± 0%  -59.98% (p=0.000 n=10)
Checksum/1500-12     231.05n ± 7%   94.12n ± 0%  -59.26% (p=0.000 n=10)
Checksum/2048-12      309.5n ± 3%   128.5n ± 0%  -58.48% (p=0.000 n=10)
Checksum/4096-12      603.8n ± 4%   257.2n ± 0%  -57.41% (p=0.000 n=10)
Checksum/8192-12     1185.0n ± 3%   515.5n ± 0%  -56.50% (p=0.000 n=10)
Checksum/9000-12     1328.5n ± 5%   564.8n ± 0%  -57.49% (p=0.000 n=10)
Checksum/9001-12     1340.5n ± 3%   564.8n ± 0%  -57.87% (p=0.000 n=10)
geomean               185.3n        77.99n       -57.92%

Reviewed-by: Adrian Dewhurst <[email protected]>
Signed-off-by: Jordan Whited <[email protected]>
1 parent d831fef commit 915962d

File tree

2 files changed

+123
-12
lines changed

2 files changed

+123
-12
lines changed

tun/checksum.go

Lines changed: 88 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,99 @@ package tun
33
import "encoding/binary"
44

55
// TODO: Explore SIMD and/or other assembly optimizations.
6+
// TODO: Test native endian loads. See RFC 1071 section 2 part B.
67
// checksumNoFold adds the bytes of b to initial and returns the raw 64-bit
// accumulator. No carry folding is performed here; reducing the result to a
// narrower checksum is left to the caller.
//
// b is consumed as a sequence of big-endian 32-bit words. A trailing 2-byte
// remainder is added as one big-endian 16-bit word, and a final odd byte is
// added as the high-order byte of a 16-bit word (i.e. shifted left by 8).
//
// The summing loop is unrolled by hand: 128-byte chunks are handled in a
// loop, after which at most one chunk of each smaller power-of-two size
// (64, 32, 16, 8, 4, 2, 1 bytes) can remain and is handled by a single
// conditional each.
func checksumNoFold(b []byte, initial uint64) uint64 {
	ac := initial

	// Main unrolled loop: 32 big-endian words (128 bytes) per iteration.
	for len(b) >= 128 {
		ac += uint64(binary.BigEndian.Uint32(b[:4]))
		ac += uint64(binary.BigEndian.Uint32(b[4:8]))
		ac += uint64(binary.BigEndian.Uint32(b[8:12]))
		ac += uint64(binary.BigEndian.Uint32(b[12:16]))
		ac += uint64(binary.BigEndian.Uint32(b[16:20]))
		ac += uint64(binary.BigEndian.Uint32(b[20:24]))
		ac += uint64(binary.BigEndian.Uint32(b[24:28]))
		ac += uint64(binary.BigEndian.Uint32(b[28:32]))
		ac += uint64(binary.BigEndian.Uint32(b[32:36]))
		ac += uint64(binary.BigEndian.Uint32(b[36:40]))
		ac += uint64(binary.BigEndian.Uint32(b[40:44]))
		ac += uint64(binary.BigEndian.Uint32(b[44:48]))
		ac += uint64(binary.BigEndian.Uint32(b[48:52]))
		ac += uint64(binary.BigEndian.Uint32(b[52:56]))
		ac += uint64(binary.BigEndian.Uint32(b[56:60]))
		ac += uint64(binary.BigEndian.Uint32(b[60:64]))
		ac += uint64(binary.BigEndian.Uint32(b[64:68]))
		ac += uint64(binary.BigEndian.Uint32(b[68:72]))
		ac += uint64(binary.BigEndian.Uint32(b[72:76]))
		ac += uint64(binary.BigEndian.Uint32(b[76:80]))
		ac += uint64(binary.BigEndian.Uint32(b[80:84]))
		ac += uint64(binary.BigEndian.Uint32(b[84:88]))
		ac += uint64(binary.BigEndian.Uint32(b[88:92]))
		ac += uint64(binary.BigEndian.Uint32(b[92:96]))
		ac += uint64(binary.BigEndian.Uint32(b[96:100]))
		ac += uint64(binary.BigEndian.Uint32(b[100:104]))
		ac += uint64(binary.BigEndian.Uint32(b[104:108]))
		ac += uint64(binary.BigEndian.Uint32(b[108:112]))
		ac += uint64(binary.BigEndian.Uint32(b[112:116]))
		ac += uint64(binary.BigEndian.Uint32(b[116:120]))
		ac += uint64(binary.BigEndian.Uint32(b[120:124]))
		ac += uint64(binary.BigEndian.Uint32(b[124:128]))
		b = b[128:]
	}
	// Fewer than 128 bytes remain, so each smaller chunk size below can
	// occur at most once; plain conditionals are enough.
	if len(b) >= 64 {
		ac += uint64(binary.BigEndian.Uint32(b[:4]))
		ac += uint64(binary.BigEndian.Uint32(b[4:8]))
		ac += uint64(binary.BigEndian.Uint32(b[8:12]))
		ac += uint64(binary.BigEndian.Uint32(b[12:16]))
		ac += uint64(binary.BigEndian.Uint32(b[16:20]))
		ac += uint64(binary.BigEndian.Uint32(b[20:24]))
		ac += uint64(binary.BigEndian.Uint32(b[24:28]))
		ac += uint64(binary.BigEndian.Uint32(b[28:32]))
		ac += uint64(binary.BigEndian.Uint32(b[32:36]))
		ac += uint64(binary.BigEndian.Uint32(b[36:40]))
		ac += uint64(binary.BigEndian.Uint32(b[40:44]))
		ac += uint64(binary.BigEndian.Uint32(b[44:48]))
		ac += uint64(binary.BigEndian.Uint32(b[48:52]))
		ac += uint64(binary.BigEndian.Uint32(b[52:56]))
		ac += uint64(binary.BigEndian.Uint32(b[56:60]))
		ac += uint64(binary.BigEndian.Uint32(b[60:64]))
		b = b[64:]
	}
	if len(b) >= 32 {
		ac += uint64(binary.BigEndian.Uint32(b[:4]))
		ac += uint64(binary.BigEndian.Uint32(b[4:8]))
		ac += uint64(binary.BigEndian.Uint32(b[8:12]))
		ac += uint64(binary.BigEndian.Uint32(b[12:16]))
		ac += uint64(binary.BigEndian.Uint32(b[16:20]))
		ac += uint64(binary.BigEndian.Uint32(b[20:24]))
		ac += uint64(binary.BigEndian.Uint32(b[24:28]))
		ac += uint64(binary.BigEndian.Uint32(b[28:32]))
		b = b[32:]
	}
	if len(b) >= 16 {
		ac += uint64(binary.BigEndian.Uint32(b[:4]))
		ac += uint64(binary.BigEndian.Uint32(b[4:8]))
		ac += uint64(binary.BigEndian.Uint32(b[8:12]))
		ac += uint64(binary.BigEndian.Uint32(b[12:16]))
		b = b[16:]
	}
	if len(b) >= 8 {
		ac += uint64(binary.BigEndian.Uint32(b[:4]))
		ac += uint64(binary.BigEndian.Uint32(b[4:8]))
		b = b[8:]
	}
	if len(b) >= 4 {
		ac += uint64(binary.BigEndian.Uint32(b))
		b = b[4:]
	}
	// Sub-word tail: one 16-bit word, then possibly a single odd byte.
	if len(b) >= 2 {
		ac += uint64(binary.BigEndian.Uint16(b))
		b = b[2:]
	}
	if len(b) == 1 {
		// The lone byte is the high-order half of a zero-padded 16-bit word.
		ac += uint64(b[0]) << 8
	}

	return ac
}
25101

tun/checksum_test.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
package tun
2+
3+
import (
4+
"fmt"
5+
"math/rand"
6+
"testing"
7+
)
8+
9+
func BenchmarkChecksum(b *testing.B) {
10+
lengths := []int{
11+
64,
12+
128,
13+
256,
14+
512,
15+
1024,
16+
1500,
17+
2048,
18+
4096,
19+
8192,
20+
9000,
21+
9001,
22+
}
23+
24+
for _, length := range lengths {
25+
b.Run(fmt.Sprintf("%d", length), func(b *testing.B) {
26+
buf := make([]byte, length)
27+
rng := rand.New(rand.NewSource(1))
28+
rng.Read(buf)
29+
b.ResetTimer()
30+
for i := 0; i < b.N; i++ {
31+
checksum(buf, 0)
32+
}
33+
})
34+
}
35+
}

0 commit comments

Comments
 (0)