Skip to content

Commit 889ab74

Browse files
mknyszek authored and gopherbot committed
internal/runtime/gc/scan: import scan kernel from gclab [green tea]
This change imports the AVX512 GC scanning kernel from CL 593938 into a new package, internal/runtime/gc/scan. Credit to Austin Clements for most of this work. I did some cleanup, added support for more size classes to the expanders, and added more testing. I also restructured the code to make it easier and clearer to add new scan kernels for new architectures. For golang#73581. Change-Id: I76bcbc889fa6cad73ba0084620fae084a5912e6b Cq-Include-Trybots: luci.golang.try:gotip-linux-amd64_avx512,gotip-linux-amd64_avx512-greenteagc Reviewed-on: https://go-review.googlesource.com/c/go/+/655280 LUCI-TryBot-Result: Go LUCI <[email protected]> Auto-Submit: Michael Knyszek <[email protected]> Reviewed-by: Michael Pratt <[email protected]>
1 parent 182336b commit 889ab74

34 files changed

+5426
-14
lines changed

src/cmd/internal/objabi/pkgspecial.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ var runtimePkgs = []string{
5252
"internal/runtime/cgroup",
5353
"internal/runtime/exithook",
5454
"internal/runtime/gc",
55+
"internal/runtime/gc/scan",
5556
"internal/runtime/maps",
5657
"internal/runtime/math",
5758
"internal/runtime/strconv",

src/go/build/deps_test.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ var depsRules = `
100100
< internal/runtime/maps
101101
< internal/runtime/strconv
102102
< internal/runtime/cgroup
103+
< internal/runtime/gc/scan
103104
< runtime
104105
< sync/atomic
105106
< internal/sync
@@ -797,6 +798,20 @@ var depsRules = `
797798
798799
FMT, testing < internal/cgrouptest;
799800
C, CGO < internal/runtime/cgobench;
801+
802+
# Generate-only packages can have anything they want
803+
container/heap,
804+
encoding/binary,
805+
fmt,
806+
hash/maphash,
807+
io,
808+
log,
809+
math/bits,
810+
os,
811+
reflect,
812+
strings,
813+
sync
814+
< internal/runtime/gc/internal/gen;
800815
`
801816

802817
// listStdPkgs returns the same list of packages as "go list std".

src/internal/cpu/cpu.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,19 @@ var X86 struct {
3434
HasAVX512 bool // Virtual feature: F+CD+BW+DQ+VL
3535
HasAVX512F bool
3636
HasAVX512CD bool
37+
HasAVX512BITALG bool
3738
HasAVX512BW bool
3839
HasAVX512DQ bool
3940
HasAVX512VL bool
4041
HasAVX512VPCLMULQDQ bool
42+
HasAVX512VBMI bool
43+
HasAVX512VBMI2 bool
4144
HasBMI1 bool
4245
HasBMI2 bool
4346
HasERMS bool
4447
HasFSRM bool
4548
HasFMA bool
49+
HasGFNI bool
4650
HasOSXSAVE bool
4751
HasPCLMULQDQ bool
4852
HasPOPCNT bool

src/internal/cpu/cpu_x86.go

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ func xgetbv() (eax, edx uint32)
1818
func getGOAMD64level() int32
1919

2020
const (
21-
// ecx bits
21+
// Bits returned in ECX for CPUID EAX=0x1 ECX=0x0
2222
cpuid_SSE3 = 1 << 0
2323
cpuid_PCLMULQDQ = 1 << 1
2424
cpuid_SSSE3 = 1 << 9
@@ -30,7 +30,7 @@ const (
3030
cpuid_OSXSAVE = 1 << 27
3131
cpuid_AVX = 1 << 28
3232

33-
// ebx bits
33+
// "Extended Feature Flag" bits returned in EBX for CPUID EAX=0x7 ECX=0x0
3434
cpuid_BMI1 = 1 << 3
3535
cpuid_AVX2 = 1 << 5
3636
cpuid_BMI2 = 1 << 8
@@ -43,8 +43,12 @@ const (
4343
cpuid_AVX512BW = 1 << 30
4444
cpuid_AVX512VL = 1 << 31
4545

46-
// ecx bits
46+
// "Extended Feature Flag" bits returned in ECX for CPUID EAX=0x7 ECX=0x0
47+
cpuid_AVX512_VBMI = 1 << 1
48+
cpuid_AVX512_VBMI2 = 1 << 6
49+
cpuid_GFNI = 1 << 8
4750
cpuid_AVX512VPCLMULQDQ = 1 << 10
51+
cpuid_AVX512_BITALG = 1 << 12
4852

4953
// "Extended Feature Flag" bits returned in EDX for CPUID EAX=0x7 ECX=0x0
5054
cpuid_FSRM = 1 << 4
@@ -163,6 +167,10 @@ func doinit() {
163167
X86.HasAVX512DQ = isSet(ebx7, cpuid_AVX512DQ)
164168
X86.HasAVX512VL = isSet(ebx7, cpuid_AVX512VL)
165169
X86.HasAVX512VPCLMULQDQ = isSet(ecx7, cpuid_AVX512VPCLMULQDQ)
170+
X86.HasAVX512VBMI = isSet(ecx7, cpuid_AVX512_VBMI)
171+
X86.HasAVX512VBMI2 = isSet(ecx7, cpuid_AVX512_VBMI2)
172+
X86.HasGFNI = isSet(ecx7, cpuid_GFNI)
173+
X86.HasAVX512BITALG = isSet(ecx7, cpuid_AVX512_BITALG)
166174
}
167175

168176
X86.HasFSRM = isSet(edx7, cpuid_FSRM)
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// Copyright 2025 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
//go:build !386 && !amd64
6+
7+
package cpu
8+
9+
// DataCacheSizes returns the size of each data cache from lowest
// level in the hierarchy to highest.
//
// This variant is built for every architecture other than 386 and
// amd64. It performs no detection and always returns nil.
func DataCacheSizes() []uintptr {
	return nil
}

src/internal/cpu/datacache_x86.go

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
// Copyright 2025 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
//go:build 386 || amd64
6+
7+
package cpu
8+
9+
// DataCacheSizes returns the size of each data cache from lowest
10+
// level in the hierarchy to highest.
11+
//
12+
// Unlike other parts of this package's public API, it is not safe
13+
// to reference early in runtime initialization because it allocates.
14+
// It's intended for testing only.
15+
func DataCacheSizes() []uintptr {
16+
maxFunctionInformation, ebx0, ecx0, edx0 := cpuid(0, 0)
17+
if maxFunctionInformation < 1 {
18+
return nil
19+
}
20+
21+
switch {
22+
// Check for "GenuineIntel"
23+
case ebx0 == 0x756E6547 && ecx0 == 0x6C65746E && edx0 == 0x49656E69:
24+
return getDataCacheSizesIntel(maxFunctionInformation)
25+
// Check for "AuthenticAMD"
26+
case ebx0 == 0x68747541 && ecx0 == 0x444D4163 && edx0 == 0x69746E65:
27+
return getDataCacheSizesAMD()
28+
}
29+
return nil
30+
}
31+
32+
// extractBits returns bits l through r (both inclusive, bit 0 being the
// least significant) of arg, shifted down so that bit l ends up in bit 0.
// It panics if l > r.
func extractBits(arg uint32, l int, r int) uint32 {
	if r < l {
		panic("bad bit range")
	}
	width := r - l + 1
	// When width is 32 or more the shift produces 0 and the subtraction
	// wraps around to an all-ones mask, which is exactly what is wanted.
	mask := uint32(1)<<width - 1
	return (arg >> l) & mask
}
38+
39+
func getDataCacheSizesIntel(maxID uint32) []uintptr {
40+
// Constants for cache types
41+
const (
42+
noCache = 0
43+
dataCache = 1
44+
instructionCache = 2
45+
unifiedCache = 3
46+
)
47+
if maxID < 4 {
48+
return nil
49+
}
50+
51+
// Iterate through CPUID leaf 4 (deterministic cache parameters)
52+
var caches []uintptr
53+
for i := uint32(0); i < 0xFFFF; i++ {
54+
eax, ebx, ecx, _ := cpuid(4, i)
55+
56+
cacheType := eax & 0xF // EAX bits 4-0: Cache Type
57+
if cacheType == 0 {
58+
break
59+
}
60+
61+
// Report only data caches.
62+
if !(cacheType == dataCache || cacheType == unifiedCache) {
63+
continue
64+
}
65+
66+
// Guaranteed to always start counting from 1.
67+
level := (eax >> 5) & 0x7
68+
69+
lineSize := extractBits(ebx, 0, 11) + 1 // Bits 11-0: Line size in bytes - 1
70+
partitions := extractBits(ebx, 12, 21) + 1 // Bits 21-12: Physical line partitions - 1
71+
ways := extractBits(ebx, 22, 31) + 1 // Bits 31-22: Ways of associativity - 1
72+
sets := uint64(ecx) + 1 // Number of sets - 1
73+
size := uint64(ways*partitions*lineSize) * sets // Calculate cache size in bytes
74+
75+
caches = append(caches, uintptr(size))
76+
77+
// If we see more than one cache described per level, or they appear
78+
// out of order, crash.
79+
//
80+
// Going by the SDM, it's not clear whether this is actually possible,
81+
// so this code is purely defensive.
82+
if level != uint32(len(caches)) {
83+
panic("expected levels to be in order and for there to be one data/unified cache per level")
84+
}
85+
}
86+
return caches
87+
}
88+
89+
func getDataCacheSizesAMD() []uintptr {
90+
maxExtendedFunctionInformation, _, _, _ := cpuid(0x80000000, 0)
91+
if maxExtendedFunctionInformation < 0x80000006 {
92+
return nil
93+
}
94+
95+
var caches []uintptr
96+
97+
_, _, ecx5, _ := cpuid(0x80000005, 0)
98+
_, _, ecx6, edx6 := cpuid(0x80000006, 0)
99+
100+
// The size is return in kb, turning into bytes.
101+
l1dSize := uintptr(extractBits(ecx5, 24, 31) << 10)
102+
caches = append(caches, l1dSize)
103+
104+
// Check that L2 cache is present.
105+
if l2Assoc := extractBits(ecx6, 12, 15); l2Assoc == 0 {
106+
return caches
107+
}
108+
l2Size := uintptr(extractBits(ecx6, 16, 31) << 10)
109+
caches = append(caches, l2Size)
110+
111+
// Check that L3 cache is present.
112+
if l3Assoc := extractBits(edx6, 12, 15); l3Assoc == 0 {
113+
return caches
114+
}
115+
// Specifies the L3 cache size is within the following range:
116+
// (L3Size[31:18] * 512KB) <= L3 cache size < ((L3Size[31:18]+1) * 512KB).
117+
l3Size := uintptr(extractBits(edx6, 18, 31) * (512 << 10))
118+
caches = append(caches, l3Size)
119+
120+
return caches
121+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// Copyright 2025 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
//go:build 386 || amd64
6+
7+
package cpu_test
8+
9+
import (
10+
"internal/cpu"
11+
"testing"
12+
)
13+
14+
// Tests fetching data cache sizes. This test only checks that DataCacheSizes
15+
// won't explode. Otherwise it's just informational, and dumps the current
16+
// data cache sizes.
17+
func TestDataCacheSizes(t *testing.T) {
18+
// N.B. Don't try to check these values because we don't know what
19+
// kind of environment we're running in. We don't want this test to
20+
// fail on some random x86 chip that happens to not support the right
21+
// CPUID bits for some reason.
22+
caches := cpu.DataCacheSizes()
23+
for i, size := range caches {
24+
t.Logf("L%d: %d", i+1, size)
25+
}
26+
}

src/internal/goarch/goarch.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ const (
3434
// It is also the size of the machine's native word size (that is, 4 on 32-bit systems, 8 on 64-bit).
3535
const PtrSize = 4 << (^uintptr(0) >> 63)
3636

37+
// PtrBits is the bit width of a pointer.
38+
const PtrBits = PtrSize * 8
39+
3740
// ArchFamily is the architecture family (AMD64, ARM, ...)
3841
const ArchFamily ArchFamilyType = _ArchFamily
3942

0 commit comments

Comments
 (0)