
Commit 5d040df

mknyszek authored and gopherbot committed
runtime: use scan kernels in scanSpan [green tea]
This is an extra 15-20% faster than the current sparse span scanning when AVX512+GFNI is available and there's sufficient density.

For golang#73581.

Change-Id: I9688e09885dd76c5ccab7c492c85a7e14e18ee04
Reviewed-on: https://go-review.googlesource.com/c/go/+/665495
Reviewed-by: Michael Pratt <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
Auto-Submit: Michael Knyszek <[email protected]>
1 parent 7e0251b commit 5d040df
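To make the dispatch concrete: the change takes the new dense path only when a fast packed-scan kernel is available (AVX512+GFNI on amd64) and at least 1/8 of the span's objects were newly marked. A minimal standalone sketch of that gate — the function below is invented for illustration; the runtime expresses the same check inline in scanSpan:

package main

import "fmt"

// denseScanWorthwhile mirrors the density gate added to scanSpan: take the
// packed (SIMD) scan path only if a fast kernel exists for this machine and
// at least 1/8 of the span's objects were newly marked.
func denseScanWorthwhile(hasFastKernel bool, objsMarked, nelems int) bool {
	return hasFastKernel && objsMarked >= nelems/8
}

func main() {
	// For a 512-object span, at least 64 marked objects are needed.
	fmt.Println(denseScanWorthwhile(true, 63, 512))   // false: too sparse
	fmt.Println(denseScanWorthwhile(true, 64, 512))   // true: dense enough
	fmt.Println(denseScanWorthwhile(false, 512, 512)) // false: no fast kernel
}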

File tree: 1 file changed (+60, −9 lines changed)


src/runtime/mgcmark_greenteagc.go

Lines changed: 60 additions & 9 deletions
@@ -41,6 +41,7 @@ import (
 	"internal/goarch"
 	"internal/runtime/atomic"
 	"internal/runtime/gc"
+	"internal/runtime/gc/scan"
 	"internal/runtime/sys"
 	"unsafe"
 )
@@ -259,7 +260,7 @@ func gcUsesSpanInlineMarkBits(size uintptr) bool {
 	return heapBitsInSpan(size) && size >= 16
 }
 
-// tryQueueOnSpan tries to queue p on the span it points to, if it
+// tryDeferToSpanScan tries to queue p on the span it points to, if it
 // points to a small object span (gcUsesSpanQueue size).
 func tryDeferToSpanScan(p uintptr, gcw *gcWork) bool {
 	if useCheckmark {
@@ -608,8 +609,7 @@ func scanSpan(p objptr, gcw *gcWork) {
 	atomic.Or8(bytep, mask)
 	gcw.bytesMarked += uint64(elemsize)
 	if debug.gctrace > 1 {
-		gcw.stats[spanclass.sizeclass()].spansSparseScanned++
-		gcw.stats[spanclass.sizeclass()].spanObjsSparseScanned++
+		gcw.stats[spanclass.sizeclass()].sparseObjsScanned++
 	}
 	b := spanBase + uintptr(objIndex)*elemsize
 	scanObjectSmall(spanBase, b, elemsize, gcw)
@@ -631,11 +631,47 @@ func scanSpan(p objptr, gcw *gcWork) {
 		return
 	}
 	gcw.bytesMarked += uint64(objsMarked) * uint64(elemsize)
+
+	// Check if we have enough density to make a dartboard scan
+	// worthwhile. If not, just do what scanobject does, but
+	// localized to the span, using the dartboard.
+	if !scan.HasFastScanSpanPacked() || objsMarked < int(nelems/8) {
+		if debug.gctrace > 1 {
+			gcw.stats[spanclass.sizeclass()].spansSparseScanned++
+			gcw.stats[spanclass.sizeclass()].spanObjsSparseScanned += uint64(objsMarked)
+		}
+		scanObjectsSmall(spanBase, elemsize, nelems, gcw, &toScan)
+		return
+	}
+
+	// Scan the span.
+	//
+	// N.B. Use gcw.ptrBuf as the output buffer. This is a bit different from
+	// scanObjectsSmall, which fills it with addresses to dereference.
+	// ScanSpanPacked, on the other hand, fills it with already-dereferenced pointers.
+	nptrs := scan.ScanSpanPacked(
+		unsafe.Pointer(spanBase),
+		&gcw.ptrBuf[0],
+		&toScan,
+		uintptr(spanclass.sizeclass()),
+		spanPtrMaskUnsafe(spanBase),
+	)
+	gcw.heapScanWork += int64(objsMarked) * int64(elemsize)
+
 	if debug.gctrace > 1 {
+		// Write down some statistics.
 		gcw.stats[spanclass.sizeclass()].spansDenseScanned++
 		gcw.stats[spanclass.sizeclass()].spanObjsDenseScanned += uint64(objsMarked)
 	}
-	scanObjectsSmall(spanBase, elemsize, nelems, gcw, &toScan)
+
+	// Process all the pointers we just got.
+	for _, p := range gcw.ptrBuf[:nptrs] {
+		if !tryDeferToSpanScan(p, gcw) {
+			if obj, span, objIndex := findObject(p, 0, 0); obj != 0 {
+				greyobject(obj, 0, 0, span, gcw, objIndex)
+			}
+		}
+	}
 }
 
 // spanSetScans sets any unset mark bits that have their mark bits set in the inline mark bits.
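The comment in the hunk above draws a distinction worth spelling out: the sparse path's buffer holds addresses of pointer slots that still need a load, while ScanSpanPacked emits pointer values that have already been loaded, which is why the loop at the end of the hunk can mark them directly. A sketch of the two draining conventions — the helper and callback names below are made up, and markPointer stands in for the tryDeferToSpanScan/greyobject handling:

package main

import (
	"fmt"
	"unsafe"
)

// drainSlotAddrs models the sparse convention: each entry is the address of
// a pointer slot, so it must be dereferenced before the pointee is marked.
func drainSlotAddrs(buf []uintptr, markPointer func(uintptr)) {
	for _, addr := range buf {
		p := *(*uintptr)(unsafe.Pointer(addr)) // load the pointer stored at addr
		if p != 0 {
			markPointer(p)
		}
	}
}

// drainPtrValues models the dense convention used with ScanSpanPacked:
// entries are already pointer values and can be marked directly.
func drainPtrValues(buf []uintptr, markPointer func(uintptr)) {
	for _, p := range buf {
		markPointer(p)
	}
}

func main() {
	var target int
	ptr := uintptr(unsafe.Pointer(&target)) // a pointer value
	slot := uintptr(unsafe.Pointer(&ptr))   // the address of the slot holding it

	mark := func(p uintptr) { fmt.Printf("mark %#x\n", p) }
	drainSlotAddrs([]uintptr{slot}, mark) // loads ptr from slot, then marks
	drainPtrValues([]uintptr{ptr}, mark)  // marks directly
}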
@@ -798,12 +834,27 @@ func heapBitsSmallForAddrInline(spanBase, addr, elemsize uintptr) uintptr {
 	return read
 }
 
+// spanPtrMaskUnsafe returns the pointer mask for a span with inline mark bits.
+//
+// The caller must ensure spanBase is the base of a span that:
+// - Is 1 page in size,
+// - Uses inline mark bits,
+// - Contains pointers.
+func spanPtrMaskUnsafe(spanBase uintptr) *gc.PtrMask {
+	base := spanBase + gc.PageSize - unsafe.Sizeof(gc.PtrMask{}) - unsafe.Sizeof(spanInlineMarkBits{})
+	return (*gc.PtrMask)(unsafe.Pointer(base))
+}
+
 type sizeClassScanStats struct {
-	spansDenseScanned     uint64
-	spanObjsDenseScanned  uint64
-	spansSparseScanned    uint64
-	spanObjsSparseScanned uint64
-	sparseObjsScanned     uint64
+	spansDenseScanned     uint64 // Spans scanned with ScanSpanPacked.
+	spanObjsDenseScanned  uint64 // Objects scanned with ScanSpanPacked.
+	spansSparseScanned    uint64 // Spans scanned with scanObjectsSmall.
+	spanObjsSparseScanned uint64 // Objects scanned with scanObjectsSmall.
+	sparseObjsScanned     uint64 // Objects scanned with scanobject or scanObjectSmall.
+	// Note: sparseObjsScanned is sufficient for both cases because
+	// a particular size class either uses scanobject or scanObjectSmall,
+	// not both. In the latter case, we also know that there was one
+	// object scanned per span, so no need for a span counter.
 }
 
 func dumpScanStats() {
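For context on spanPtrMaskUnsafe's address arithmetic: in spans with inline metadata, the pointer mask sits near the end of the one-page span, immediately below the inline mark bits, so its base is found by stepping back from the page end over both structures. A toy recomputation of that layout, with placeholder sizes standing in for gc.PtrMask and spanInlineMarkBits (the real runtime sizes differ):

package main

import (
	"fmt"
	"unsafe"
)

const pageSize = 8192 // assumed page/span size; gc.PageSize in the runtime

type ptrMask [pageSize / 8 / 8]byte // 1 bit per 8-byte word (assumed shape)
type inlineMarkBits [64]byte        // placeholder for spanInlineMarkBits

// ptrMaskBase mirrors spanPtrMaskUnsafe: start from the end of the page and
// step back over the inline mark bits and then the mask itself.
func ptrMaskBase(spanBase uintptr) uintptr {
	return spanBase + pageSize - unsafe.Sizeof(ptrMask{}) - unsafe.Sizeof(inlineMarkBits{})
}

func main() {
	const spanBase uintptr = 0x1000000 // any page-aligned base, for illustration
	fmt.Printf("span:        %#x..%#x\n", spanBase, spanBase+pageSize)
	fmt.Printf("ptr mask at: %#x\n", ptrMaskBase(spanBase))
	fmt.Printf("marks at:    %#x\n", spanBase+pageSize-unsafe.Sizeof(inlineMarkBits{}))
}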
