Skip to content

Commit 53009b2

Browse files
committed
runtime: use a smaller arena size on Wasm
On Wasm, some programs have very small heaps. Currently, we use 4 MB arena size (like all other 32-bit platforms). For a very small program, it needs to allocate one heap arena, 4 MB size at a 4 MB aligned address. So we'll need 8 MB of linear memory, whereas only a smaller portion is actually used by the program. On Wasm, small programs are not uncommon (e.g. WASI plugins), and users are concerned about the memory usage. This CL switches to a smaller arena size, as well as a smaller page allocator chunk size (both are now 512 KB). So the heap will be grown in 512 KB granularity. For a helloworld program, it now uses less than 3 MB of linear memory, instead of 8 MB. Change-Id: Ibd66c1fa6e794a12c00906cbacc8f2e410f196c4 Reviewed-on: https://go-review.googlesource.com/c/go/+/683296 Reviewed-by: David Chase <[email protected]> Reviewed-by: Michael Knyszek <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent 3a5df9d commit 53009b2

File tree

9 files changed

+375
-323
lines changed

9 files changed

+375
-323
lines changed

src/runtime/malloc.go

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -233,27 +233,30 @@ const (
233233
// ios/arm64 40 4MB 1 256K (2MB)
234234
// */32-bit 32 4MB 1 1024 (4KB)
235235
// */mips(le) 31 4MB 1 512 (2KB)
236+
// wasm 32 512KB 1 8192 (64KB)
236237

237238
// heapArenaBytes is the size of a heap arena. The heap
238239
// consists of mappings of size heapArenaBytes, aligned to
239240
// heapArenaBytes. The initial heap mapping is one arena.
240241
//
241-
// This is currently 64MB on 64-bit non-Windows and 4MB on
242-
// 32-bit and on Windows. We use smaller arenas on Windows
243-
// because all committed memory is charged to the process,
244-
// even if it's not touched. Hence, for processes with small
245-
// heaps, the mapped arena space needs to be commensurate.
246-
// This is particularly important with the race detector,
247-
// since it significantly amplifies the cost of committed
248-
// memory.
242+
// This is currently 64MB on 64-bit non-Windows, 4MB on
243+
// 32-bit and on Windows, and 512KB on Wasm. We use smaller
244+
// arenas on Windows because all committed memory is charged
245+
// to the process, even if it's not touched. Hence, for
246+
// processes with small heaps, the mapped arena space needs
247+
// to be commensurate. This is particularly important with
248+
// the race detector, since it significantly amplifies the
249+
// cost of committed memory. We use smaller arenas on Wasm
250+
// because some Wasm programs have very small heaps, and
251+
// everything in the Wasm linear memory is charged.
249252
heapArenaBytes = 1 << logHeapArenaBytes
250253

251254
heapArenaWords = heapArenaBytes / goarch.PtrSize
252255

253256
// logHeapArenaBytes is log_2 of heapArenaBytes. For clarity,
254257
// prefer using heapArenaBytes where possible (we need the
255258
// constant to compute some other constants).
256-
logHeapArenaBytes = (6+20)*(_64bit*(1-goos.IsWindows)*(1-goarch.IsWasm)*(1-goos.IsIos*goarch.IsArm64)) + (2+20)*(_64bit*goos.IsWindows) + (2+20)*(1-_64bit) + (2+20)*goarch.IsWasm + (2+20)*goos.IsIos*goarch.IsArm64
259+
logHeapArenaBytes = (6+20)*(_64bit*(1-goos.IsWindows)*(1-goarch.IsWasm)*(1-goos.IsIos*goarch.IsArm64)) + (2+20)*(_64bit*goos.IsWindows) + (2+20)*(1-_64bit) + (9+10)*goarch.IsWasm + (2+20)*goos.IsIos*goarch.IsArm64
257260

258261
// heapArenaBitmapWords is the size of each heap arena's bitmap in uintptrs.
259262
heapArenaBitmapWords = heapArenaWords / (8 * goarch.PtrSize)

src/runtime/mgcmark.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ const (
5050
//
5151
// Must be a multiple of the pageInUse bitmap element size and
5252
// must also evenly divide pagesPerArena.
53-
pagesPerSpanRoot = 512
53+
pagesPerSpanRoot = min(512, pagesPerArena)
5454
)
5555

5656
// gcPrepareMarkRoots queues root scanning jobs (stacks, globals, and

src/runtime/mgcscavenge_test.go

Lines changed: 81 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -121,13 +121,17 @@ func TestPallocDataFindScavengeCandidate(t *testing.T) {
121121
max: PallocChunkPages,
122122
want: BitRange{41, 1},
123123
},
124-
"MultiMin1": {
125-
alloc: []BitRange{{0, 63}, {65, 20}, {87, PallocChunkPages - 87}},
124+
}
125+
if PallocChunkPages >= 512 {
126+
// avoid constant overflow when PallocChunkPages is small
127+
var pallocChunkPages uint = PallocChunkPages
128+
tests["MultiMin1"] = test{
129+
alloc: []BitRange{{0, 63}, {65, 20}, {87, pallocChunkPages - 87}},
126130
scavenged: []BitRange{{86, 1}},
127131
min: 1,
128132
max: PallocChunkPages,
129133
want: BitRange{85, 1},
130-
},
134+
}
131135
}
132136
// Try out different page minimums.
133137
for m := uintptr(1); m <= 64; m *= 2 {
@@ -162,25 +166,27 @@ func TestPallocDataFindScavengeCandidate(t *testing.T) {
162166
max: PallocChunkPages,
163167
want: BitRange{PallocChunkPages - uint(m), uint(m)},
164168
}
165-
tests["Straddle64"+suffix] = test{
166-
alloc: []BitRange{{0, 64 - uint(m)}, {64 + uint(m), PallocChunkPages - (64 + uint(m))}},
167-
min: m,
168-
max: 2 * m,
169-
want: BitRange{64 - uint(m), 2 * uint(m)},
170-
}
171-
tests["BottomEdge64WithFull"+suffix] = test{
172-
alloc: []BitRange{{64, 64}, {128 + 3*uint(m), PallocChunkPages - (128 + 3*uint(m))}},
173-
scavenged: []BitRange{{1, 10}},
174-
min: m,
175-
max: 3 * m,
176-
want: BitRange{128, 3 * uint(m)},
177-
}
178-
tests["BottomEdge64WithPocket"+suffix] = test{
179-
alloc: []BitRange{{64, 62}, {127, 1}, {128 + 3*uint(m), PallocChunkPages - (128 + 3*uint(m))}},
180-
scavenged: []BitRange{{1, 10}},
181-
min: m,
182-
max: 3 * m,
183-
want: BitRange{128, 3 * uint(m)},
169+
if PallocChunkPages >= 512 {
170+
tests["Straddle64"+suffix] = test{
171+
alloc: []BitRange{{0, 64 - uint(m)}, {64 + uint(m), PallocChunkPages - (64 + uint(m))}},
172+
min: m,
173+
max: 2 * m,
174+
want: BitRange{64 - uint(m), 2 * uint(m)},
175+
}
176+
tests["BottomEdge64WithFull"+suffix] = test{
177+
alloc: []BitRange{{64, 64}, {128 + 3*uint(m), PallocChunkPages - (128 + 3*uint(m))}},
178+
scavenged: []BitRange{{1, 10}},
179+
min: m,
180+
max: 3 * m,
181+
want: BitRange{128, 3 * uint(m)},
182+
}
183+
tests["BottomEdge64WithPocket"+suffix] = test{
184+
alloc: []BitRange{{64, 62}, {127, 1}, {128 + 3*uint(m), PallocChunkPages - (128 + 3*uint(m))}},
185+
scavenged: []BitRange{{1, 10}},
186+
min: m,
187+
max: 3 * m,
188+
want: BitRange{128, 3 * uint(m)},
189+
}
184190
}
185191
tests["Max0"+suffix] = test{
186192
scavenged: []BitRange{{0, PallocChunkPages - uint(m)}},
@@ -204,23 +210,29 @@ func TestPallocDataFindScavengeCandidate(t *testing.T) {
204210
}
205211
}
206212
if m > 1 {
207-
tests["MaxUnaligned"+suffix] = test{
208-
scavenged: []BitRange{{0, PallocChunkPages - uint(m*2-1)}},
209-
min: m,
210-
max: m - 2,
211-
want: BitRange{PallocChunkPages - uint(m), uint(m)},
212-
}
213-
tests["SkipSmall"+suffix] = test{
214-
alloc: []BitRange{{0, 64 - uint(m)}, {64, 5}, {70, 11}, {82, PallocChunkPages - 82}},
215-
min: m,
216-
max: m,
217-
want: BitRange{64 - uint(m), uint(m)},
213+
if PallocChunkPages >= m*2 {
214+
tests["MaxUnaligned"+suffix] = test{
215+
scavenged: []BitRange{{0, PallocChunkPages - uint(m*2-1)}},
216+
min: m,
217+
max: m - 2,
218+
want: BitRange{PallocChunkPages - uint(m), uint(m)},
219+
}
218220
}
219-
tests["SkipMisaligned"+suffix] = test{
220-
alloc: []BitRange{{0, 64 - uint(m)}, {64, 63}, {127 + uint(m), PallocChunkPages - (127 + uint(m))}},
221-
min: m,
222-
max: m,
223-
want: BitRange{64 - uint(m), uint(m)},
221+
if PallocChunkPages >= 512 {
222+
// avoid constant overflow when PallocChunkPages is small
223+
var PallocChunkPages uint = PallocChunkPages
224+
tests["SkipSmall"+suffix] = test{
225+
alloc: []BitRange{{0, 64 - uint(m)}, {64, 5}, {70, 11}, {82, PallocChunkPages - 82}},
226+
min: m,
227+
max: m,
228+
want: BitRange{64 - uint(m), uint(m)},
229+
}
230+
tests["SkipMisaligned"+suffix] = test{
231+
alloc: []BitRange{{0, 64 - uint(m)}, {64, 63}, {127 + uint(m), PallocChunkPages - (127 + uint(m))}},
232+
min: m,
233+
max: m,
234+
want: BitRange{64 - uint(m), uint(m)},
235+
}
224236
}
225237
tests["MaxLessThan"+suffix] = test{
226238
scavenged: []BitRange{{0, PallocChunkPages - uint(m)}},
@@ -641,7 +653,7 @@ func TestScavengeIndex(t *testing.T) {
641653
mark func(markFunc)
642654
find func(findFunc)
643655
}
644-
for _, test := range []testCase{
656+
tests := []testCase{
645657
{
646658
name: "Uninitialized",
647659
mark: func(_ markFunc) {},
@@ -692,26 +704,6 @@ func TestScavengeIndex(t *testing.T) {
692704
find(BaseChunkIdx, PallocChunkPages-1)
693705
},
694706
},
695-
{
696-
name: "TwoChunks",
697-
mark: func(mark markFunc) {
698-
mark(PageBase(BaseChunkIdx, 128), PageBase(BaseChunkIdx+1, 128))
699-
},
700-
find: func(find findFunc) {
701-
find(BaseChunkIdx+1, 127)
702-
find(BaseChunkIdx, PallocChunkPages-1)
703-
},
704-
},
705-
{
706-
name: "TwoChunksOffset",
707-
mark: func(mark markFunc) {
708-
mark(PageBase(BaseChunkIdx+7, 128), PageBase(BaseChunkIdx+8, 129))
709-
},
710-
find: func(find findFunc) {
711-
find(BaseChunkIdx+8, 128)
712-
find(BaseChunkIdx+7, PallocChunkPages-1)
713-
},
714-
},
715707
{
716708
name: "SevenChunksOffset",
717709
mark: func(mark markFunc) {
@@ -793,7 +785,32 @@ func TestScavengeIndex(t *testing.T) {
793785
}
794786
},
795787
},
796-
} {
788+
}
789+
if PallocChunkPages >= 512 {
790+
tests = append(tests,
791+
testCase{
792+
name: "TwoChunks",
793+
mark: func(mark markFunc) {
794+
mark(PageBase(BaseChunkIdx, 128), PageBase(BaseChunkIdx+1, 128))
795+
},
796+
find: func(find findFunc) {
797+
find(BaseChunkIdx+1, 127)
798+
find(BaseChunkIdx, PallocChunkPages-1)
799+
},
800+
},
801+
testCase{
802+
name: "TwoChunksOffset",
803+
mark: func(mark markFunc) {
804+
mark(PageBase(BaseChunkIdx+7, 128), PageBase(BaseChunkIdx+8, 129))
805+
},
806+
find: func(find findFunc) {
807+
find(BaseChunkIdx+8, 128)
808+
find(BaseChunkIdx+7, PallocChunkPages-1)
809+
},
810+
},
811+
)
812+
}
813+
for _, test := range tests {
797814
test := test
798815
t.Run("Bg/"+test.name, func(t *testing.T) {
799816
mark, find, nextGen := setup(t, false)
@@ -830,8 +847,10 @@ func TestScavengeIndex(t *testing.T) {
830847
}
831848

832849
func TestScavChunkDataPack(t *testing.T) {
833-
if !CheckPackScavChunkData(1918237402, 512, 512, 0b11) {
834-
t.Error("failed pack/unpack check for scavChunkData 1")
850+
if PallocChunkPages >= 512 {
851+
if !CheckPackScavChunkData(1918237402, 512, 512, 0b11) {
852+
t.Error("failed pack/unpack check for scavChunkData 1")
853+
}
835854
}
836855
if !CheckPackScavChunkData(^uint32(0), 12, 0, 0b00) {
837856
t.Error("failed pack/unpack check for scavChunkData 2")

src/runtime/mheap.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ const (
4747
//
4848
// Must be a multiple of the pageInUse bitmap element size and
4949
// must also evenly divide pagesPerArena.
50-
pagesPerReclaimerChunk = 512
50+
pagesPerReclaimerChunk = min(512, pagesPerArena)
5151

5252
// physPageAlignedStacks indicates whether stack allocations must be
5353
// physical page aligned. This is a requirement for MAP_STACK on

src/runtime/mpagealloc.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,17 +48,20 @@
4848
package runtime
4949

5050
import (
51+
"internal/goarch"
5152
"internal/runtime/atomic"
5253
"internal/runtime/gc"
5354
"unsafe"
5455
)
5556

5657
const (
5758
// The size of a bitmap chunk, i.e. the amount of bits (that is, pages) to consider
58-
// in the bitmap at once.
59+
// in the bitmap at once. It is 4MB on most platforms, except on Wasm it is 512KB.
60+
// We use a smaller chunk size on Wasm for the same reason as the smaller arena
61+
// size (see heapArenaBytes).
5962
pallocChunkPages = 1 << logPallocChunkPages
6063
pallocChunkBytes = pallocChunkPages * pageSize
61-
logPallocChunkPages = 9
64+
logPallocChunkPages = 9*(1-goarch.IsWasm) + 6*goarch.IsWasm
6265
logPallocChunkBytes = logPallocChunkPages + gc.PageShift
6366

6467
// The number of radix bits for each level.
@@ -220,6 +223,7 @@ type pageAlloc struct {
220223
// heapAddrBits | L1 Bits | L2 Bits | L2 Entry Size
221224
// ------------------------------------------------
222225
// 32 | 0 | 10 | 128 KiB
226+
// 32 (wasm) | 0 | 13 | 128 KiB
223227
// 33 (iOS) | 0 | 11 | 256 KiB
224228
// 48 | 13 | 13 | 1 MiB
225229
//

0 commit comments

Comments
 (0)