Skip to content

Commit f46da5a

Browse files
committed
runtime: store large map keys and values indirectly
When a map key or value exceeds 128 bytes, the bucket now stores a pointer to separately allocated memory instead of the data inline. This matches Go's MapMaxKeyBytes/MapMaxElemBytes threshold and prevents bucket sizes from exploding for large key/value types. For example, on a 64-bit target (8-byte pointers and 8-byte int), map[[256]byte]int previously used 2128 bytes per bucket (16 header + 256*8 keys + 8*8 values); now it uses 144 bytes per bucket (16 header + 8*8 pointers + 8*8 values). The indirection is fully encapsulated in the runtime via helper functions (hashmapKeySlotSize, hashmapValueSlotSize, hashmapSlotKeyData, hashmapSlotValueData, hashmapStoreKey, hashmapStoreValue). No compiler or reflect changes are needed.
1 parent 0646500 commit f46da5a

5 files changed

Lines changed: 242 additions & 26 deletions

File tree

main_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ func TestBuild(t *testing.T) {
7777
"interface.go",
7878
"json.go",
7979
"map.go",
80+
"map_bigkey.go",
8081
"math.go",
8182
"oldgo/",
8283
"print.go",
@@ -283,6 +284,11 @@ func runPlatTests(options compileopts.Options, tests []string, t *testing.T) {
283284
// limited amount of memory.
284285
continue
285286

287+
case "map_bigkey.go":
288+
// Compiler generates many large stack temporaries for [256]byte
289+
// map keys, overflowing the goroutine stack (384 bytes).
290+
continue
291+
286292
case "gc.go":
287293
// Does not pass due to high mark false positive rate.
288294
continue

src/runtime/hashmap.go

Lines changed: 137 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,22 @@ type hashmap struct {
1616
buckets unsafe.Pointer // pointer to array of buckets
1717
seed uintptr
1818
count uintptr
19-
keySize uintptr // maybe this can store the key type as well? E.g. keysize == 5 means string?
19+
keySize uintptr
2020
valueSize uintptr
2121
bucketBits uint8
22+
flags uint8
2223
keyEqual func(x, y unsafe.Pointer, n uintptr) bool
2324
keyHash func(key unsafe.Pointer, size, seed uintptr) uint32
2425
}
2526

27+
const (
28+
hashmapMaxKeySize = 128
29+
hashmapMaxValueSize = 128
30+
31+
hashmapFlagIndirectKey = 1 << 0
32+
hashmapFlagIndirectValue = 1 << 1
33+
)
34+
2635
// A hashmap bucket. A bucket is a container of 8 key/value pairs: first the
2736
// following two entries, then the 8 keys, then the 8 values. This somewhat odd
2837
// ordering is to make sure the keys and values are well aligned when one of
@@ -40,6 +49,48 @@ type hashmapBucket struct {
4049
// like MIPS) are properly aligned in the bucket.
4150
const hashmapBucketHeaderSize = (unsafe.Sizeof(hashmapBucket{}) + 7) &^ 7
4251

52+
// hashmapKeySlotSize returns the size of a key slot in the bucket. For indirect
53+
// keys, this is the pointer size; otherwise the actual key size.
54+
//
55+
//go:inline
56+
func hashmapKeySlotSize(m *hashmap) uintptr {
57+
if m.flags&hashmapFlagIndirectKey != 0 {
58+
return unsafe.Sizeof(unsafe.Pointer(nil))
59+
}
60+
return m.keySize
61+
}
62+
63+
// hashmapValueSlotSize returns the size of a value slot in the bucket.
64+
//
65+
//go:inline
66+
func hashmapValueSlotSize(m *hashmap) uintptr {
67+
if m.flags&hashmapFlagIndirectValue != 0 {
68+
return unsafe.Sizeof(unsafe.Pointer(nil))
69+
}
70+
return m.valueSize
71+
}
72+
73+
// hashmapSlotKeyData returns a pointer to the actual key data for a given slot.
74+
// For indirect keys, the slot contains a pointer that must be dereferenced.
75+
//
76+
//go:inline
77+
func hashmapSlotKeyData(m *hashmap, slotKey unsafe.Pointer) unsafe.Pointer {
78+
if m.flags&hashmapFlagIndirectKey != 0 {
79+
return *(*unsafe.Pointer)(slotKey)
80+
}
81+
return slotKey
82+
}
83+
84+
// hashmapSlotValueData returns a pointer to the actual value data for a given slot.
85+
//
86+
//go:inline
87+
func hashmapSlotValueData(m *hashmap, slotValue unsafe.Pointer) unsafe.Pointer {
88+
if m.flags&hashmapFlagIndirectValue != 0 {
89+
return *(*unsafe.Pointer)(slotValue)
90+
}
91+
return slotValue
92+
}
93+
4394
type hashmapIterator struct {
4495
buckets unsafe.Pointer // pointer to array of hashmapBuckets
4596
numBuckets uintptr // length of buckets array
@@ -72,7 +123,19 @@ func hashmapMake(keySize, valueSize uintptr, sizeHint uintptr, alg uint8) *hashm
72123
bucketBits++
73124
}
74125

75-
bucketBufSize := hashmapBucketHeaderSize + keySize*8 + valueSize*8
126+
var flags uint8
127+
keySlotSize := keySize
128+
if keySize > hashmapMaxKeySize {
129+
flags |= hashmapFlagIndirectKey
130+
keySlotSize = unsafe.Sizeof(unsafe.Pointer(nil))
131+
}
132+
valueSlotSize := valueSize
133+
if valueSize > hashmapMaxValueSize {
134+
flags |= hashmapFlagIndirectValue
135+
valueSlotSize = unsafe.Sizeof(unsafe.Pointer(nil))
136+
}
137+
138+
bucketBufSize := hashmapBucketHeaderSize + keySlotSize*8 + valueSlotSize*8
76139
buckets := alloc(bucketBufSize*(1<<bucketBits), nil)
77140

78141
keyHash := hashmapKeyHashAlg(tinygo.HashmapAlgorithm(alg))
@@ -84,6 +147,7 @@ func hashmapMake(keySize, valueSize uintptr, sizeHint uintptr, alg uint8) *hashm
84147
keySize: keySize,
85148
valueSize: valueSize,
86149
bucketBits: bucketBits,
150+
flags: flags,
87151
keyEqual: keyEqual,
88152
keyHash: keyHash,
89153
}
@@ -178,7 +242,7 @@ func hashmapLen(m *hashmap) int {
178242

179243
//go:inline
180244
func hashmapBucketSize(m *hashmap) uintptr {
181-
return hashmapBucketHeaderSize + uintptr(m.keySize)*8 + uintptr(m.valueSize)*8
245+
return hashmapBucketHeaderSize + hashmapKeySlotSize(m)*8 + hashmapValueSlotSize(m)*8
182246
}
183247

184248
//go:inline
@@ -197,16 +261,14 @@ func hashmapBucketAddrForHash(m *hashmap, hash uint32) *hashmapBucket {
197261

198262
//go:inline
199263
func hashmapSlotKey(m *hashmap, bucket *hashmapBucket, slot uint8) unsafe.Pointer {
200-
slotKeyOffset := hashmapBucketHeaderSize + uintptr(m.keySize)*uintptr(slot)
201-
slotKey := unsafe.Add(unsafe.Pointer(bucket), slotKeyOffset)
202-
return slotKey
264+
slotKeyOffset := hashmapBucketHeaderSize + hashmapKeySlotSize(m)*uintptr(slot)
265+
return unsafe.Add(unsafe.Pointer(bucket), slotKeyOffset)
203266
}
204267

205268
//go:inline
206269
func hashmapSlotValue(m *hashmap, bucket *hashmapBucket, slot uint8) unsafe.Pointer {
207-
slotValueOffset := hashmapBucketHeaderSize + uintptr(m.keySize)*8 + uintptr(m.valueSize)*uintptr(slot)
208-
slotValue := unsafe.Add(unsafe.Pointer(bucket), slotValueOffset)
209-
return slotValue
270+
slotValueOffset := hashmapBucketHeaderSize + hashmapKeySlotSize(m)*8 + hashmapValueSlotSize(m)*uintptr(slot)
271+
return unsafe.Add(unsafe.Pointer(bucket), slotValueOffset)
210272
}
211273

212274
// Set a specified key to a given value. Grow the map if necessary.
@@ -240,9 +302,9 @@ func hashmapSet(m *hashmap, key unsafe.Pointer, value unsafe.Pointer, hash uint3
240302
}
241303
if bucket.tophash[i] == tophash {
242304
// Could be an existing key that's the same.
243-
if m.keyEqual(key, slotKey, m.keySize) {
244-
// found same key, replace it
245-
memcpy(slotValue, value, m.valueSize)
305+
if m.keyEqual(key, hashmapSlotKeyData(m, slotKey), m.keySize) {
306+
// found same key, replace the value
307+
hashmapStoreValue(m, slotValue, value)
246308
return
247309
}
248310
}
@@ -257,11 +319,45 @@ func hashmapSet(m *hashmap, key unsafe.Pointer, value unsafe.Pointer, hash uint3
257319
return
258320
}
259321
m.count++
260-
memcpy(emptySlotKey, key, m.keySize)
261-
memcpy(emptySlotValue, value, m.valueSize)
322+
hashmapStoreKey(m, emptySlotKey, key)
323+
hashmapStoreValue(m, emptySlotValue, value)
262324
*emptySlotTophash = tophash
263325
}
264326

327+
// hashmapStoreKey stores a key into a bucket slot. For indirect keys it always
328+
// allocates new backing storage, copies the key into it, and stores the pointer
329+
// in the slot; otherwise the key is copied into the slot inline.
330+
//
331+
//go:inline
332+
func hashmapStoreKey(m *hashmap, slotKey, key unsafe.Pointer) {
333+
if m.flags&hashmapFlagIndirectKey != 0 {
334+
p := alloc(m.keySize, nil)
335+
memcpy(p, key, m.keySize)
336+
*(*unsafe.Pointer)(slotKey) = p
337+
} else {
338+
memcpy(slotKey, key, m.keySize)
339+
}
340+
}
341+
342+
// hashmapStoreValue stores a value into a bucket slot. For indirect values,
343+
// it allocates backing storage on first insert or copies into the existing
344+
// backing on overwrite.
345+
//
346+
//go:inline
347+
func hashmapStoreValue(m *hashmap, slotValue, value unsafe.Pointer) {
348+
if m.flags&hashmapFlagIndirectValue != 0 {
349+
p := *(*unsafe.Pointer)(slotValue)
350+
if p == nil {
351+
// First insert: allocate backing storage.
352+
p = alloc(m.valueSize, nil)
353+
*(*unsafe.Pointer)(slotValue) = p
354+
}
355+
memcpy(p, value, m.valueSize)
356+
} else {
357+
memcpy(slotValue, value, m.valueSize)
358+
}
359+
}
360+
265361
// hashmapInsertIntoNewBucket creates a new bucket, inserts the given key and
266362
// value into the bucket, and returns a pointer to this bucket.
267363
func hashmapInsertIntoNewBucket(m *hashmap, key, value unsafe.Pointer, tophash uint8) *hashmapBucket {
@@ -273,8 +369,8 @@ func hashmapInsertIntoNewBucket(m *hashmap, key, value unsafe.Pointer, tophash u
273369
slotKey := hashmapSlotKey(m, bucket, 0)
274370
slotValue := hashmapSlotValue(m, bucket, 0)
275371
m.count++
276-
memcpy(slotKey, key, m.keySize)
277-
memcpy(slotValue, value, m.valueSize)
372+
hashmapStoreKey(m, slotKey, key)
373+
hashmapStoreValue(m, slotValue, value)
278374
bucket.tophash[0] = tophash
279375
return bucket
280376
}
@@ -337,12 +433,12 @@ func hashmapGet(m *hashmap, key, value unsafe.Pointer, valueSize uintptr, hash u
337433
for bucket != nil {
338434
for i := uint8(0); i < 8; i++ {
339435
slotKey := hashmapSlotKey(m, bucket, i)
340-
slotValue := hashmapSlotValue(m, bucket, i)
341436
if bucket.tophash[i] == tophash {
342437
// This could be the key we're looking for.
343-
if m.keyEqual(key, slotKey, m.keySize) {
438+
if m.keyEqual(key, hashmapSlotKeyData(m, slotKey), m.keySize) {
344439
// Found the key, copy it.
345-
memcpy(value, slotValue, m.valueSize)
440+
slotValue := hashmapSlotValue(m, bucket, i)
441+
memcpy(value, hashmapSlotValueData(m, slotValue), m.valueSize)
346442
return true
347443
}
348444
}
@@ -376,13 +472,15 @@ func hashmapDelete(m *hashmap, key unsafe.Pointer, hash uint32) {
376472
slotKey := hashmapSlotKey(m, bucket, i)
377473
if bucket.tophash[i] == tophash {
378474
// This could be the key we're looking for.
379-
if m.keyEqual(key, slotKey, m.keySize) {
475+
if m.keyEqual(key, hashmapSlotKeyData(m, slotKey), m.keySize) {
380476
// Found the key, delete it.
381477
bucket.tophash[i] = 0
382-
// Zero out the key and value so garbage collector doesn't pin the allocations.
383-
memzero(slotKey, m.keySize)
478+
// Zero out the slot so the GC won't pin the allocations.
479+
keySlotSize := hashmapKeySlotSize(m)
480+
memzero(slotKey, keySlotSize)
384481
slotValue := hashmapSlotValue(m, bucket, i)
385-
memzero(slotValue, m.valueSize)
482+
valueSlotSize := hashmapValueSlotSize(m)
483+
memzero(slotValue, valueSlotSize)
386484
m.count--
387485
return
388486
}
@@ -444,13 +542,13 @@ func hashmapNext(m *hashmap, it *hashmapIterator, key, value unsafe.Pointer) boo
444542

445543
// Found a key.
446544
slotKey := hashmapSlotKey(m, it.bucket, it.bucketIndex)
447-
memcpy(key, slotKey, m.keySize)
545+
memcpy(key, hashmapSlotKeyData(m, slotKey), m.keySize)
448546

449547
if it.buckets == m.buckets {
450548
// Our view of the buckets is the same as the parent map.
451549
// Just copy the value we have
452550
slotValue := hashmapSlotValue(m, it.bucket, it.bucketIndex)
453-
memcpy(value, slotValue, m.valueSize)
551+
memcpy(value, hashmapSlotValueData(m, slotValue), m.valueSize)
454552
it.bucketIndex++
455553
} else {
456554
it.bucketIndex++
@@ -539,7 +637,19 @@ func hashmapMakeGeneric(keySize, valueSize uintptr, sizeHint uintptr,
539637
bucketBits++
540638
}
541639

542-
bucketBufSize := hashmapBucketHeaderSize + keySize*8 + valueSize*8
640+
var flags uint8
641+
keySlotSize := keySize
642+
if keySize > hashmapMaxKeySize {
643+
flags |= hashmapFlagIndirectKey
644+
keySlotSize = unsafe.Sizeof(unsafe.Pointer(nil))
645+
}
646+
valueSlotSize := valueSize
647+
if valueSize > hashmapMaxValueSize {
648+
flags |= hashmapFlagIndirectValue
649+
valueSlotSize = unsafe.Sizeof(unsafe.Pointer(nil))
650+
}
651+
652+
bucketBufSize := hashmapBucketHeaderSize + keySlotSize*8 + valueSlotSize*8
543653
buckets := alloc(bucketBufSize*(1<<bucketBits), nil)
544654

545655
return &hashmap{
@@ -548,6 +658,7 @@ func hashmapMakeGeneric(keySize, valueSize uintptr, sizeHint uintptr,
548658
keySize: keySize,
549659
valueSize: valueSize,
550660
bucketBits: bucketBits,
661+
flags: flags,
551662
keyEqual: keyEqual,
552663
keyHash: keyHash,
553664
}

testdata/map_bigkey.go

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
package main
2+
3+
// Test maps with keys and values larger than 128 bytes, which triggers
4+
// indirect storage in the bucket (pointers instead of inline data).
5+
//
6+
// This is a separate file from map.go because the compiler generates many
7+
// large stack temporaries for map operations on [256]byte keys, which
8+
// overflows the goroutine stack on AVR (384 bytes). AVR skips this test.
9+
10+
type BigKey [256]byte
11+
type BigValue [256]byte
12+
13+
func main() {
14+
// Large key, small value.
15+
m1 := make(map[BigKey]int)
16+
var k1 BigKey
17+
k1[0] = 1
18+
k1[255] = 42
19+
m1[k1] = 100
20+
21+
var k1same BigKey
22+
k1same[0] = 1
23+
k1same[255] = 42
24+
25+
var k1diff BigKey
26+
k1diff[0] = 2
27+
28+
println("bigkey get:", m1[k1])
29+
println("bigkey get same:", m1[k1same])
30+
println("bigkey get diff:", m1[k1diff])
31+
32+
// Overwrite.
33+
m1[k1] = 200
34+
println("bigkey overwrite:", m1[k1])
35+
36+
// Small key, large value.
37+
m2 := make(map[int]BigValue)
38+
var v BigValue
39+
v[0] = 7
40+
v[255] = 99
41+
m2[1] = v
42+
got := m2[1]
43+
println("bigval get:", got[0], got[255])
44+
45+
// Both large.
46+
m3 := make(map[BigKey]BigValue)
47+
m3[k1] = v
48+
got3 := m3[k1]
49+
println("bigboth get:", got3[0], got3[255])
50+
51+
// Delete.
52+
delete(m3, k1)
53+
got3 = m3[k1]
54+
println("bigboth deleted:", got3[0])
55+
56+
// Iteration.
57+
m1[k1diff] = 300
58+
count := 0
59+
for range m1 {
60+
count++
61+
}
62+
println("bigkey len:", len(m1), "iterated:", count)
63+
}

testdata/map_bigkey.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
bigkey get: 100
2+
bigkey get same: 100
3+
bigkey get diff: 0
4+
bigkey overwrite: 200
5+
bigval get: 7 99
6+
bigboth get: 7 99
7+
bigboth deleted: 0
8+
bigkey len: 2 iterated: 2

0 commit comments

Comments
 (0)