Commit 7f81fa9

Thomas Stromberg authored and committed
Replace maps with bloom filters for lower memory use
1 parent 117a5d1 commit 7f81fa9

File tree

1 file changed (+81, -36 lines)


s3fifo.go

Lines changed: 81 additions & 36 deletions
@@ -95,12 +95,13 @@ type shard[K comparable, V any] struct {
     small entryList[K, V] // Intrusive list for small queue
     main  entryList[K, V] // Intrusive list for main queue

-    // Two-map ghost: tracks recently evicted keys with zero false positives.
-    // Two maps rotate to provide approximate FIFO without linked list overhead.
+    // Two-stage Bloom filter ghost: tracks recently evicted keys with low memory overhead.
+    // Two filters rotate to provide approximate FIFO.
     // When ghostActive fills up, ghostAging is cleared and they swap roles.
-    ghostActive map[K]struct{} // current generation
-    ghostAging  map[K]struct{} // previous generation (read-only until swap)
-    ghostCap    int            // max entries before rotation
+    ghostActive *bloomFilter
+    ghostAging  *bloomFilter
+    ghostCap    int
+    hasher      func(K) uint64

     capacity int
     smallCap int
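The bloomFilter type referenced by these fields is not part of this diff. As a sketch of the interface the new code relies on (newBloomFilter, Add, Contains, Reset, and the entries counter that drives rotation), assuming a standard bit-array design — the repo's actual implementation may differ:

import "math"

// Sketch only: a Bloom filter compatible with the calls made in this diff,
// sized with the standard formulas m = -n*ln(p)/ln(2)^2 bits and
// k = (m/n)*ln(2) probes, deriving all k probe positions from one 64-bit
// hash via Kirsch-Mitzenmacher double hashing.
type bloomFilter struct {
    bits    []uint64 // bit array packed into 64-bit words
    m       uint64   // number of bits
    k       uint64   // probes per key
    entries int      // approximate distinct-key count; drives rotation
}

func newBloomFilter(capacity int, fpRate float64) *bloomFilter {
    n := float64(capacity)
    m := uint64(math.Ceil(-n * math.Log(fpRate) / (math.Ln2 * math.Ln2)))
    if m < 64 {
        m = 64
    }
    k := uint64(math.Round(float64(m) / n * math.Ln2))
    if k < 1 {
        k = 1
    }
    return &bloomFilter{bits: make([]uint64, (m+63)/64), m: m, k: k}
}

// probe returns the i-th bit position for hash h.
func (b *bloomFilter) probe(h, i uint64) uint64 {
    return (h + i*((h>>32)|1)) % b.m
}

func (b *bloomFilter) Add(h uint64) {
    for i := uint64(0); i < b.k; i++ {
        p := b.probe(h, i)
        b.bits[p/64] |= 1 << (p % 64)
    }
    b.entries++ // callers check Contains first, so this approximates distinct keys
}

func (b *bloomFilter) Contains(h uint64) bool {
    for i := uint64(0); i < b.k; i++ {
        p := b.probe(h, i)
        if b.bits[p/64]&(1<<(p%64)) == 0 {
            return false
        }
    }
    return true
}

func (b *bloomFilter) Reset() {
    clear(b.bits) // zero all words in place (Go 1.21+)
    b.entries = 0
}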
@@ -216,31 +217,66 @@ func newS3FIFO[K comparable, V any](cfg *config) *s3fifo[K, V] {
         c.keyIsString = true
     }

-    // Auto-tune ratios based on capacity
-    // Note: Two-map ghost tracks 2x ghostRatio total (both maps can be nearly full).
-    // Ghost ratio sweep results (Meta trace):
-    //   0.5x: 68.15% / 76.05%
-    //   1.0x: 68.42% / 76.27%
-    //   1.5x: 68.53% / 76.34% <- sweet spot (good hit rate, reasonable memory)
-    //   2.0x: 68.57% / 76.39% <- diminishing returns
+    // Auto-tune ratios based on capacity.
+    // Small caches with Zipf workloads need tight ghost tracking to avoid false promotions.
+    // Large caches benefit from more ghost history for better admission decisions.
+    //
+    // Zipf benchmark results:
+    //   500 items (0.1%):   ghostRatio=0.2 -> 48.12% (wins)
+    //   5000 items (1%):    ghostRatio=0.2 -> 64.50% (wins)
+    //   50000 items (10%):  testing larger small queue for better ghost learning
     var smallRatio, ghostRatio float64
-    if capacity <= 16384 {
-        smallRatio = 0.01 // 1% for small caches (Zipf-friendly)
-        ghostRatio = 1.0  // 100% per map = ~200% total for small caches
+    if capacity <= 10000 {
+        smallRatio = 0.01 // 1% small queue for Zipf-skewed workloads
+        ghostRatio = 0.2  // 20% per bloom = ~40% total ghost tracking
     } else {
-        smallRatio = 0.05 // 5% for large caches (Meta trace optimal)
-        ghostRatio = 1.5  // 150% per map = ~300% total for large caches
+        smallRatio = 0.10 // 10% for large caches - standard S3-FIFO ratio
+        ghostRatio = 0.2  // 20% per bloom = ~40% total ghost tracking
+    }
+
+    // Prepare hasher for Bloom filter
+    var hasher func(K) uint64
+    switch {
+    case c.keyIsInt:
+        hasher = func(key K) uint64 {
+            return hashInt64(int64(*(*int)(unsafe.Pointer(&key))))
+        }
+    case c.keyIsInt64:
+        hasher = func(key K) uint64 {
+            return hashInt64(*(*int64)(unsafe.Pointer(&key)))
+        }
+    case c.keyIsString:
+        hasher = func(key K) uint64 {
+            return wyhashString(*(*string)(unsafe.Pointer(&key)))
+        }
+    default:
+        hasher = func(key K) uint64 {
+            switch k := any(key).(type) {
+            case uint:
+                //nolint:gosec // G115: intentional bit reinterpretation for hashing
+                return hashInt64(int64(k))
+            case uint64:
+                //nolint:gosec // G115: intentional bit reinterpretation for hashing
+                return hashInt64(int64(k))
+            case string:
+                return wyhashString(k)
+            case fmt.Stringer:
+                return wyhashString(k.String())
+            default:
+                return wyhashString(fmt.Sprintf("%v", k))
+            }
+        }
     }

     for i := range numShards {
-        c.shards[i] = newShard[K, V](shardCap, smallRatio, ghostRatio)
+        c.shards[i] = newShard[K, V](shardCap, smallRatio, ghostRatio, hasher)
     }

     return c
 }

 // newShard creates a new S3-FIFO shard with the given capacity.
-func newShard[K comparable, V any](capacity int, smallRatio, ghostRatio float64) *shard[K, V] {
+func newShard[K comparable, V any](capacity int, smallRatio, ghostRatio float64, hasher func(K) uint64) *shard[K, V] {
     // Small queue: recommended 10%
     smallCap := int(float64(capacity) * smallRatio)
     if smallCap < 1 {
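hashInt64 and wyhashString are assumed to exist elsewhere in the package; they are not part of this diff. The unsafe.Pointer casts are sound because each branch is guarded by a flag (keyIsInt, keyIsInt64, keyIsString) that pins K's concrete type, so the cast is a free reinterpretation that avoids boxing the key into an interface. As a hypothetical sketch, hashInt64 could be the splitmix64 finalizer:

// Assumed helper, not shown in this diff: the splitmix64 finalizer mixes
// every input bit into every output bit, which is what a Bloom filter needs
// from its single base hash. wyhashString would similarly wrap a fast
// string hash such as wyhash.
func hashInt64(v int64) uint64 {
    x := uint64(v) //nolint:gosec // intentional bit reinterpretation, as above
    x ^= x >> 30
    x *= 0xbf58476d1ce4e5b9
    x ^= x >> 27
    x *= 0x94d049bb133111eb
    x ^= x >> 31
    return x
}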
@@ -258,8 +294,9 @@ func newShard[K comparable, V any](capacity int, smallRatio, ghostRatio float64)
         smallCap:    smallCap,
         ghostCap:    ghostCap,
         entries:     make(map[K]*entry[K, V], capacity),
-        ghostActive: make(map[K]struct{}, ghostCap),
-        ghostAging:  make(map[K]struct{}, ghostCap),
+        ghostActive: newBloomFilter(ghostCap, 0.0001),
+        ghostAging:  newBloomFilter(ghostCap, 0.0001),
+        hasher:      hasher,
     }
     return s
 }
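Sizing note on the 0.0001 false-positive target: a classic Bloom filter needs m = -n*ln(p)/ln(2)^2 bits, which at p = 0.0001 works out to about 19.2 bits (~2.4 bytes) per tracked key, independent of key size. The two map[K]struct{} ghosts this replaces paid full Go map overhead per key — hash buckets plus the key itself, typically tens of bytes for string keys — which is where the commit's memory saving comes from.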
@@ -340,15 +377,19 @@ func (c *s3fifo[K, V]) get(key K) (V, bool) {
 func (s *shard[K, V]) get(key K) (V, bool) {
     s.mu.RLock()
     ent, ok := s.entries[key]
-    s.mu.RUnlock()
-
     if !ok {
+        s.mu.RUnlock()
         var zero V
         return zero, false
     }

+    // Read values while holding lock to avoid race with concurrent set()
+    val := ent.value
+    expiry := ent.expiryNano
+    s.mu.RUnlock()
+
     // Check expiration (lazy - actual cleanup happens in background)
-    if ent.expiryNano != 0 && time.Now().UnixNano() > ent.expiryNano {
+    if expiry != 0 && time.Now().UnixNano() > expiry {
         var zero V
         return zero, false
     }
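Moving the reads inside the critical section fixes a data race, not just style: previously ent.value was read after RUnlock, so a concurrent set() updating the same entry under the write lock raced with that unsynchronized read. Schematically:

    goroutine A (get)               goroutine B (set, same key)
    ent, _ := s.entries[key]
    s.mu.RUnlock()
                                    s.mu.Lock()
                                    ent.value = newVal    // write
    return ent.value, true          // unsynchronized read -> data race

Copying val and expiry before dropping the lock makes those reads happen-before any later write.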
@@ -359,7 +400,7 @@ func (s *shard[K, V]) get(key K) (V, bool) {
         ent.freq.Store(f + 1)
     }

-    return ent.value, true
+    return val, true
 }

 // set adds or updates a value in the cache.
@@ -381,10 +422,11 @@ func (s *shard[K, V]) set(key K, value V, expiryNano int64) {

     // Slow path: insert new key (already holding lock)

-    // Check if key is in ghost (zero false positives)
-    _, inGhost := s.ghostActive[key]
+    // Check if key is in ghost (Bloom filter)
+    h := s.hasher(key)
+    inGhost := s.ghostActive.Contains(h)
     if !inGhost {
-        _, inGhost = s.ghostAging[key]
+        inGhost = s.ghostAging.Contains(h)
     }

     // Create new entry
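Unlike the maps, this check admits false positives: a never-evicted key can hash onto bits that are already set and be treated as ghost-resident. With filters built at a 0.0001 target rate, a union bound over the two lookups caps that misadmission probability at roughly 0.02% per insert — hence the comment no longer promising zero false positives.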
@@ -484,14 +526,17 @@ func (s *shard[K, V]) evictFromMain() {
     }
 }

-// addToGhost adds a key to the ghost queue using two rotating maps.
+// addToGhost adds a key to the ghost queue using two rotating Bloom filters.
 func (s *shard[K, V]) addToGhost(key K) {
-    s.ghostActive[key] = struct{}{}
+    h := s.hasher(key)
+    if !s.ghostActive.Contains(h) {
+        s.ghostActive.Add(h)
+    }

-    // Rotate maps when active is full (provides approximate FIFO)
-    if len(s.ghostActive) >= s.ghostCap {
-        // Clear aging map and swap - aging becomes new active
-        clear(s.ghostAging)
+    // Rotate filters when active is full (provides approximate FIFO)
+    if s.ghostActive.entries >= s.ghostCap {
+        // Reset aging filter and swap - aging becomes new active
+        s.ghostAging.Reset()
         s.ghostActive, s.ghostAging = s.ghostAging, s.ghostActive
     }
 }
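A worked rotation with ghostCap = 2: adding k1 then k2 fills the active filter, so aging (already empty) is reset and the two swap, leaving {k1, k2} as the aging generation. Adding k3 gives active = {k3}, aging = {k1, k2}, and all three keys still test ghost-resident. Adding k4 fills active again and triggers another swap, clearing k1 and k2. Each key therefore survives between one and two generations, so effective ghost history per shard sits between ghostCap and 2x ghostCap keys — the "~40% total ghost tracking" the ratio comments refer to.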
@@ -525,7 +570,7 @@ func (s *shard[K, V]) flush() int {
     s.entries = make(map[K]*entry[K, V], s.capacity)
     s.small.init()
     s.main.init()
-    clear(s.ghostActive)
-    clear(s.ghostAging)
+    s.ghostActive.Reset()
+    s.ghostAging.Reset()
     return n
 }
