@@ -95,12 +95,13 @@ type shard[K comparable, V any] struct {
 	small entryList[K, V] // Intrusive list for small queue
 	main  entryList[K, V] // Intrusive list for main queue
 
-	// Two-map ghost: tracks recently evicted keys with zero false positives.
-	// Two maps rotate to provide approximate FIFO without linked list overhead.
+	// Two-stage Bloom filter ghost: tracks recently evicted keys with low memory overhead.
+	// Two filters rotate to provide approximate FIFO.
 	// When ghostActive fills up, ghostAging is cleared and they swap roles.
-	ghostActive map[K]struct{} // current generation
-	ghostAging  map[K]struct{} // previous generation (read-only until swap)
-	ghostCap    int            // max entries before rotation
+	ghostActive *bloomFilter   // current generation
+	ghostAging  *bloomFilter   // previous generation (read-only until swap)
+	ghostCap    int            // max entries before rotation
+	hasher      func(K) uint64 // precomputed key hasher for the filters
 
 	capacity int
 	smallCap int
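This commit leans on a `bloomFilter` type that isn't part of the diff. For readers without the rest of the repo, here is a minimal sketch of a filter that would satisfy every call site below (`newBloomFilter(capacity, fpRate)`, `Add`, `Contains`, `Reset`, and the `entries` counter that `addToGhost` reads); the sizing formulas and double hashing are assumptions, not the repo's actual implementation.

```go
package cache

import "math"

// bloomFilter is a sketch of the type this diff relies on; only the
// members referenced by the diff are guaranteed to match the real one.
type bloomFilter struct {
	bits    []uint64 // packed bit array
	numBits uint64   // m, total bits
	numHash uint64   // k, probes per key
	entries int      // keys added since the last Reset (read by addToGhost)
}

// newBloomFilter sizes the filter for `capacity` keys at `targetFP`
// using the standard formulas m = -n*ln(p)/ln(2)^2 and k = (m/n)*ln(2).
func newBloomFilter(capacity int, targetFP float64) *bloomFilter {
	n := float64(capacity)
	m := math.Ceil(-n * math.Log(targetFP) / (math.Ln2 * math.Ln2))
	k := math.Max(1, math.Round(m/n*math.Ln2))
	words := (uint64(m) + 63) / 64
	return &bloomFilter{
		bits:    make([]uint64, words),
		numBits: words * 64,
		numHash: uint64(k),
	}
}

// Add sets k bit positions derived from h via double hashing (h1 + i*h2).
func (b *bloomFilter) Add(h uint64) {
	h1, h2 := h, h>>32|h<<32
	for i := uint64(0); i < b.numHash; i++ {
		pos := (h1 + i*h2) % b.numBits
		b.bits[pos/64] |= 1 << (pos % 64)
	}
	b.entries++
}

// Contains reports whether all k bits for h are set; false positives
// occur at roughly the configured rate, false negatives never.
func (b *bloomFilter) Contains(h uint64) bool {
	h1, h2 := h, h>>32|h<<32
	for i := uint64(0); i < b.numHash; i++ {
		pos := (h1 + i*h2) % b.numBits
		if b.bits[pos/64]&(1<<(pos%64)) == 0 {
			return false
		}
	}
	return true
}

// Reset zeroes the bits and counter so the filter can be reused as the
// next active generation after rotation.
func (b *bloomFilter) Reset() {
	clear(b.bits)
	b.entries = 0
}
```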
@@ -216,31 +217,66 @@ func newS3FIFO[K comparable, V any](cfg *config) *s3fifo[K, V] {
 		c.keyIsString = true
 	}
 
-	// Auto-tune ratios based on capacity
-	// Note: Two-map ghost tracks 2x ghostRatio total (both maps can be nearly full).
-	// Ghost ratio sweep results (Meta trace):
-	//   0.5x: 68.15% / 76.05%
-	//   1.0x: 68.42% / 76.27%
-	//   1.5x: 68.53% / 76.34%  <- sweet spot (good hit rate, reasonable memory)
-	//   2.0x: 68.57% / 76.39%  <- diminishing returns
+	// Auto-tune ratios based on capacity.
+	// Small caches with Zipf workloads need tight ghost tracking to avoid false promotions.
+	// Large caches benefit from more ghost history for better admission decisions.
+	//
+	// Zipf benchmark results:
+	//   500 items (0.1%):   ghostRatio=0.2 -> 48.12% (wins)
+	//   5000 items (1%):    ghostRatio=0.2 -> 64.50% (wins)
+	//   50000 items (10%):  testing larger small queue for better ghost learning
 	var smallRatio, ghostRatio float64
-	if capacity <= 16384 {
-		smallRatio = 0.01 // 1% for small caches (Zipf-friendly)
-		ghostRatio = 1.0  // 100% per map = ~200% total for small caches
+	if capacity <= 10000 {
+		smallRatio = 0.01 // 1% small queue for Zipf-skewed workloads
+		ghostRatio = 0.2  // 20% per bloom = ~40% total ghost tracking
 	} else {
-		smallRatio = 0.05 // 5% for large caches (Meta trace optimal)
-		ghostRatio = 1.5  // 150% per map = ~300% total for large caches
+		smallRatio = 0.10 // 10% for large caches - standard S3-FIFO ratio
+		ghostRatio = 0.2  // 20% per bloom = ~40% total ghost tracking
+	}
+
+	// Prepare hasher for Bloom filter
+	var hasher func(K) uint64
+	switch {
+	case c.keyIsInt:
+		hasher = func(key K) uint64 {
+			return hashInt64(int64(*(*int)(unsafe.Pointer(&key))))
+		}
+	case c.keyIsInt64:
+		hasher = func(key K) uint64 {
+			return hashInt64(*(*int64)(unsafe.Pointer(&key)))
+		}
+	case c.keyIsString:
+		hasher = func(key K) uint64 {
+			return wyhashString(*(*string)(unsafe.Pointer(&key)))
+		}
+	default:
+		hasher = func(key K) uint64 {
+			switch k := any(key).(type) {
+			case uint:
+				//nolint:gosec // G115: intentional bit reinterpretation for hashing
+				return hashInt64(int64(k))
+			case uint64:
+				//nolint:gosec // G115: intentional bit reinterpretation for hashing
+				return hashInt64(int64(k))
+			case string:
+				return wyhashString(k)
+			case fmt.Stringer:
+				return wyhashString(k.String())
+			default:
+				return wyhashString(fmt.Sprintf("%v", k))
+			}
+		}
 	}
 
 	for i := range numShards {
-		c.shards[i] = newShard[K, V](shardCap, smallRatio, ghostRatio)
+		c.shards[i] = newShard[K, V](shardCap, smallRatio, ghostRatio, hasher)
 	}
 
 	return c
 }
 
 // newShard creates a new S3-FIFO shard with the given capacity.
-func newShard[K comparable, V any](capacity int, smallRatio, ghostRatio float64) *shard[K, V] {
+func newShard[K comparable, V any](capacity int, smallRatio, ghostRatio float64, hasher func(K) uint64) *shard[K, V] {
 	// Small queue: recommended 10%
 	smallCap := int(float64(capacity) * smallRatio)
 	if smallCap < 1 {
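The hasher closures above call `hashInt64` and `wyhashString`, which are also defined outside this diff. Here are hedged stand-ins with the same signatures, so the sketches in this review compile on their own; the real `wyhashString` presumably inlines wyhash, while this one defers to the standard library.

```go
package cache

import "hash/maphash"

// ghostSeed is an illustrative process-local seed; ghost tracking does
// not need hashes that are stable across runs.
var ghostSeed = maphash.MakeSeed()

// hashInt64 mixes an integer key into 64 bits. The repo's version is not
// shown in this diff; this stand-in uses a splitmix64-style finalizer.
func hashInt64(x int64) uint64 {
	z := uint64(x) + 0x9e3779b97f4a7c15
	z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9
	z = (z ^ (z >> 27)) * 0x94d049bb133111eb
	return z ^ (z >> 31)
}

// wyhashString hashes a string key; hash/maphash is a runnable
// substitute for the wyhash implementation the name implies.
func wyhashString(s string) uint64 {
	return maphash.String(ghostSeed, s)
}
```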
@@ -258,8 +294,9 @@ func newShard[K comparable, V any](capacity int, smallRatio, ghostRatio float64)
 		smallCap:    smallCap,
 		ghostCap:    ghostCap,
 		entries:     make(map[K]*entry[K, V], capacity),
-		ghostActive: make(map[K]struct{}, ghostCap),
-		ghostAging:  make(map[K]struct{}, ghostCap),
+		ghostActive: newBloomFilter(ghostCap, 0.0001),
+		ghostAging:  newBloomFilter(ghostCap, 0.0001),
+		hasher:      hasher,
 	}
 	return s
 }
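For concreteness, assuming the elided lines of `newShard` compute `ghostCap := int(float64(capacity) * ghostRatio)` (the diff does not show them): a 6,250-entry shard with `smallRatio = 0.10` and `ghostRatio = 0.2` gets `smallCap = 625` and `ghostCap = 1250` per filter, so the two filters together remember up to ~2,500 recently evicted keys, matching the "~40% total ghost tracking" comment above.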
@@ -340,15 +377,19 @@ func (c *s3fifo[K, V]) get(key K) (V, bool) {
 func (s *shard[K, V]) get(key K) (V, bool) {
 	s.mu.RLock()
 	ent, ok := s.entries[key]
-	s.mu.RUnlock()
-
 	if !ok {
+		s.mu.RUnlock()
 		var zero V
 		return zero, false
 	}
 
+	// Read values while holding lock to avoid race with concurrent set()
+	val := ent.value
+	expiry := ent.expiryNano
+	s.mu.RUnlock()
+
 	// Check expiration (lazy - actual cleanup happens in background)
-	if ent.expiryNano != 0 && time.Now().UnixNano() > ent.expiryNano {
+	if expiry != 0 && time.Now().UnixNano() > expiry {
 		var zero V
 		return zero, false
 	}
@@ -359,7 +400,7 @@ func (s *shard[K, V]) get(key K) (V, bool) {
 		ent.freq.Store(f + 1)
 	}
 
-	return ent.value, true
+	return val, true
 }
 
 // set adds or updates a value in the cache.
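The reordering in `get` matters because `ent.value` and `ent.expiryNano` can be rewritten by a concurrent `set` that updates an existing entry in place; dereferencing the entry after `RUnlock` is a data race, while the `freq` update stays safe because it goes through an atomic. A self-contained illustration of the pattern with made-up names (run the two getters against a concurrent writer under `go test -race`):

```go
package cache

import "sync"

// item stands in for the cache's entry type; val is mutated in place by
// writers holding the write lock.
type item struct{ val int64 }

type store struct {
	mu sync.RWMutex
	m  map[string]*item
}

// getRacy mirrors the old code: the pointer comes out from under RLock,
// but the field is read after RUnlock, racing with set's in-place write.
func (s *store) getRacy(k string) (int64, bool) {
	s.mu.RLock()
	it, ok := s.m[k]
	s.mu.RUnlock()
	if !ok {
		return 0, false
	}
	return it.val, true // the race detector flags this read
}

// getSafe mirrors the new code: copy the fields before releasing the lock.
func (s *store) getSafe(k string) (int64, bool) {
	s.mu.RLock()
	it, ok := s.m[k]
	if !ok {
		s.mu.RUnlock()
		return 0, false
	}
	v := it.val
	s.mu.RUnlock()
	return v, true
}

func (s *store) set(k string, v int64) {
	s.mu.Lock()
	if it, ok := s.m[k]; ok {
		it.val = v // in-place update: the write getRacy races with
	} else {
		s.m[k] = &item{val: v}
	}
	s.mu.Unlock()
}
```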
@@ -381,10 +422,11 @@ func (s *shard[K, V]) set(key K, value V, expiryNano int64) {
 
 	// Slow path: insert new key (already holding lock)
 
-	// Check if key is in ghost (zero false positives)
-	_, inGhost := s.ghostActive[key]
+	// Check if key is in ghost (Bloom filter: false positives possible, but rare)
+	h := s.hasher(key)
+	inGhost := s.ghostActive.Contains(h)
 	if !inGhost {
-		_, inGhost = s.ghostAging[key]
+		inGhost = s.ghostAging.Contains(h)
 	}
 
 	// Create new entry
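One tradeoff to note here: the two-map ghost could never report a false hit, while each Bloom filter is built with a 0.0001 target false-positive rate, so checking both filters wrongly flags roughly 2 in 10,000 (~1 in 5,000) genuinely new keys as ghosts and admits them straight to main. That occasional mis-admission is the price of the memory savings.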
@@ -484,14 +526,17 @@ func (s *shard[K, V]) evictFromMain() {
 	}
 }
 
-// addToGhost adds a key to the ghost queue using two rotating maps.
+// addToGhost adds a key to the ghost queue using two rotating Bloom filters.
 func (s *shard[K, V]) addToGhost(key K) {
-	s.ghostActive[key] = struct{}{}
+	h := s.hasher(key)
+	if !s.ghostActive.Contains(h) {
+		s.ghostActive.Add(h)
+	}
 
-	// Rotate maps when active is full (provides approximate FIFO)
-	if len(s.ghostActive) >= s.ghostCap {
-		// Clear aging map and swap - aging becomes new active
-		clear(s.ghostAging)
+	// Rotate filters when active is full (provides approximate FIFO)
+	if s.ghostActive.entries >= s.ghostCap {
+		// Reset aging filter and swap - aging becomes new active
+		s.ghostAging.Reset()
 		s.ghostActive, s.ghostAging = s.ghostAging, s.ghostActive
 	}
 }
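The rotation gives each ghost key a lifetime of one to two generations: it survives the swap that demotes its filter to aging, then vanishes at the next swap when aging is reset. A toy demo of the same logic, assuming the `bloomFilter` and `hashInt64` sketches from earlier live alongside it (the `ghostCap` of 3 is purely illustrative):

```go
package main

import "fmt"

func main() {
	ghostCap := 3
	active := newBloomFilter(ghostCap, 0.0001)
	aging := newBloomFilter(ghostCap, 0.0001)

	// addToGhost's logic, inlined over locals instead of shard fields.
	add := func(h uint64) {
		if !active.Contains(h) {
			active.Add(h)
		}
		if active.entries >= ghostCap {
			aging.Reset()
			active, aging = aging, active
		}
	}

	for k := int64(1); k <= 4; k++ {
		add(hashInt64(k)) // the 3rd add fills active and triggers a swap
	}

	// Keys 1-3 live in the demoted (aging) filter, key 4 in the fresh
	// active one; the two-filter check in set() still sees all of them.
	h2 := hashInt64(2)
	fmt.Println(active.Contains(h2) || aging.Contains(h2)) // true
	// The next rotation resets aging, and keys 1-3 are forgotten.
}
```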
@@ -525,7 +570,7 @@ func (s *shard[K, V]) flush() int {
 	s.entries = make(map[K]*entry[K, V], s.capacity)
 	s.small.init()
 	s.main.init()
-	clear(s.ghostActive)
-	clear(s.ghostAging)
+	s.ghostActive.Reset()
+	s.ghostAging.Reset()
 	return n
 }