Skip to content

Commit 4f96d1b

Browse files
Fix self-eviction crash and size tracking bugs in FileBasedCacheManager
add_entry did not handle re-adding an existing bin (which happens when save_cache writes to disk and a later maybe_load_cache reloads the grown bin). Three bugs resulted: 1. Self-eviction: the old (smaller) size in self.sizes made the bin a prime candidate for smallest-first eviction during free_space_for, removing it from in_memory_cache while add_entry was still running. The subsequent re-addition to self.sizes left the two dicts out of sync, causing a KeyError on the next eviction cycle. 2. Double-counting: when the bin was not the smallest and survived eviction, self.sizes was overwritten with the new size but total_usage_mb had the new size added without subtracting the old, inflating the counter and triggering premature eviction. 3. Memory leak on oversized entries: when free_space_for returned False, the entry remained in in_memory_cache but was never tracked in sizes, leaking memory invisibly. Fix: remove old size tracking before the eviction check, and clean up in_memory_cache when the entry cannot fit.
1 parent db7bb1b commit 4f96d1b

File tree

2 files changed

+106
-0
lines changed

2 files changed

+106
-0
lines changed

safetytooling/apis/inference/cache_manager.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,10 +158,19 @@ def add_entry(self, cache_file: Path, contents: dict):
158158
self.in_memory_cache[cache_file] = contents
159159

160160
if self.max_mem_usage_mb is not None:
161+
# Remove old size tracking if this bin was already loaded.
162+
# Without this, free_space_for can evict the entry we're currently
163+
# adding (self-eviction), leaving sizes and in_memory_cache out of
164+
# sync and causing a KeyError on the next eviction cycle. It also
165+
# prevents double-counting the old size in total_usage_mb.
166+
if cache_file in self.sizes:
167+
self.total_usage_mb -= self.sizes.pop(cache_file)
168+
161169
size = total_size(contents)
162170
if self.total_usage_mb + size > self.max_mem_usage_mb:
163171
space_available = self.free_space_for(size)
164172
if not space_available:
173+
self.in_memory_cache.pop(cache_file, None)
165174
return False
166175
self.sizes[cache_file] = size
167176
self.total_usage_mb += size

tests/test_cache_manager.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
"""Unit tests for FileBasedCacheManager memory eviction logic.
2+
3+
These tests exercise the in-memory cache eviction without making any API calls.
4+
"""
5+
6+
import tempfile
7+
from pathlib import Path
8+
9+
from safetytooling.apis.inference.cache_manager import FileBasedCacheManager, total_size
10+
11+
12+
def _make_data(size_chars: int, num_keys: int = 1) -> dict:
13+
"""Create a dict whose in-memory size is roughly proportional to size_chars."""
14+
return {f"k{i}": "x" * (size_chars // num_keys) for i in range(num_keys)}
15+
16+
17+
class TestAddEntrySelfEviction:
    """Regression coverage for the self-eviction bug in ``add_entry``.

    When ``save_cache`` grows a bin on disk and the bin is later reloaded,
    ``add_entry`` runs again for a file that is already resident in memory.
    The buggy implementation left ``self.sizes`` and ``self.in_memory_cache``
    disagreeing about which bins were loaded, which surfaced as a KeyError
    on the following eviction pass.
    """

    def test_re_add_larger_bin_does_not_crash(self):
        """Eviction triggered by a re-add must target other bins, never the bin being added."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            small = _make_data(200_000)  # roughly 0.2 MB of payload
            large = _make_data(400_000)  # roughly 0.4 MB of payload

            # Budget fits exactly one small plus one large bin, with slack.
            budget = total_size(small) + total_size(large) + 0.01
            cm = FileBasedCacheManager(root, max_mem_usage_mb=budget)

            bin_a = root / "binA.json"
            bin_b = root / "binB.json"
            bin_c = root / "binC.json"

            # bin_a starts out as the smallest tracked entry.
            cm.add_entry(bin_a, small)
            cm.add_entry(bin_b, large)

            # Simulate a disk reload after save_cache grew bin_a: this
            # re-add must evict bin_b, not the bin being inserted.
            cm.add_entry(bin_a, large)

            # Pre-fix, sizes still listed bin_a while in_memory_cache did
            # not, so the eviction forced by this add raised KeyError.
            cm.add_entry(bin_c, large)

    def test_re_add_keeps_sizes_consistent(self):
        """sizes and in_memory_cache must track the same bins after a re-add."""
        with tempfile.TemporaryDirectory() as tmpdir:
            cm = FileBasedCacheManager(Path(tmpdir), max_mem_usage_mb=100)
            bin_a = Path(tmpdir) / "binA.json"

            cm.add_entry(bin_a, _make_data(100_000))
            big = _make_data(500_000)
            cm.add_entry(bin_a, big)

            assert set(cm.sizes) == set(cm.in_memory_cache)
            assert bin_a in cm.in_memory_cache
            assert cm.in_memory_cache[bin_a] is big

    def test_re_add_does_not_double_count(self):
        """total_usage_mb must stay flat when the same bin is added twice."""
        with tempfile.TemporaryDirectory() as tmpdir:
            cm = FileBasedCacheManager(Path(tmpdir), max_mem_usage_mb=100)
            bin_a = Path(tmpdir) / "binA.json"
            data = _make_data(200_000)
            expected = total_size(data)

            cm.add_entry(bin_a, data)
            assert abs(cm.total_usage_mb - expected) < 0.001

            # A second add of identical contents must not inflate usage.
            cm.add_entry(bin_a, data)
            assert abs(cm.total_usage_mb - expected) < 0.001

    def test_oversized_entry_not_leaked(self):
        """A rejected oversized entry must leave no trace in the cache."""
        with tempfile.TemporaryDirectory() as tmpdir:
            # Cap the cache at ~1 KB so any real payload is oversized.
            cm = FileBasedCacheManager(Path(tmpdir), max_mem_usage_mb=0.001)
            bin_a = Path(tmpdir) / "binA.json"

            accepted = cm.add_entry(bin_a, _make_data(1_000_000))  # far above 1 KB

            assert accepted is False
            assert bin_a not in cm.in_memory_cache
            assert bin_a not in cm.sizes
            assert cm.total_usage_mb == 0

0 commit comments

Comments
 (0)