Skip to content

Commit 2c1e501

Browse files
Replace smallest-first eviction with LRU in FileBasedCacheManager
The previous memory management layer had three bugs caused by add_entry not handling re-added bins (when save_cache grows a bin on disk and maybe_load_cache reloads it): 1. Self-eviction: the eviction loop could pick the entry being added (its old small size made it the smallest), removing it from in_memory_cache while add_entry continued to re-add it to sizes. Next eviction cycle: KeyError. 2. Double-counting: re-adding without subtracting the old size inflated total_usage_mb, triggering premature eviction. 3. Oversized entry leak: entries that exceeded the cache limit stayed in in_memory_cache but were invisible to the size tracker. This commit replaces the entire eviction layer: - OrderedDict for LRU ordering instead of smallest-first via min() - Remove old tracking before eviction check (prevents self-eviction) - touch() method for cache hits in maybe_load_cache - Simpler code: 3 methods (add_entry, _evict_lru, touch) replace the previous 3 (add_entry, remove_entry, free_space_for) Verified end-to-end: old code crashes with KeyError at max_mem_usage_mb=5 on 700+ pair variants; new code completes all 12 variants.
1 parent 4f96d1b commit 2c1e501

File tree

2 files changed

+133
-73
lines changed

2 files changed

+133
-73
lines changed

safetytooling/apis/inference/cache_manager.py

Lines changed: 40 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import logging
22
import os
33
import sys
4-
from collections import deque
4+
from collections import OrderedDict, deque
55
from itertools import chain
66
from pathlib import Path
77
from typing import List, Tuple, Union
@@ -137,66 +137,55 @@ def save_embeddings(self, params: EmbeddingParams, response: EmbeddingResponseBa
137137

138138

139139
class FileBasedCacheManager(BaseCacheManager):
140-
"""Original file-based cache manager implementation."""
140+
"""File-based cache with an LRU-evicted in-memory layer."""
141141

142142
def __init__(self, cache_dir: Path, num_bins: int = 20, max_mem_usage_mb: float = 5_000):
143143
super().__init__(cache_dir, num_bins)
144-
self.in_memory_cache = {}
145-
self.sizes = {} # Track the size of each cache file in memory
146-
self.total_usage_mb = 0
144+
self.in_memory_cache: OrderedDict[Path, dict] = OrderedDict()
145+
self.sizes: dict[Path, float] = {}
146+
self.total_usage_mb = 0.0
147147
self.max_mem_usage_mb = max_mem_usage_mb
148148

149-
def remove_entry(self, cache_file: Path):
150-
self.in_memory_cache.pop(cache_file)
149+
def _evict_lru(self):
150+
"""Evict the least-recently-used bin from memory."""
151+
lru_key = next(iter(self.in_memory_cache))
152+
del self.in_memory_cache[lru_key]
153+
self.total_usage_mb -= self.sizes.pop(lru_key)
154+
LOGGER.info(f"Evicted LRU entry {lru_key} from mem cache. Total usage is now {self.total_usage_mb:.1f} MB.")
151155

152-
if self.max_mem_usage_mb is not None:
153-
size = self.sizes.pop(cache_file)
154-
self.total_usage_mb -= size
155-
LOGGER.info(f"Removed entry from mem cache. Freed {size} MB.")
156+
def add_entry(self, cache_file: Path, contents: dict) -> bool:
157+
"""Add or replace a bin in the in-memory cache, evicting LRU entries if needed.
156158
157-
def add_entry(self, cache_file: Path, contents: dict):
158-
self.in_memory_cache[cache_file] = contents
159+
Returns False if the single entry is larger than the entire cache limit.
160+
"""
161+
size_mb = total_size(contents)
159162

160-
if self.max_mem_usage_mb is not None:
161-
# Remove old size tracking if this bin was already loaded.
162-
# Without this, free_space_for can evict the entry we're currently
163-
# adding (self-eviction), leaving sizes and in_memory_cache out of
164-
# sync and causing a KeyError on the next eviction cycle. It also
165-
# prevents double-counting the old size in total_usage_mb.
166-
if cache_file in self.sizes:
167-
self.total_usage_mb -= self.sizes.pop(cache_file)
168-
169-
size = total_size(contents)
170-
if self.total_usage_mb + size > self.max_mem_usage_mb:
171-
space_available = self.free_space_for(size)
172-
if not space_available:
173-
self.in_memory_cache.pop(cache_file, None)
174-
return False
175-
self.sizes[cache_file] = size
176-
self.total_usage_mb += size
177-
178-
def free_space_for(self, needed_space_mb: float):
179-
if self.max_mem_usage_mb is None:
180-
return True
181-
182-
if needed_space_mb > self.max_mem_usage_mb:
183-
LOGGER.warning(
184-
f"Needed space {needed_space_mb} MB is greater than max mem usage {self.max_mem_usage_mb} MB. "
185-
"This is not possible."
186-
)
163+
if self.max_mem_usage_mb is not None and size_mb > self.max_mem_usage_mb:
164+
LOGGER.warning(f"Entry {cache_file} ({size_mb:.1f} MB) exceeds cache limit ({self.max_mem_usage_mb} MB).")
187165
return False
188-
LOGGER.info(f"Evicting entry from mem cache to free up {needed_space_mb} MB")
189-
while self.total_usage_mb > self.max_mem_usage_mb - needed_space_mb:
190-
# Find the entry with the smallest size
191-
try:
192-
smallest_entry = min(self.sizes.items(), key=lambda x: x[1])
193-
except ValueError:
194-
LOGGER.warning("No entries in mem cache to evict")
195-
return True
196-
self.remove_entry(smallest_entry[0])
197-
LOGGER.info(f"Evicted entry from mem cache. Total usage is now {self.total_usage_mb} MB.")
166+
167+
# Remove old version first. This prevents self-eviction (the eviction
168+
# loop below can only see OTHER bins) and prevents double-counting.
169+
if cache_file in self.sizes:
170+
del self.in_memory_cache[cache_file]
171+
self.total_usage_mb -= self.sizes.pop(cache_file)
172+
173+
# Evict least-recently-used bins until there's room
174+
if self.max_mem_usage_mb is not None:
175+
while self.in_memory_cache and self.total_usage_mb + size_mb > self.max_mem_usage_mb:
176+
self._evict_lru()
177+
178+
# Insert at the back (most-recently-used position)
179+
self.in_memory_cache[cache_file] = contents
180+
self.sizes[cache_file] = size_mb
181+
self.total_usage_mb += size_mb
198182
return True
199183

184+
def touch(self, cache_file: Path):
185+
"""Mark a bin as recently used (moves it to the back of the LRU queue)."""
186+
if cache_file in self.in_memory_cache:
187+
self.in_memory_cache.move_to_end(cache_file)
188+
200189
def get_cache_file(self, prompt: Prompt, params: LLMParams) -> tuple[Path, str]:
201190
# Use the SHA-1 hash of the prompt for the dictionary key
202191
prompt_hash = prompt.model_hash() # Assuming this gives a SHA-1 hash as a hex string
@@ -220,6 +209,7 @@ def maybe_load_cache(self, prompt: Prompt, params: LLMParams):
220209
self.add_entry(cache_file, contents)
221210
else:
222211
contents = self.in_memory_cache[cache_file]
212+
self.touch(cache_file)
223213

224214
data = contents.get(prompt_hash, None)
225215
return None if data is None else LLMCache.model_validate_json(data)

tests/test_cache_manager.py

Lines changed: 93 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Unit tests for FileBasedCacheManager memory eviction logic.
1+
"""Unit tests for FileBasedCacheManager LRU memory eviction.
22
33
These tests exercise the in-memory cache eviction without making any API calls.
44
"""
@@ -14,20 +14,14 @@ def _make_data(size_chars: int, num_keys: int = 1) -> dict:
1414
return {f"k{i}": "x" * (size_chars // num_keys) for i in range(num_keys)}
1515

1616

17-
class TestAddEntrySelfEviction:
18-
"""Regression tests for the self-eviction bug in add_entry.
19-
20-
When a bin file is re-loaded from disk with a larger size (because
21-
save_cache wrote new entries to disk without updating in_memory_cache),
22-
add_entry previously left self.sizes and self.in_memory_cache out of
23-
sync. This caused a KeyError on the next eviction cycle.
24-
"""
17+
class TestAddEntryReAdd:
18+
"""Regression tests for re-adding bins (the self-eviction bug)."""
2519

2620
def test_re_add_larger_bin_does_not_crash(self):
2721
"""Re-adding a bin that triggers eviction must not self-evict."""
2822
with tempfile.TemporaryDirectory() as tmpdir:
29-
small = _make_data(200_000) # ~0.2 MB
30-
large = _make_data(400_000) # ~0.4 MB
23+
small = _make_data(200_000)
24+
large = _make_data(400_000)
3125

3226
limit = total_size(small) + total_size(large) + 0.01
3327
cm = FileBasedCacheManager(Path(tmpdir), max_mem_usage_mb=limit)
@@ -36,20 +30,16 @@ def test_re_add_larger_bin_does_not_crash(self):
3630
bin_b = Path(tmpdir) / "binB.json"
3731
bin_c = Path(tmpdir) / "binC.json"
3832

39-
# Fill cache: bin_a is the smallest entry
4033
cm.add_entry(bin_a, small)
4134
cm.add_entry(bin_b, large)
4235

4336
# Re-add bin_a with larger contents (simulates disk reload after
44-
# save_cache grew the bin). This must evict bin_b, not bin_a itself.
37+
# save_cache grew the bin on disk).
4538
cm.add_entry(bin_a, large)
46-
47-
# Before the fix, sizes had bin_a but in_memory_cache didn't.
48-
# This next add triggers eviction → KeyError on bin_a.pop().
4939
cm.add_entry(bin_c, large) # must not crash
5040

51-
def test_re_add_keeps_sizes_consistent(self):
52-
"""After re-adding a bin, sizes and in_memory_cache must agree."""
41+
def test_re_add_keeps_dicts_consistent(self):
42+
"""After re-adding a bin, sizes and in_memory_cache must have the same keys."""
5343
with tempfile.TemporaryDirectory() as tmpdir:
5444
cm = FileBasedCacheManager(Path(tmpdir), max_mem_usage_mb=100)
5545

@@ -76,22 +66,102 @@ def test_re_add_does_not_double_count(self):
7666
cm.add_entry(bin_a, data)
7767
assert abs(cm.total_usage_mb - expected_size) < 0.001
7868

79-
# Re-add with identical data — total should not change
8069
cm.add_entry(bin_a, data)
8170
assert abs(cm.total_usage_mb - expected_size) < 0.001
8271

8372
def test_oversized_entry_not_leaked(self):
84-
"""If an entry is too large for the cache, it must not leak in in_memory_cache."""
73+
"""If an entry exceeds the entire cache limit, it must not leak."""
8574
with tempfile.TemporaryDirectory() as tmpdir:
86-
tiny_limit = 0.001 # ~1 KB
87-
cm = FileBasedCacheManager(Path(tmpdir), max_mem_usage_mb=tiny_limit)
75+
cm = FileBasedCacheManager(Path(tmpdir), max_mem_usage_mb=0.001)
8876

8977
bin_a = Path(tmpdir) / "binA.json"
90-
huge = _make_data(1_000_000) # way over 1 KB
78+
huge = _make_data(1_000_000)
9179

9280
result = cm.add_entry(bin_a, huge)
9381

9482
assert result is False
9583
assert bin_a not in cm.in_memory_cache
9684
assert bin_a not in cm.sizes
9785
assert cm.total_usage_mb == 0
86+
87+
88+
class TestLRUEvictionOrder:
89+
"""Tests that eviction follows LRU order, not smallest-first."""
90+
91+
def test_evicts_oldest_not_smallest(self):
92+
"""When memory is full, the least-recently-used bin is evicted."""
93+
with tempfile.TemporaryDirectory() as tmpdir:
94+
small = _make_data(100_000)
95+
large = _make_data(300_000)
96+
97+
# Room for small + large, but not small + large + small
98+
limit = total_size(small) + total_size(large) + 0.01
99+
cm = FileBasedCacheManager(Path(tmpdir), max_mem_usage_mb=limit)
100+
101+
bin_a = Path(tmpdir) / "binA.json"
102+
bin_b = Path(tmpdir) / "binB.json"
103+
bin_c = Path(tmpdir) / "binC.json"
104+
105+
cm.add_entry(bin_a, small) # oldest
106+
cm.add_entry(bin_b, large) # newer
107+
108+
# Adding bin_c must evict bin_a because it is the OLDEST entry,
109+
# not because it is the smallest. Under the old smallest-first
110+
# policy, bin_a would also have been evicted — but for the wrong
111+
# reason. We verify the LRU property by checking that bin_b (larger but newer) survives.
112+
cm.add_entry(bin_c, small)
113+
114+
assert bin_a not in cm.in_memory_cache # evicted (oldest)
115+
assert bin_b in cm.in_memory_cache # kept (newer)
116+
assert bin_c in cm.in_memory_cache # just added
117+
118+
def test_touch_prevents_eviction(self):
119+
"""Accessing a bin via touch() moves it to the back of the LRU queue."""
120+
with tempfile.TemporaryDirectory() as tmpdir:
121+
data = _make_data(200_000)
122+
123+
# Room for exactly 2 bins
124+
limit = total_size(data) * 2 + 0.01
125+
cm = FileBasedCacheManager(Path(tmpdir), max_mem_usage_mb=limit)
126+
127+
bin_a = Path(tmpdir) / "binA.json"
128+
bin_b = Path(tmpdir) / "binB.json"
129+
bin_c = Path(tmpdir) / "binC.json"
130+
131+
cm.add_entry(bin_a, data) # oldest
132+
cm.add_entry(bin_b, data) # newer
133+
134+
# Touch bin_a — it's now the most-recently-used
135+
cm.touch(bin_a)
136+
137+
# Adding bin_c should evict bin_b (now the LRU), not bin_a
138+
cm.add_entry(bin_c, data)
139+
140+
assert bin_a in cm.in_memory_cache # survived (was touched)
141+
assert bin_b not in cm.in_memory_cache # evicted (LRU after touch)
142+
assert bin_c in cm.in_memory_cache
143+
144+
def test_add_entry_moves_to_mru(self):
145+
"""Re-adding a bin moves it to the most-recently-used position."""
146+
with tempfile.TemporaryDirectory() as tmpdir:
147+
data = _make_data(200_000)
148+
149+
limit = total_size(data) * 2 + 0.01
150+
cm = FileBasedCacheManager(Path(tmpdir), max_mem_usage_mb=limit)
151+
152+
bin_a = Path(tmpdir) / "binA.json"
153+
bin_b = Path(tmpdir) / "binB.json"
154+
bin_c = Path(tmpdir) / "binC.json"
155+
156+
cm.add_entry(bin_a, data) # oldest
157+
cm.add_entry(bin_b, data) # newer
158+
159+
# Re-add bin_a (simulates reload from disk) — now it's MRU
160+
cm.add_entry(bin_a, data)
161+
162+
# Adding bin_c should evict bin_b (now LRU), not bin_a
163+
cm.add_entry(bin_c, data)
164+
165+
assert bin_a in cm.in_memory_cache
166+
assert bin_b not in cm.in_memory_cache
167+
assert bin_c in cm.in_memory_cache

0 commit comments

Comments
 (0)