Skip to content

Commit 024ebf1

Browse files
Replace smallest-first eviction with LRU in FileBasedCacheManager (#152)
* Fix self-eviction crash and size tracking bugs in FileBasedCacheManager add_entry did not handle re-adding an existing bin (which happens when save_cache writes to disk and a later maybe_load_cache reloads the grown bin). Three bugs resulted: 1. Self-eviction: the old (smaller) size in self.sizes made the bin a prime candidate for smallest-first eviction during free_space_for, removing it from in_memory_cache while add_entry was still running. The subsequent re-addition to self.sizes left the two dicts out of sync, causing a KeyError on the next eviction cycle. 2. Double-counting: when the bin was not the smallest and survived eviction, self.sizes was overwritten with the new size but total_usage_mb had the new size added without subtracting the old, inflating the counter and triggering premature eviction. 3. Memory leak on oversized entries: when free_space_for returned False, the entry remained in in_memory_cache but was never tracked in sizes, leaking memory invisibly. Fix: remove old size tracking before the eviction check, and clean up in_memory_cache when the entry cannot fit. * Replace smallest-first eviction with LRU in FileBasedCacheManager The previous memory management layer had three bugs caused by add_entry not handling re-added bins (when save_cache grows a bin on disk and maybe_load_cache reloads it): 1. Self-eviction: the eviction loop could pick the entry being added (its old small size made it the smallest), removing it from in_memory_cache while add_entry continued to re-add it to sizes. Next eviction cycle: KeyError. 2. Double-counting: re-adding without subtracting the old size inflated total_usage_mb, triggering premature eviction. 3. Oversized entry leak: entries that exceeded the cache limit stayed in in_memory_cache but were invisible to the size tracker. 
This commit replaces the entire eviction layer: - OrderedDict for LRU ordering instead of smallest-first via min() - Remove old tracking before eviction check (prevents self-eviction) - touch() method for cache hits in maybe_load_cache - Simpler code: 3 new methods (add_entry, _evict_lru, touch) replace the previous 3 (add_entry, remove_entry, free_space_for) Verified end-to-end: old code crashes with KeyError at max_mem_usage_mb=5 on 700+ pair variants; new code completes all 12 variants. * Fix black formatting in test_cache_manager.py
1 parent db7bb1b commit 024ebf1

File tree

2 files changed

+207
-41
lines changed

2 files changed

+207
-41
lines changed

safetytooling/apis/inference/cache_manager.py

Lines changed: 40 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import logging
22
import os
33
import sys
4-
from collections import deque
4+
from collections import OrderedDict, deque
55
from itertools import chain
66
from pathlib import Path
77
from typing import List, Tuple, Union
@@ -137,57 +137,55 @@ def save_embeddings(self, params: EmbeddingParams, response: EmbeddingResponseBa
137137

138138

139139
class FileBasedCacheManager(BaseCacheManager):
140-
"""Original file-based cache manager implementation."""
140+
"""File-based cache with an LRU-evicted in-memory layer."""
141141

142142
def __init__(self, cache_dir: Path, num_bins: int = 20, max_mem_usage_mb: float = 5_000):
143143
super().__init__(cache_dir, num_bins)
144-
self.in_memory_cache = {}
145-
self.sizes = {} # Track the size of each cache file in memory
146-
self.total_usage_mb = 0
144+
self.in_memory_cache: OrderedDict[Path, dict] = OrderedDict()
145+
self.sizes: dict[Path, float] = {}
146+
self.total_usage_mb = 0.0
147147
self.max_mem_usage_mb = max_mem_usage_mb
148148

149-
def remove_entry(self, cache_file: Path):
150-
self.in_memory_cache.pop(cache_file)
149+
def _evict_lru(self):
150+
"""Evict the least-recently-used bin from memory."""
151+
lru_key = next(iter(self.in_memory_cache))
152+
del self.in_memory_cache[lru_key]
153+
self.total_usage_mb -= self.sizes.pop(lru_key)
154+
LOGGER.info(f"Evicted LRU entry {lru_key} from mem cache. Total usage is now {self.total_usage_mb:.1f} MB.")
151155

152-
if self.max_mem_usage_mb is not None:
153-
size = self.sizes.pop(cache_file)
154-
self.total_usage_mb -= size
155-
LOGGER.info(f"Removed entry from mem cache. Freed {size} MB.")
156+
def add_entry(self, cache_file: Path, contents: dict) -> bool:
157+
"""Add or replace a bin in the in-memory cache, evicting LRU entries if needed.
156158
157-
def add_entry(self, cache_file: Path, contents: dict):
158-
self.in_memory_cache[cache_file] = contents
159+
Returns False if the single entry is larger than the entire cache limit.
160+
"""
161+
size_mb = total_size(contents)
159162

160-
if self.max_mem_usage_mb is not None:
161-
size = total_size(contents)
162-
if self.total_usage_mb + size > self.max_mem_usage_mb:
163-
space_available = self.free_space_for(size)
164-
if not space_available:
165-
return False
166-
self.sizes[cache_file] = size
167-
self.total_usage_mb += size
168-
169-
def free_space_for(self, needed_space_mb: float):
170-
if self.max_mem_usage_mb is None:
171-
return True
172-
173-
if needed_space_mb > self.max_mem_usage_mb:
174-
LOGGER.warning(
175-
f"Needed space {needed_space_mb} MB is greater than max mem usage {self.max_mem_usage_mb} MB. "
176-
"This is not possible."
177-
)
163+
if self.max_mem_usage_mb is not None and size_mb > self.max_mem_usage_mb:
164+
LOGGER.warning(f"Entry {cache_file} ({size_mb:.1f} MB) exceeds cache limit ({self.max_mem_usage_mb} MB).")
178165
return False
179-
LOGGER.info(f"Evicting entry from mem cache to free up {needed_space_mb} MB")
180-
while self.total_usage_mb > self.max_mem_usage_mb - needed_space_mb:
181-
# Find the entry with the smallest size
182-
try:
183-
smallest_entry = min(self.sizes.items(), key=lambda x: x[1])
184-
except ValueError:
185-
LOGGER.warning("No entries in mem cache to evict")
186-
return True
187-
self.remove_entry(smallest_entry[0])
188-
LOGGER.info(f"Evicted entry from mem cache. Total usage is now {self.total_usage_mb} MB.")
166+
167+
# Remove old version first. This prevents self-eviction (the eviction
168+
# loop below can only see OTHER bins) and prevents double-counting.
169+
if cache_file in self.sizes:
170+
del self.in_memory_cache[cache_file]
171+
self.total_usage_mb -= self.sizes.pop(cache_file)
172+
173+
# Evict least-recently-used bins until there's room
174+
if self.max_mem_usage_mb is not None:
175+
while self.in_memory_cache and self.total_usage_mb + size_mb > self.max_mem_usage_mb:
176+
self._evict_lru()
177+
178+
# Insert at the back (most-recently-used position)
179+
self.in_memory_cache[cache_file] = contents
180+
self.sizes[cache_file] = size_mb
181+
self.total_usage_mb += size_mb
189182
return True
190183

184+
def touch(self, cache_file: Path):
185+
"""Mark a bin as recently used (moves it to the back of the LRU queue)."""
186+
if cache_file in self.in_memory_cache:
187+
self.in_memory_cache.move_to_end(cache_file)
188+
191189
def get_cache_file(self, prompt: Prompt, params: LLMParams) -> tuple[Path, str]:
192190
# Use the SHA-1 hash of the prompt for the dictionary key
193191
prompt_hash = prompt.model_hash() # Assuming this gives a SHA-1 hash as a hex string
@@ -211,6 +209,7 @@ def maybe_load_cache(self, prompt: Prompt, params: LLMParams):
211209
self.add_entry(cache_file, contents)
212210
else:
213211
contents = self.in_memory_cache[cache_file]
212+
self.touch(cache_file)
214213

215214
data = contents.get(prompt_hash, None)
216215
return None if data is None else LLMCache.model_validate_json(data)

tests/test_cache_manager.py

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
"""Unit tests for FileBasedCacheManager LRU memory eviction.
2+
3+
These tests exercise the in-memory cache eviction without making any API calls.
4+
"""
5+
6+
import tempfile
7+
from pathlib import Path
8+
9+
from safetytooling.apis.inference.cache_manager import FileBasedCacheManager, total_size
10+
11+
12+
def _make_data(size_chars: int, num_keys: int = 1) -> dict:
13+
"""Create a dict whose in-memory size is roughly proportional to size_chars."""
14+
return {f"k{i}": "x" * (size_chars // num_keys) for i in range(num_keys)}
15+
16+
17+
class TestAddEntryReAdd:
18+
"""Regression tests for re-adding bins (the self-eviction bug)."""
19+
20+
def test_re_add_larger_bin_does_not_crash(self):
21+
"""Re-adding a bin that triggers eviction must not self-evict."""
22+
with tempfile.TemporaryDirectory() as tmpdir:
23+
small = _make_data(200_000)
24+
large = _make_data(400_000)
25+
26+
limit = total_size(small) + total_size(large) + 0.01
27+
cm = FileBasedCacheManager(Path(tmpdir), max_mem_usage_mb=limit)
28+
29+
bin_a = Path(tmpdir) / "binA.json"
30+
bin_b = Path(tmpdir) / "binB.json"
31+
bin_c = Path(tmpdir) / "binC.json"
32+
33+
cm.add_entry(bin_a, small)
34+
cm.add_entry(bin_b, large)
35+
36+
# Re-add bin_a with larger contents (simulates disk reload after
37+
# save_cache grew the bin on disk).
38+
cm.add_entry(bin_a, large)
39+
cm.add_entry(bin_c, large) # must not crash
40+
41+
def test_re_add_keeps_dicts_consistent(self):
42+
"""After re-adding a bin, sizes and in_memory_cache must have the same keys."""
43+
with tempfile.TemporaryDirectory() as tmpdir:
44+
cm = FileBasedCacheManager(Path(tmpdir), max_mem_usage_mb=100)
45+
46+
bin_a = Path(tmpdir) / "binA.json"
47+
small = _make_data(100_000)
48+
big = _make_data(500_000)
49+
50+
cm.add_entry(bin_a, small)
51+
cm.add_entry(bin_a, big)
52+
53+
assert set(cm.sizes.keys()) == set(cm.in_memory_cache.keys())
54+
assert bin_a in cm.in_memory_cache
55+
assert cm.in_memory_cache[bin_a] is big
56+
57+
def test_re_add_does_not_double_count(self):
58+
"""Re-adding a bin must not inflate total_usage_mb."""
59+
with tempfile.TemporaryDirectory() as tmpdir:
60+
cm = FileBasedCacheManager(Path(tmpdir), max_mem_usage_mb=100)
61+
62+
bin_a = Path(tmpdir) / "binA.json"
63+
data = _make_data(200_000)
64+
expected_size = total_size(data)
65+
66+
cm.add_entry(bin_a, data)
67+
assert abs(cm.total_usage_mb - expected_size) < 0.001
68+
69+
cm.add_entry(bin_a, data)
70+
assert abs(cm.total_usage_mb - expected_size) < 0.001
71+
72+
def test_oversized_entry_not_leaked(self):
73+
"""If an entry exceeds the entire cache limit, it must not leak."""
74+
with tempfile.TemporaryDirectory() as tmpdir:
75+
cm = FileBasedCacheManager(Path(tmpdir), max_mem_usage_mb=0.001)
76+
77+
bin_a = Path(tmpdir) / "binA.json"
78+
huge = _make_data(1_000_000)
79+
80+
result = cm.add_entry(bin_a, huge)
81+
82+
assert result is False
83+
assert bin_a not in cm.in_memory_cache
84+
assert bin_a not in cm.sizes
85+
assert cm.total_usage_mb == 0
86+
87+
88+
class TestLRUEvictionOrder:
89+
"""Tests that eviction follows LRU order, not smallest-first."""
90+
91+
def test_evicts_oldest_not_smallest(self):
92+
"""When memory is full, the least-recently-used bin is evicted."""
93+
with tempfile.TemporaryDirectory() as tmpdir:
94+
small = _make_data(100_000)
95+
large = _make_data(300_000)
96+
97+
# Room for small + large, but not small + large + small
98+
limit = total_size(small) + total_size(large) + 0.01
99+
cm = FileBasedCacheManager(Path(tmpdir), max_mem_usage_mb=limit)
100+
101+
bin_a = Path(tmpdir) / "binA.json"
102+
bin_b = Path(tmpdir) / "binB.json"
103+
bin_c = Path(tmpdir) / "binC.json"
104+
105+
cm.add_entry(bin_a, small) # oldest
106+
cm.add_entry(bin_b, large) # newer
107+
108+
# Adding bin_c must evict bin_a because it is the OLDEST, not because it is the smallest.
109+
# Under the old smallest-first policy, bin_a would also have been
110+
# evicted — but for the wrong reason. We verify the LRU property
111+
# by checking that bin_b (larger but newer) survives.
112+
cm.add_entry(bin_c, small)
113+
114+
assert bin_a not in cm.in_memory_cache # evicted (oldest)
115+
assert bin_b in cm.in_memory_cache # kept (newer)
116+
assert bin_c in cm.in_memory_cache # just added
117+
118+
def test_touch_prevents_eviction(self):
119+
"""Accessing a bin via touch() moves it to the back of the LRU queue."""
120+
with tempfile.TemporaryDirectory() as tmpdir:
121+
data = _make_data(200_000)
122+
123+
# Room for exactly 2 bins
124+
limit = total_size(data) * 2 + 0.01
125+
cm = FileBasedCacheManager(Path(tmpdir), max_mem_usage_mb=limit)
126+
127+
bin_a = Path(tmpdir) / "binA.json"
128+
bin_b = Path(tmpdir) / "binB.json"
129+
bin_c = Path(tmpdir) / "binC.json"
130+
131+
cm.add_entry(bin_a, data) # oldest
132+
cm.add_entry(bin_b, data) # newer
133+
134+
# Touch bin_a — it's now the most-recently-used
135+
cm.touch(bin_a)
136+
137+
# Adding bin_c should evict bin_b (now the LRU), not bin_a
138+
cm.add_entry(bin_c, data)
139+
140+
assert bin_a in cm.in_memory_cache # survived (was touched)
141+
assert bin_b not in cm.in_memory_cache # evicted (LRU after touch)
142+
assert bin_c in cm.in_memory_cache
143+
144+
def test_add_entry_moves_to_mru(self):
145+
"""Re-adding a bin moves it to the most-recently-used position."""
146+
with tempfile.TemporaryDirectory() as tmpdir:
147+
data = _make_data(200_000)
148+
149+
limit = total_size(data) * 2 + 0.01
150+
cm = FileBasedCacheManager(Path(tmpdir), max_mem_usage_mb=limit)
151+
152+
bin_a = Path(tmpdir) / "binA.json"
153+
bin_b = Path(tmpdir) / "binB.json"
154+
bin_c = Path(tmpdir) / "binC.json"
155+
156+
cm.add_entry(bin_a, data) # oldest
157+
cm.add_entry(bin_b, data) # newer
158+
159+
# Re-add bin_a (simulates reload from disk) — now it's MRU
160+
cm.add_entry(bin_a, data)
161+
162+
# Adding bin_c should evict bin_b (now LRU), not bin_a
163+
cm.add_entry(bin_c, data)
164+
165+
assert bin_a in cm.in_memory_cache
166+
assert bin_b not in cm.in_memory_cache
167+
assert bin_c in cm.in_memory_cache

0 commit comments

Comments
 (0)