11import logging
22import os
33import sys
4- from collections import OrderedDict , deque
4+ from collections import deque
55from itertools import chain
66from pathlib import Path
77from typing import List , Tuple , Union
@@ -137,54 +137,56 @@ def save_embeddings(self, params: EmbeddingParams, response: EmbeddingResponseBa
137137
138138
139139class FileBasedCacheManager (BaseCacheManager ):
140-	    """File-based cache with an LRU-evicted in-memory layer."""
140+	    """Original file-based cache manager implementation."""
141141
142142 def __init__ (self , cache_dir : Path , num_bins : int = 20 , max_mem_usage_mb : float = 5_000 ):
143143 super ().__init__ (cache_dir , num_bins )
144- self .in_memory_cache : OrderedDict [ Path , dict ] = OrderedDict ()
145- self .sizes : dict [ Path , float ] = {}
146- self .total_usage_mb = 0.0
144+ self .in_memory_cache = {}
145+ self .sizes = {} # Track the size of each cache file in memory
146+ self .total_usage_mb = 0
147147 self .max_mem_usage_mb = max_mem_usage_mb
148148
149- def _evict_lru (self ):
150- """Evict the least-recently-used bin from memory."""
151- lru_key = next (iter (self .in_memory_cache ))
152- del self .in_memory_cache [lru_key ]
153- self .total_usage_mb -= self .sizes .pop (lru_key )
154- LOGGER .info (f"Evicted LRU entry { lru_key } from mem cache. Total usage is now { self .total_usage_mb :.1f} MB." )
149+ def remove_entry (self , cache_file : Path ):
150+ self .in_memory_cache .pop (cache_file )
155151
156- def add_entry (self , cache_file : Path , contents : dict ) -> bool :
157- """Add or replace a bin in the in-memory cache, evicting LRU entries if needed.
158-
159- Returns False if the single entry is larger than the entire cache limit.
160- """
161- size_mb = total_size (contents )
162-
163- if self .max_mem_usage_mb is not None and size_mb > self .max_mem_usage_mb :
164- LOGGER .warning (f"Entry { cache_file } ({ size_mb :.1f} MB) exceeds cache limit ({ self .max_mem_usage_mb } MB)." )
165- return False
166-
167- # Remove old version first. This prevents self-eviction (the eviction
168- # loop below can only see OTHER bins) and prevents double-counting.
169- if cache_file in self .sizes :
170- del self .in_memory_cache [cache_file ]
171- self .total_usage_mb -= self .sizes .pop (cache_file )
172-
173- # Evict least-recently-used bins until there's room
174152 if self .max_mem_usage_mb is not None :
175- while self .in_memory_cache and self .total_usage_mb + size_mb > self .max_mem_usage_mb :
176- self ._evict_lru ()
153+ size = self .sizes .pop (cache_file )
154+ self .total_usage_mb -= size
155+ LOGGER .info (f"Removed entry from mem cache. Freed { size } MB." )
177156
178- # Insert at the back (most-recently-used position)
157+ def add_entry ( self , cache_file : Path , contents : dict ):
179158 self .in_memory_cache [cache_file ] = contents
180- self .sizes [cache_file ] = size_mb
181- self .total_usage_mb += size_mb
182- return True
183159
184- def touch (self , cache_file : Path ):
185- """Mark a bin as recently used (moves it to the back of the LRU queue)."""
186- if cache_file in self .in_memory_cache :
187- self .in_memory_cache .move_to_end (cache_file )
160+ if self .max_mem_usage_mb is not None :
161+ size = total_size (contents )
162+ if self .total_usage_mb + size > self .max_mem_usage_mb :
163+ space_available = self .free_space_for (size )
164+ if not space_available :
165+ return False
166+ self .sizes [cache_file ] = size
167+ self .total_usage_mb += size
168+
169+ def free_space_for (self , needed_space_mb : float ):
170+ if self .max_mem_usage_mb is None :
171+ return True
172+
173+ if needed_space_mb > self .max_mem_usage_mb :
174+ LOGGER .warning (
175+ f"Needed space { needed_space_mb } MB is greater than max mem usage { self .max_mem_usage_mb } MB. "
176+ "This is not possible."
177+ )
178+ return False
179+ LOGGER .info (f"Evicting entry from mem cache to free up { needed_space_mb } MB" )
180+ while self .total_usage_mb > self .max_mem_usage_mb - needed_space_mb :
181+ # Find the entry with the smallest size
182+ try :
183+ smallest_entry = min (self .sizes .items (), key = lambda x : x [1 ])
184+ except ValueError :
185+ LOGGER .warning ("No entries in mem cache to evict" )
186+ return True
187+ self .remove_entry (smallest_entry [0 ])
188+ LOGGER .info (f"Evicted entry from mem cache. Total usage is now { self .total_usage_mb } MB." )
189+ return True
188190
189191 def get_cache_file (self , prompt : Prompt , params : LLMParams ) -> tuple [Path , str ]:
190192 # Use the SHA-1 hash of the prompt for the dictionary key
@@ -209,7 +211,6 @@ def maybe_load_cache(self, prompt: Prompt, params: LLMParams):
209211 self .add_entry (cache_file , contents )
210212 else :
211213 contents = self .in_memory_cache [cache_file ]
212- self .touch (cache_file )
213214
214215 data = contents .get (prompt_hash , None )
215216 return None if data is None else LLMCache .model_validate_json (data )