Skip to content

Commit c07a3fe

Browse files
committed
perf(api): add TTL cache for /api/analyze keyed by provider/model/datasetHash
1 parent 2463902 commit c07a3fe

File tree

1 file changed

+42
-2
lines changed

1 file changed

+42
-2
lines changed

backend/pdf_server.py

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from bleach.sanitizer import Cleaner
1717
from backend.errors import register_error_handlers, ValidationError
1818
import time
19+
import hashlib
1920

2021
# Настраиваем логирование
2122
logging.basicConfig(level=logging.DEBUG)
@@ -99,6 +100,36 @@ def _security_and_rate_limit():
99100
_rate_limit_store[cid] = bucket
100101

101102

103+
# In-memory TTL cache of analysis results, keyed by (provider, model, SHA-256 of the table string)
104+
ANALYSIS_CACHE_TTL_SEC = int(os.getenv("ANALYSIS_CACHE_TTL_SEC", "600"))
105+
ANALYSIS_CACHE_MAX = int(os.getenv("ANALYSIS_CACHE_MAX", "256"))
106+
_analysis_cache: dict[str, tuple[float, str]] = {}
107+
108+
109+
def _make_analysis_key(provider: str, model: str, table_string: str) -> str:
110+
h = hashlib.sha256(table_string.encode("utf-8")).hexdigest()
111+
return f"{provider}:{model}:{h}"
112+
113+
114+
def _get_cached_analysis(key: str) -> Optional[str]:
    """Look up a cached analysis by key.

    Args:
        key: Cache key to look up in ``_analysis_cache``.

    Returns:
        The cached value if an entry exists and its age does not exceed
        ``ANALYSIS_CACHE_TTL_SEC`` seconds; otherwise ``None``. An expired
        entry is removed from the cache as a side effect.
    """
    entry = _analysis_cache.get(key)
    if entry:
        stored_at, analysis = entry
        age = time.time() - stored_at
        if age <= ANALYSIS_CACHE_TTL_SEC:
            return analysis
        # Entry outlived its TTL: drop it so it no longer occupies a slot.
        _analysis_cache.pop(key, None)
    return None
123+
124+
125+
def _put_cached_analysis(key: str, value: str) -> None:
126+
if len(_analysis_cache) >= ANALYSIS_CACHE_MAX:
127+
# удаляем самый старый элемент
128+
oldest_key = min(_analysis_cache, key=lambda k: _analysis_cache[k][0])
129+
_analysis_cache.pop(oldest_key, None)
130+
_analysis_cache[key] = (time.time(), value)
131+
132+
102133
def perform_basic_analysis(df: pd.DataFrame) -> BasicAnalysis:
103134
"""Выполняет базовый анализ данных DataFrame."""
104135
logger.debug(f"Starting basic analysis. DataFrame shape: {df.shape}")
@@ -348,8 +379,17 @@ def analyze():
348379
df = pd.DataFrame(table_data)
349380
table_string = df.to_string(index=False, max_rows=100)
350381

351-
# Получаем анализ от выбранной LLM
352-
analysis = get_analysis(provider, model, table_string)
382+
# Кэширование по (provider, model, dataset_hash)
383+
cache_key = _make_analysis_key(provider, model, table_string)
384+
cached = _get_cached_analysis(cache_key)
385+
if cached is not None:
386+
analysis = cached
387+
logger.debug("Analysis cache hit")
388+
else:
389+
# Получаем анализ от выбранной LLM
390+
analysis = get_analysis(provider, model, table_string)
391+
_put_cached_analysis(cache_key, analysis)
392+
logger.debug("Analysis cache miss; computed and cached")
353393

354394
logger.debug(f"Analysis completed for {provider}:{model}")
355395

0 commit comments

Comments
 (0)