|
16 | 16 | from bleach.sanitizer import Cleaner |
17 | 17 | from backend.errors import register_error_handlers, ValidationError |
18 | 18 | import time |
| 19 | +import hashlib |
19 | 20 |
|
20 | 21 | # Настраиваем логирование |
21 | 22 | logging.basicConfig(level=logging.DEBUG) |
@@ -99,6 +100,36 @@ def _security_and_rate_limit(): |
99 | 100 | _rate_limit_store[cid] = bucket |
100 | 101 |
|
101 | 102 |
|
# Cache of LLM analysis results keyed by (provider, model, dataset_hash).
# Bounds are configurable via environment variables.
ANALYSIS_CACHE_TTL_SEC = int(os.getenv("ANALYSIS_CACHE_TTL_SEC", "600"))  # entry lifetime, seconds
ANALYSIS_CACHE_MAX = int(os.getenv("ANALYSIS_CACHE_MAX", "256"))  # max number of cached entries
# Maps cache key -> (insertion timestamp from time.time(), analysis text).
# NOTE(review): plain dict with no lock — assumes single-threaded access; confirm
# against the WSGI server's threading model.
_analysis_cache: dict[str, tuple[float, str]] = {}
| 108 | + |
| 109 | +def _make_analysis_key(provider: str, model: str, table_string: str) -> str: |
| 110 | + h = hashlib.sha256(table_string.encode("utf-8")).hexdigest() |
| 111 | + return f"{provider}:{model}:{h}" |
| 112 | + |
| 113 | + |
def _get_cached_analysis(key: str) -> Optional[str]:
    """Return the cached analysis for *key*, or ``None`` if absent or expired.

    Expired entries are evicted lazily, on the read that discovers them.
    """
    try:
        stored_at, analysis = _analysis_cache[key]
    except KeyError:
        return None
    if time.time() - stored_at > ANALYSIS_CACHE_TTL_SEC:
        # Stale: drop it so it does not count against the size cap.
        _analysis_cache.pop(key, None)
        return None
    return analysis
| 123 | + |
| 124 | + |
def _put_cached_analysis(key: str, value: str) -> None:
    """Store *value* under *key*, evicting the oldest entry when at capacity.

    Fixes over the previous version:
      * Eviction only happens when inserting a genuinely NEW key — overwriting
        an existing key cannot grow the cache, so refreshing a hot entry no
        longer throws away an unrelated one.
      * Guards the empty-cache case so a misconfigured ``ANALYSIS_CACHE_MAX``
        of 0 (or negative) no longer raises ``ValueError`` from ``min()``.

    Args:
        key: Cache key produced by ``_make_analysis_key``.
        value: Analysis text to cache.
    """
    if key not in _analysis_cache and len(_analysis_cache) >= ANALYSIS_CACHE_MAX:
        if _analysis_cache:
            # Evict the entry with the smallest insertion timestamp (oldest).
            oldest_key = min(_analysis_cache, key=lambda k: _analysis_cache[k][0])
            _analysis_cache.pop(oldest_key, None)
    _analysis_cache[key] = (time.time(), value)
| 131 | + |
| 132 | + |
102 | 133 | def perform_basic_analysis(df: pd.DataFrame) -> BasicAnalysis: |
103 | 134 | """Выполняет базовый анализ данных DataFrame.""" |
104 | 135 | logger.debug(f"Starting basic analysis. DataFrame shape: {df.shape}") |
@@ -348,8 +379,17 @@ def analyze(): |
348 | 379 | df = pd.DataFrame(table_data) |
349 | 380 | table_string = df.to_string(index=False, max_rows=100) |
350 | 381 |
|
351 | | - # Получаем анализ от выбранной LLM |
352 | | - analysis = get_analysis(provider, model, table_string) |
| 382 | + # Кэширование по (provider, model, dataset_hash) |
| 383 | + cache_key = _make_analysis_key(provider, model, table_string) |
| 384 | + cached = _get_cached_analysis(cache_key) |
| 385 | + if cached is not None: |
| 386 | + analysis = cached |
| 387 | + logger.debug("Analysis cache hit") |
| 388 | + else: |
| 389 | + # Получаем анализ от выбранной LLM |
| 390 | + analysis = get_analysis(provider, model, table_string) |
| 391 | + _put_cached_analysis(cache_key, analysis) |
| 392 | + logger.debug("Analysis cache miss; computed and cached") |
353 | 393 |
|
354 | 394 | logger.debug(f"Analysis completed for {provider}:{model}") |
355 | 395 |
|
|
0 commit comments