Skip to content

Commit e6a357b

Browse files
Add SQLite cache backend (193x faster than file-based at scale)
Replace JSON bin-file approach with per-model SQLite databases. Activated via SQLITE_CACHE=true env var or by passing backend=CacheBackend.SQLITE to get_cache_manager() (cache_backend=CacheBackend.SQLITE on InferenceAPI/BatchInferenceAPI). Key improvements: - O(1) lookup by primary key (no loading entire 28MB bin files) - WAL mode for concurrent readers without blocking - Connection pooling (reuse across calls) - zstd compression (~559MB JSON → 3MB SQLite) - Schema versioning (stale entries = clean cache miss) - Batch lookups via SQL IN clause - Built-in hit/miss/cost statistics Benchmark (3500 entries, 10k lookups with 65% miss rate, 28MB/bin): File-based: 484.9s (event loop frozen 8 min) SQLite: 2.5s (193x faster) The pathology: FileBasedCacheManager reloads the ENTIRE bin from disk on every cache miss (to check if another process wrote the entry). With 6500 misses × 28MB bins = 182GB of JSON parsing serialized on the event loop. SQLite misses are a single B-tree lookup returning NULL. Also fixes pre-existing pyright errors in cache_manager.py (nullable responses field on LLMCache, redis type annotations).
1 parent 024ebf1 commit e6a357b

File tree

8 files changed

+3296
-2424
lines changed

8 files changed

+3296
-2424
lines changed

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ dependencies = [
4949
"pytest-asyncio==0.26.0",
5050
"pytest-xdist==3.6.1",
5151
"huggingface_hub",
52-
"langchain"
52+
"langchain",
53+
"aiosqlite>=0.22.1",
5354
]
5455

5556
[tool.setuptools]

safetytooling/apis/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from .inference.api import InferenceAPI
2+
from .inference.cache_manager import CacheBackend
23

3-
__all__ = ["InferenceAPI"]
4+
__all__ = ["CacheBackend", "InferenceAPI"]

safetytooling/apis/batch_api.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from typing import Literal
66

77
from safetytooling.apis.inference.anthropic import ANTHROPIC_MODELS, AnthropicModelBatch
8-
from safetytooling.apis.inference.cache_manager import get_cache_manager
8+
from safetytooling.apis.inference.cache_manager import CacheBackend, get_cache_manager
99
from safetytooling.apis.inference.openai.batch_api import OpenAIModelBatch
1010
from safetytooling.apis.inference.openai.utils import GPT_CHAT_MODELS
1111
from safetytooling.data_models import LLMParams, LLMResponse, Prompt
@@ -52,7 +52,7 @@ def __init__(
5252
log_dir: Path | Literal["default"] = "default",
5353
prompt_history_dir: Path | Literal["default"] | None = "default",
5454
cache_dir: Path | Literal["default"] | None = "default",
55-
use_redis: bool = False,
55+
cache_backend: CacheBackend = CacheBackend.FILE,
5656
anthropic_api_key: str | None = None,
5757
openai_api_key: str | None = None,
5858
no_cache: bool = False,
@@ -94,10 +94,12 @@ def __init__(
9494
self.cache_dir = cache_dir
9595

9696
# Check REDIS_CACHE in os.environ
97-
self.use_redis = use_redis or os.environ.get("REDIS_CACHE", "false").lower() == "true"
97+
self.cache_backend = cache_backend
9898
self.cache_manager = None
9999
if self.cache_dir is not None and not self.no_cache:
100-
self.cache_manager = get_cache_manager(self.cache_dir, self.use_redis, max_mem_usage_mb=max_mem_usage_mb)
100+
self.cache_manager = get_cache_manager(
101+
self.cache_dir, self.cache_backend, max_mem_usage_mb=max_mem_usage_mb
102+
)
101103
print(f"{self.cache_manager=}")
102104

103105
self._anthropic_batch = AnthropicModelBatch(anthropic_api_key=anthropic_api_key)

safetytooling/apis/inference/api.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
from safetytooling.utils.utils import get_repo_root
3232

3333
from .anthropic import ANTHROPIC_MODELS, AnthropicChatModel
34-
from .cache_manager import BaseCacheManager, get_cache_manager
34+
from .cache_manager import BaseCacheManager, CacheBackend, get_cache_manager
3535
from .gemini.genai import GeminiModel
3636
from .gemini.vertexai import GeminiVertexAIModel
3737
from .gray_swan import GRAYSWAN_MODELS, GraySwanChatModel
@@ -82,7 +82,8 @@ def __init__(
8282
deepseek_num_threads: int = 20,
8383
prompt_history_dir: Path | Literal["default"] | None = None,
8484
cache_dir: Path | Literal["default"] | None = "default",
85-
use_redis: bool = False,
85+
cache_backend: CacheBackend = CacheBackend.FILE,
86+
use_redis: bool = False, # deprecated: use cache_backend=CacheBackend.REDIS
8687
empty_completion_threshold: int = 0,
8788
use_gpu_models: bool = False,
8889
anthropic_api_key: str | None = None,
@@ -151,9 +152,11 @@ def __init__(
151152
self.cache_dir = cache_dir
152153

153154
self.cache_manager: BaseCacheManager | None = None
154-
self.use_redis = use_redis or os.environ.get("REDIS_CACHE", "false").lower() == "true"
155+
self.cache_backend = CacheBackend.REDIS if use_redis else cache_backend
155156
if self.cache_dir is not None:
156-
self.cache_manager = get_cache_manager(self.cache_dir, self.use_redis, max_mem_usage_mb=max_mem_usage_mb)
157+
self.cache_manager = get_cache_manager(
158+
self.cache_dir, self.cache_backend, max_mem_usage_mb=max_mem_usage_mb
159+
)
157160
print(f"{self.cache_manager=}")
158161

159162
self._openai_completion = OpenAICompletionModel(

safetytooling/apis/inference/cache_manager.py

Lines changed: 45 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import os
33
import sys
44
from collections import OrderedDict, deque
5+
from enum import Enum
56
from itertools import chain
67
from pathlib import Path
78
from typing import List, Tuple, Union
@@ -268,34 +269,31 @@ def process_cached_responses(
268269
for individual_prompt in prompts:
269270
cached_result = self.maybe_load_cache(prompt=individual_prompt, params=params)
270271

271-
if cached_result is not None:
272+
if cached_result is not None and cached_result.responses is not None:
273+
responses_list = cached_result.responses
272274
cache_file, _ = self.get_cache_file(prompt=individual_prompt, params=params)
273275
LOGGER.info(f"Loaded cache for prompt from {cache_file}")
274276

275-
prop_empty_completions = sum(
276-
1 for response in cached_result.responses if response.completion == ""
277-
) / len(cached_result.responses)
277+
prop_empty_completions = sum(1 for response in responses_list if response.completion == "") / len(
278+
responses_list
279+
)
278280

279281
if prop_empty_completions > empty_completion_threshold:
280-
if len(cached_result.responses) == 1:
282+
if len(responses_list) == 1:
281283
LOGGER.warning("Cache does not contain completion; likely due to recitation")
282284
else:
283285
LOGGER.warning(
284286
f"Proportion of cache responses that contain empty completions ({prop_empty_completions}) is greater than threshold {empty_completion_threshold}. Likely due to recitation"
285287
)
286-
failed_cache_response = cached_result.responses
288+
failed_cache_response = responses_list
287289
cached_result = None
288290
cached_response = None
289291
else:
290-
cached_response = (
291-
cached_result.responses
292-
) # We want a list of LLMResponses if we have n responses in a cache
292+
cached_response = responses_list
293293
if insufficient_valids_behaviour != "continue":
294-
assert (
295-
len(cached_result.responses) == n
296-
), f"cache is inconsistent with n={n}\n{cached_result.responses}"
294+
assert len(responses_list) == n, f"cache is inconsistent with n={n}\n{responses_list}"
297295
if print_prompt_and_response:
298-
individual_prompt.pretty_print(cached_result.responses)
296+
individual_prompt.pretty_print(responses_list)
299297

300298
failed_cache_response = None
301299
else:
@@ -330,7 +328,7 @@ def update_failed_cache(
330328
failed_cache_responses[0]
331329
), f"There should be the same number of responses and failed_cache_responses! Instead we have {len(responses)} responses and {len(failed_cache_responses)} failed_cache_responses."
332330
for i in range(len(responses)):
333-
responses[i].api_failures = failed_cache_responses[0][i].api_failures + 1
331+
responses[i].api_failures = (failed_cache_responses[0][i].api_failures or 0) + 1
334332

335333
LOGGER.info(
336334
f"""Updating previous failures for: \n
@@ -449,7 +447,7 @@ def get_cache_file(self, prompt: Prompt, params: LLMParams) -> tuple[Path, str]:
449447
def maybe_load_cache(self, prompt: Prompt, params: LLMParams):
450448
cache_dir, prompt_hash = self.get_cache_file(prompt, params)
451449
key = self._make_key(f"{cache_dir}/{prompt_hash}")
452-
data = self.db.get(key)
450+
data: bytes | None = self.db.get(key) # type: ignore[assignment]
453451
if data is None:
454452
return None
455453
return LLMCache.model_validate_json(data.decode("utf-8"))
@@ -476,32 +474,31 @@ def process_cached_responses(
476474
for individual_prompt in prompts:
477475
cached_result = self.maybe_load_cache(prompt=individual_prompt, params=params)
478476

479-
if cached_result is not None:
477+
if cached_result is not None and cached_result.responses is not None:
478+
responses_list = cached_result.responses
480479
cache_dir, _ = self.get_cache_file(prompt=individual_prompt, params=params)
481480
LOGGER.info(f"Loaded cache for prompt from {cache_dir}")
482481

483-
prop_empty_completions = sum(
484-
1 for response in cached_result.responses if response.completion == ""
485-
) / len(cached_result.responses)
482+
prop_empty_completions = sum(1 for response in responses_list if response.completion == "") / len(
483+
responses_list
484+
)
486485

487486
if prop_empty_completions > empty_completion_threshold:
488-
if len(cached_result.responses) == 1:
487+
if len(responses_list) == 1:
489488
LOGGER.warning("Cache does not contain completion; likely due to recitation")
490489
else:
491490
LOGGER.warning(
492491
f"Proportion of cache responses that contain empty completions ({prop_empty_completions}) is greater than threshold {empty_completion_threshold}. Likely due to recitation"
493492
)
494-
failed_cache_response = cached_result.responses
493+
failed_cache_response = responses_list
495494
cached_result = None
496495
cached_response = None
497496
else:
498-
cached_response = cached_result.responses
497+
cached_response = responses_list
499498
if insufficient_valids_behaviour != "continue":
500-
assert (
501-
len(cached_result.responses) == n
502-
), f"cache is inconsistent with n={n}\n{cached_result.responses}"
499+
assert len(responses_list) == n, f"cache is inconsistent with n={n}\n{responses_list}"
503500
if print_prompt_and_response:
504-
individual_prompt.pretty_print(cached_result.responses)
501+
individual_prompt.pretty_print(responses_list)
505502

506503
failed_cache_response = None
507504
else:
@@ -533,7 +530,7 @@ def update_failed_cache(
533530
failed_cache_responses[0]
534531
), f"There should be the same number of responses and failed_cache_responses! Instead we have {len(responses)} responses and {len(failed_cache_responses)} failed_cache_responses."
535532
for i in range(len(responses)):
536-
responses[i].api_failures = failed_cache_responses[0][i].api_failures + 1
533+
responses[i].api_failures = (failed_cache_responses[0][i].api_failures or 0) + 1
537534

538535
LOGGER.info(
539536
f"""Updating previous failures for: \n
@@ -560,7 +557,7 @@ def get_moderation_file(self, texts: list[str]) -> tuple[Path, str]:
560557
def maybe_load_moderation(self, texts: list[str]):
561558
_, hash = self.get_moderation_file(texts)
562559
key = self._make_key(f"moderation/{hash}")
563-
data = self.db.get(key)
560+
data: bytes | None = self.db.get(key) # type: ignore[assignment]
564561
if data is None:
565562
return None
566563
return LLMCacheModeration.model_validate_json(data.decode("utf-8"))
@@ -581,7 +578,7 @@ def get_embeddings_file(self, params: EmbeddingParams) -> tuple[Path, str]:
581578
def maybe_load_embeddings(self, params: EmbeddingParams) -> EmbeddingResponseBase64 | None:
582579
_, hash = self.get_embeddings_file(params)
583580
key = self._make_key(f"embeddings/{hash}")
584-
data = self.db.get(key)
581+
data: bytes | None = self.db.get(key) # type: ignore[assignment]
585582
if data is None:
586583
return None
587584
return EmbeddingResponseBase64.model_validate_json(data.decode("utf-8"))
@@ -592,11 +589,26 @@ def save_embeddings(self, params: EmbeddingParams, response: EmbeddingResponseBa
592589
self.db.set(key, response.model_dump_json())
593590

594591

592+
class CacheBackend(str, Enum):
593+
"""Cache backend selection."""
594+
595+
FILE = "file"
596+
SQLITE = "sqlite"
597+
REDIS = "redis"
598+
599+
595600
def get_cache_manager(
596-
cache_dir: Path, use_redis: bool = False, num_bins: int = 20, max_mem_usage_mb: float = 5_000
601+
cache_dir: Path,
602+
backend: CacheBackend = CacheBackend.FILE,
603+
num_bins: int = 20,
604+
max_mem_usage_mb: float = 5_000,
597605
) -> BaseCacheManager:
598-
"""Factory function to get the appropriate cache manager based on environment variable."""
599-
print(f"{cache_dir=}, {use_redis=}, {num_bins=}")
600-
if use_redis:
606+
"""Factory function to get the appropriate cache manager."""
607+
print(f"{cache_dir=}, {backend=}")
608+
if backend == CacheBackend.REDIS:
601609
return RedisCacheManager(cache_dir, num_bins)
610+
if backend == CacheBackend.SQLITE:
611+
from .sqlite_cache_manager import SQLiteCacheManager
612+
613+
return SQLiteCacheManager(cache_dir)
602614
return FileBasedCacheManager(cache_dir, num_bins, max_mem_usage_mb)

0 commit comments

Comments
 (0)