cache: add docstrings and type hints

aditya0by0 · aditya0by0 · commit 238be4e8a274 · 2025-08-02T20:23:44.000+02:00
diff --git a/chebifier/_custom_cache.py b/chebifier/_custom_cache.py
@@ -8,8 +8,20 @@
 
 
 class PerSmilesPerModelLRUCache:
+    """
+    A thread-safe, optionally persistent LRU cache for storing
+    (SMILES, model_name) → result mappings.
+    """
+
     def __init__(self, max_size: int = 100, persist_path: str | None = None):
-        self._cache = OrderedDict()
+        """
+        Initialize the cache.
+
+        Args:
+            max_size (int): Maximum number of items to keep in the cache.
+            persist_path (str | None): Optional path to persist cache using pickle.
+        """
+        self._cache: OrderedDict[tuple[str, str], Any] = OrderedDict()
         self._max_size = max_size
         self._lock = threading.Lock()
         self._persist_path = persist_path
@@ -21,6 +33,16 @@ def __init__(self, max_size: int = 100, persist_path: str | None = None):
             self._load_cache()
 
     def get(self, smiles: str, model_name: str) -> Any | None:
+        """
+        Retrieve value from cache if present, otherwise return None.
+
+        Args:
+            smiles (str): SMILES string key.
+            model_name (str): Model identifier.
+
+        Returns:
+            Any | None: Cached value or None.
+        """
         key = (smiles, model_name)
         with self._lock:
             if key in self._cache:
@@ -32,6 +54,14 @@ def get(self, smiles: str, model_name: str) -> Any | None:
                 return None
 
     def set(self, smiles: str, model_name: str, value: Any) -> None:
+        """
+        Store value in cache under (smiles, model_name) key.
+
+        Args:
+            smiles (str): SMILES string key.
+            model_name (str): Model identifier.
+            value (Any): Value to cache.
+        """
         assert value is not None, "Value must not be None"
         key = (smiles, model_name)
         with self._lock:
@@ -42,6 +72,9 @@ def set(self, smiles: str, model_name: str, value: Any) -> None:
                 self._cache.popitem(last=False)
 
     def clear(self) -> None:
+        """
+        Clear the cache and remove the persistence file if present.
+        """
         self._save_cache()
         with self._lock:
             self._cache.clear()
@@ -50,23 +83,38 @@ def clear(self) -> None:
             if self._persist_path and os.path.exists(self._persist_path):
                 os.remove(self._persist_path)
 
-    def stats(self) -> dict:
+    def stats(self) -> dict[str, int]:
+        """
+        Return cache hit/miss statistics.
+
+        Returns:
+            dict[str, int]: Dictionary with 'hits' and 'misses' keys.
+        """
         return {"hits": self.hits, "misses": self.misses}
 
     def batch_decorator(self, func: Callable) -> Callable:
-        """Decorator for class methods that accept a batch of SMILES as a tuple,
-        and want caching per (smiles, model_name) combination.
+        """
+        Decorator for instance methods called with a batch of SMILES as a list
+        (uncached ones reach `func` as a tuple); caches per (smiles, model_name) key.
+
+        The instance is expected to have a `model_name` attribute.
+
+        Args:
+            func (Callable): The method to decorate.
+
+        Returns:
+            Callable: The wrapped method.
         """
 
         @wraps(func)
-        def wrapper(instance, smiles_list: list[str]):
+        def wrapper(instance, smiles_list: list[str]) -> list[Any]:
             assert isinstance(smiles_list, list), "smiles_list must be a list."
             model_name = getattr(instance, "model_name", None)
             assert model_name is not None, "Instance must have a model_name attribute."
 
-            results = []
-            missing_smiles = []
-            missing_indices = []
+            results: list[tuple[int, Any]] = []
+            missing_smiles: list[str] = []
+            missing_indices: list[int] = []
 
             # First: try to fetch all from cache
             for i, smiles in enumerate(smiles_list):
@@ -82,7 +130,8 @@ def wrapper(instance, smiles_list: list[str]):
                 new_results = func(instance, tuple(missing_smiles))
                 assert isinstance(
                     new_results, Iterable
-                ), "Function must return an  Iterable."
+                ), "Function must return an Iterable."
+
                 # Save to cache and append
                 for smiles, prediction, missing_idx in zip(
                     missing_smiles, new_results, missing_indices
@@ -101,21 +150,41 @@ def wrapper(instance, smiles_list: list[str]):
 
         return wrapper
 
-    def __len__(self):
+    def __len__(self) -> int:
+        """
+        Return number of items in the cache.
+
+        Returns:
+            int: Number of entries in the cache.
+        """
         with self._lock:
             return len(self._cache)
 
-    def __repr__(self):
+    def __repr__(self) -> str:
+        """
+        String representation of the underlying cache.
+
+        Returns:
+            str: String version of the OrderedDict.
+        """
         return self._cache.__repr__()
 
-    def save(self):
+    def save(self) -> None:
+        """
+        Save the cache to disk, if persistence is enabled.
+        """
         self._save_cache()
 
-    def load(self):
+    def load(self) -> None:
+        """
+        Load the cache from disk, if persistence is enabled.
+        """
         self._load_cache()
 
     def _save_cache(self) -> None:
-        """Serialize the cache to disk."""
+        """
+        Serialize the cache to disk using pickle.
+        """
         if self._persist_path:
             try:
                 with open(self._persist_path, "wb") as f:
@@ -124,7 +193,9 @@ def _save_cache(self) -> None:
                 print(f"[Cache Save Error] {e}")
 
     def _load_cache(self) -> None:
-        """Load the cache from disk."""
+        """
+        Load the cache from disk, if the file exists and is non-empty.
+        """
         if (
             self._persist_path
             and os.path.exists(self._persist_path)
diff --git a/tests/test_cache.py b/tests/test_cache.py
@@ -8,29 +8,46 @@
 
 
 class DummyPredictor:
-    def __init__(self, model_name):
+    def __init__(self, model_name: str):
+        """
+        Dummy predictor for testing cache decorator.
+        :param model_name: Name of the model instance (used for key separation).
+        """
         self.model_name = model_name
 
     @g_cache.batch_decorator
-    def predict(self, smiles_list: tuple[str]):
+    def predict(self, smiles_list: tuple[str]) -> list[str]:
+        """
+        Dummy predict method to simulate model inference.
+        Returns list of predictions with predictable format.
+        """
         # Simple predictable dummy function for tests
         return [f"{self.model_name}_P{i}" for i in range(len(smiles_list))]
 
 
 class TestPerSmilesPerModelLRUCache(unittest.TestCase):
-    def setUp(self):
+    def setUp(self) -> None:
+        """
+        Set up a temporary cache file and cache instance before each test.
+        """
         # Create temp file for persistence tests
         self.temp_file = tempfile.NamedTemporaryFile(delete=False)
         self.temp_file.close()
         self.cache = PerSmilesPerModelLRUCache(
             max_size=3, persist_path=self.temp_file.name
         )
 
-    def tearDown(self):
+    def tearDown(self) -> None:
+        """
+        Clean up the temporary file after each test.
+        """
         if os.path.exists(self.temp_file.name):
             os.remove(self.temp_file.name)
 
-    def test_cache_miss_and_set_get(self):
+    def test_cache_miss_and_set_get(self) -> None:
+        """
+        Test cache miss on initial get, then set and confirm hit.
+        """
         # Initially empty
         self.assertEqual(len(self.cache), 0)
         self.assertIsNone(self.cache.get("CCC", "model1"))
@@ -41,7 +58,10 @@ def test_cache_miss_and_set_get(self):
         self.assertEqual(self.cache.hits, 1)
         self.assertEqual(self.cache.misses, 1)  # One miss from first get
 
-    def test_cache_eviction(self):
+    def test_cache_eviction(self) -> None:
+        """
+        Test LRU eviction when capacity is exceeded.
+        """
         self.cache.set("a", "m", "v1")
         self.cache.set("b", "m", "v2")
         self.cache.set("c", "m", "v3")
@@ -52,7 +72,13 @@ def test_cache_eviction(self):
         self.assertIsNone(self.cache.get("a", "m"))  # 'a' evicted
         self.assertIsNotNone(self.cache.get("d", "m"))  # 'd' present
 
-    def test_batch_decorator_hits_and_misses(self):
+    def test_batch_decorator_hits_and_misses(self) -> None:
+        """
+        Test decorator behavior on batch prediction:
+        - first call (all misses)
+        - second call (mixed hits and misses)
+        - third call (more hits and misses)
+        """
         predictor = DummyPredictor("modelA")
         predictor2 = DummyPredictor("modelB")
 
@@ -120,7 +146,10 @@ def test_batch_decorator_hits_and_misses(self):
             stats_after_third["misses"], 14
         )  # additional 3 misses for GGG, HHH, ZZZ
 
-    def test_persistence_save_and_load(self):
+    def test_persistence_save_and_load(self) -> None:
+        """
+        Test that cache is properly saved to disk and reloaded.
+        """
         # Set some values
         self.cache.set("sm1", "modelX", "val1")
         self.cache.set("sm2", "modelX", "val2")
@@ -137,7 +166,10 @@ def test_persistence_save_and_load(self):
         self.assertEqual(new_cache.get("sm1", "modelX"), "val1")
         self.assertEqual(new_cache.get("sm2", "modelX"), "val2")
 
-    def test_clear_cache(self):
+    def test_clear_cache(self) -> None:
+        """
+        Test clearing the cache and removing persisted file.
+        """
         self.cache.set("x", "m", "v")
         self.cache.save()
         self.assertTrue(os.path.exists(self.temp_file.name))