Skip to content

Commit 40bdd20

Browse files
committed
(1) Fix the tissue mask for the augmentor. (2) Add a cache for stain matrices in augmentation to accelerate re-augmentation of previously seen inputs.
1 parent 7f867b8 commit 40bdd20

File tree

10 files changed

+478
-58
lines changed

10 files changed

+478
-58
lines changed

README.md

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,11 @@ target_tensor = ToTensor()(target).unsqueeze(0).to(device)
5353
norm_tensor = ToTensor()(norm).unsqueeze(0).to(device)
5454

5555
# ######## Normalization
56-
# fit
56+
# create the normalizer - using vahadane. Alternatively can use 'macenko' or 'reinhard'.
5757
normalizer_vahadane = NormalizerBuilder.build('vahadane')
58+
# move the normalizer to the device (CPU or GPU)
5859
normalizer_vahadane = normalizer_vahadane.to(device)
60+
# fit. For macenko and vahadane this step will compute the stain matrix and concentration
5961
normalizer_vahadane.fit(target_tensor)
6062
# transform
6163
# BCHW - scaled to [0, 1] torch.float32
@@ -65,15 +67,37 @@ output = normalizer_vahadane(norm_tensor)
6567
# augment by: alpha * concentration + beta, while alpha is uniformly randomly sampled from (1 - sigma_alpha, 1 + sigma_alpha),
6668
# and beta is uniformly randomly sampled from (-sigma_beta, sigma_beta).
6769
augmentor = AugmentorBuilder.build('vahadane',
70+
# fix the random number generator seed for reproducibility.
6871
rng=314159,
72+
# the luminosity threshold to find the tissue region to augment
73+
# if set to None, all pixels are treated as tissue
74+
luminosity_threshold=0.8,
75+
6976
sigma_alpha=0.2,
70-
sigma_beta=0.2, target_stain_idx=(0, 1)
77+
sigma_beta=0.2, target_stain_idx=(0, 1),
78+
# this allows caching the stain matrix if it is too time-consuming to recompute.
79+
# e.g., if using Vahadane algorithm
80+
use_cache=True,
81+
# size limit of cache. -1 means no limit (stain matrix is often small in size, e.g., 2 x 3)
82+
cache_size_limit=-1,
83+
# if specified, the augmentor will load the cached stain matrices from the file system.
84+
load_path=None,
7185
)
7286

7387
num_augment = 5
88+
# multiple copies of different random augmentation of the same tile may be generated
7489
for _ in range(num_augment):
7590
# B x C x H x W
76-
aug_out = augmentor(norm_tensor)
91+
# use a list of Hashable keys (e.g., str) to map each batch input to its corresponding stain matrix in the cache.
92+
# this key should be unique, e.g., using the filename of the input tile.
93+
# leave it as None if no caching is intended, even if use_cache is enabled.
94+
# note that since the inputs are all batchified, the cache_keys are given in the form of a list, with each element in the
95+
# list corresponding to a data point in the batch.
96+
aug_out = augmentor(norm_tensor, cache_keys=['some unique key'])
97+
# do anything to the augmentation output
98+
99+
# dump the cache of stain matrices for future usage
100+
augmentor.dump_cache('./cache.pickle')
77101
```
78102

79103
## Installation

demo.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def postprocess(image_tensor): return convert_image_dtype(image_tensor, torch.ui
115115
for idx, tile_single in enumerate(tqdm(tiles)):
116116
tile_single = tile_single.unsqueeze(0).contiguous()
117117
# BCHW - scaled to [0 1] torch.float32
118-
test_out_tensor = augmentor(tile_single, regularizer=0.01, )
118+
test_out_tensor = augmentor(tile_single)
119119
test_out = postprocess(test_out_tensor)
120120
plt.imshow(test_out)
121121
plt.title(f"Augmented: {idx}")

torch_staintools/augmentor/base.py

Lines changed: 184 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,29 @@
1-
from torch_staintools.functional.stain_extraction.factory import build_from_name
21
from torch import nn
32
import torch
4-
from torch_staintools.functional.optimization.dict_learning import get_concentrations
5-
from torch_staintools.functional.stain_extraction.extractor import BaseExtractor
6-
from torch_staintools.functional.utility.implementation import transpose_trailing, img_from_concentration
7-
from torch_staintools.functional.tissue_mask import get_tissue_mask
8-
from operator import mul
9-
from functools import reduce
10-
from typing import Optional, Sequence, Tuple
11-
import multiprocessing as mp
12-
import ctypes
13-
import numpy as np
3+
from typing import Optional, Sequence, Tuple, Hashable, List
4+
from ..functional.utility.implementation import default_device
5+
# from operator import mul
6+
# from functools import reduce
7+
# import multiprocessing as mp
8+
# import ctypes
9+
# import numpy as np
10+
from ..functional.stain_extraction.factory import build_from_name
11+
from ..functional.optimization.dict_learning import get_concentrations
12+
from ..functional.stain_extraction.extractor import BaseExtractor
13+
from ..functional.utility.implementation import transpose_trailing, img_from_concentration
14+
from ..functional.tissue_mask import get_tissue_mask
15+
from ..cache.tensor_cache import TensorCache
16+
from ..loggers import GlobalLoggers
17+
18+
logger = GlobalLoggers.instance().get_logger(__name__)
1419

1520

1621
class Augmentor(nn.Module):
17-
use_cache: bool
22+
device: torch.device
23+
24+
_tensor_cache: TensorCache
25+
CACHE_FIELD: str = '_tensor_cache'
26+
1827
target_stain_idx: Optional[Sequence[int]]
1928
rng: torch.Generator
2029

@@ -29,14 +38,23 @@ class Augmentor(nn.Module):
2938
luminosity_threshold: float
3039
regularizer: float
3140

41+
@staticmethod
42+
def _init_cache(use_cache: bool, cache_size_limit: int, device: Optional[torch.device] = None,
43+
load_path: Optional[str] = None) -> Optional[TensorCache]:
44+
if not use_cache:
45+
return None
46+
return TensorCache.build(size_limit=cache_size_limit, device=device, path=load_path)
47+
3248
def __init__(self, get_stain_matrix: BaseExtractor, reconst_method: str = 'ista',
3349
rng: Optional[int | torch.Generator] = None,
3450
target_stain_idx: Optional[Sequence[int]] = (0, 1),
3551
sigma_alpha: float = 0.2,
3652
sigma_beta: float = 0.2,
3753
num_stains: int = 2,
38-
luminosity_threshold: float = 0.8,
39-
regularizer: float = 0.01):
54+
luminosity_threshold: Optional[float] = 0.8,
55+
regularizer: float = 0.1,
56+
cache: Optional[TensorCache] = None,
57+
device: Optional[torch.device] = None):
4058
"""Augment the stain concentration by alpha * concentration + beta
4159
4260
Args:
@@ -50,7 +68,7 @@ def __init__(self, get_stain_matrix: BaseExtractor, reconst_method: str = 'ista'
5068
luminosity_threshold: luminosity threshold to obtain tissue region and ignore brighter backgrounds.
5169
If None, all image pixels will be considered as tissue for stain matrix/concentration computation.
5270
regularizer: the regularizer to compute concentration used in ISTA or CD algorithm.
53-
71+
cache: the external cache object
5472
"""
5573
super().__init__()
5674
self.reconst_method = reconst_method
@@ -65,6 +83,25 @@ def __init__(self, get_stain_matrix: BaseExtractor, reconst_method: str = 'ista'
6583
self.luminosity_threshold = luminosity_threshold
6684
self.regularizer = regularizer
6785

86+
self._tensor_cache = cache
87+
self.device = default_device(device)
88+
89+
def to(self, device: torch.device):
90+
self.device = device
91+
if self.cache_initialized():
92+
self.tensor_cache.to(device)
93+
return super().to(device)
94+
95+
@property
96+
def cache_size_limit(self) -> int:
97+
if self.cache_initialized():
98+
return self.tensor_cache.size_limit
99+
return 0
100+
101+
def dump_cache(self, path: str):
102+
assert self.cache_initialized()
103+
self.tensor_cache.dump(path)
104+
68105
@staticmethod
69106
def _default_rng(rng: Optional[torch.Generator | int]):
70107
if rng is None:
@@ -74,21 +111,21 @@ def _default_rng(rng: Optional[torch.Generator | int]):
74111
assert isinstance(rng, torch.Generator)
75112
return rng
76113

77-
@staticmethod
78-
def new_cache(shape):
79-
"""
80-
Args:
81-
shape:
82-
83-
Returns:
84-
85-
"""
86-
# Todo map the key to the corresponding cached data -- cached in file or to memory?
87-
#
88-
shared_array_base = mp.Array(ctypes.c_float, reduce(mul, shape))
89-
shared_array = np.ctypeslib.as_array(shared_array_base.get_obj())
90-
shared_array = shared_array.reshape(*shape)
91-
return shared_array
114+
# @staticmethod
115+
# def new_cache(shape):
116+
# """
117+
# Args:
118+
# shape:
119+
#
120+
# Returns:
121+
#
122+
# """
123+
# # Todo map the key to the corresponding cached data -- cached in file or to memory?
124+
# #
125+
# shared_array_base = mp.Array(ctypes.c_float, reduce(mul, shape))
126+
# shared_array = np.ctypeslib.as_array(shared_array_base.get_obj())
127+
# shared_array = shared_array.reshape(*shape)
128+
# return shared_array
92129

93130
@staticmethod
94131
def __concentration_selected(target_concentration: torch.Tensor,
@@ -123,9 +160,13 @@ def __inplace_augment_helper(target_concentration: torch.Tensor, *,
123160
"""
124161
alpha = alpha.to(target_concentration.device)
125162
beta = beta.to(target_concentration.device)
126-
tissue_mask = tissue_mask.ravel()
127-
target_concentration[..., tissue_mask] *= alpha
128-
target_concentration += beta
163+
164+
tissue_mask_flattened = tissue_mask.flatten(start_dim=-2, end_dim=-1).expand(target_concentration.shape)
165+
alpha_expanded = alpha.expand(target_concentration.shape)
166+
target_concentration[..., tissue_mask_flattened] *= alpha_expanded[..., tissue_mask_flattened]
167+
168+
beta_expanded = beta.expand(target_concentration.shape)
169+
target_concentration[..., tissue_mask_flattened] += beta_expanded[..., tissue_mask_flattened]
129170
return target_concentration
130171

131172
@staticmethod
@@ -142,7 +183,7 @@ def channel_rand(target_concentration_selected: torch.Tensor, rng: torch.Generat
142183
143184
Args:
144185
target_concentration_selected: concentrations to work on (e.g., the entire or a subset of concentration
145-
matrix
186+
matrix)
146187
rng: torch.Generator object
147188
sigma_alpha: sample alpha values in range (1-sigma, 1+ sigma)
148189
sigma_beta: sample beta values in range (-sigma, sigma)
@@ -197,11 +238,80 @@ def augment(*,
197238
alpha=alpha, beta=beta)
198239
return target_concentration
199240

200-
def forward(self, target: torch.Tensor, **stain_mat_kwargs):
241+
@staticmethod
242+
def _stain_mat_kwargs_helper(luminosity_threshold,
243+
num_stains,
244+
regularizer,
245+
**stain_mat_kwargs):
246+
arg_dict = {
247+
'luminosity_threshold': luminosity_threshold,
248+
'num_stains': num_stains,
249+
'regularizer': regularizer,
250+
}
251+
stain_mat_kwargs = {k: v for k, v in stain_mat_kwargs.items()}
252+
stain_mat_kwargs.update(arg_dict)
253+
return stain_mat_kwargs
254+
255+
@staticmethod
256+
def stain_mat_from_cache(cache: TensorCache, *,
257+
cache_keys: List[Hashable],
258+
get_stain_matrix: BaseExtractor,
259+
target,
260+
luminosity_threshold,
261+
num_stains,
262+
regularizer,
263+
**stain_mat_kwargs) -> torch.Tensor:
264+
cache_func_kwargs = Augmentor._stain_mat_kwargs_helper(luminosity_threshold, num_stains, regularizer,
265+
**stain_mat_kwargs)
266+
stain_mat_list = cache.get_batch(cache_keys, get_stain_matrix, target, **cache_func_kwargs)
267+
if isinstance(stain_mat_list, torch.Tensor):
268+
return stain_mat_list
269+
270+
return torch.stack(stain_mat_list, dim=0)
271+
272+
def _tensor_cache_helper(self) -> Optional[TensorCache]:
273+
return getattr(self, Augmentor.CACHE_FIELD)
274+
275+
def cache_initialized(self):
276+
return hasattr(self, Augmentor.CACHE_FIELD) and self._tensor_cache_helper() is not None
277+
278+
@property
279+
def tensor_cache(self) -> Optional[TensorCache]:
280+
return self._tensor_cache_helper()
281+
282+
def stain_matrix_helper(self,
283+
*,
284+
cache_keys: Optional[List[Hashable]],
285+
get_stain_matrix: BaseExtractor,
286+
target,
287+
luminosity_threshold,
288+
num_stains,
289+
regularizer,
290+
**stain_mat_kwargs) -> torch.Tensor:
291+
if not self.cache_initialized() or cache_keys is None:
292+
logger.debug(f'{self.cache_initialized()} + {cache_keys is None} - no cache')
293+
return get_stain_matrix(target, luminosity_threshold=luminosity_threshold,
294+
num_stains=num_stains,
295+
regularizer=regularizer,
296+
**stain_mat_kwargs)
297+
# if use cache
298+
assert self.cache_initialized(), f"Attempt to fetch data from cache but cache is not initialized"
299+
assert cache_keys is not None, f"Attempt to fetch data from cache but key is not given"
300+
# move fetched stain matrix to the same device of the target
301+
logger.debug(f"{cache_keys[0:3]}. cache initialized")
302+
return Augmentor.stain_mat_from_cache(cache=self.tensor_cache, cache_keys=cache_keys,
303+
get_stain_matrix=get_stain_matrix,
304+
target=target,
305+
luminosity_threshold=luminosity_threshold, num_stains=num_stains,
306+
regularizer=regularizer, **stain_mat_kwargs,
307+
).to(target.device)
308+
309+
def forward(self, target: torch.Tensor, cache_keys: Optional[List[Hashable]] = None, **stain_mat_kwargs):
201310
"""
202311
203312
Args:
204313
target: input tensor to augment. Shape B x C x H x W and intensity range is [0, 1].
314+
cache_keys: a list of unique keys mapping each input entry to its cached stain matrix. `None` means no caching.
205315
**stain_mat_kwargs: all extra keyword arguments other than regularizer/num_stains/luminosity_threshold set
206316
in __init__.
207317
@@ -210,10 +320,11 @@ def forward(self, target: torch.Tensor, **stain_mat_kwargs):
210320
"""
211321
# stain_matrix_target -- B x num_stain x num_input_color_channel
212322
# todo cache
213-
target_stain_matrix = self.get_stain_matrix(target, luminosity_threshold=self.luminosity_threshold,
214-
num_stains=self.num_stains,
215-
regularizer=self.regularizer,
216-
**stain_mat_kwargs)
323+
target_stain_matrix = self.stain_matrix_helper(cache_keys=cache_keys, get_stain_matrix=self.get_stain_matrix,
324+
target=target, luminosity_threshold=self.luminosity_threshold,
325+
num_stains=self.num_stains,
326+
regularizer=self.regularizer,
327+
**stain_mat_kwargs)
217328

218329
# B x num_stains x num_pixel_in_mask
219330
concentration = get_concentrations(target, target_stain_matrix, regularizer=self.regularizer,
@@ -236,9 +347,40 @@ def build(cls,
236347
rng: Optional[int | torch.Generator] = None,
237348
target_stain_idx: Optional[Sequence[int]] = (0, 1),
238349
sigma_alpha: float = 0.2,
239-
sigma_beta: float = 0.2):
350+
sigma_beta: float = 0.2,
351+
luminosity_threshold: Optional[float] = 0.8,
352+
regularizer: float = 0.1,
353+
use_cache: bool = False,
354+
cache_size_limit: int = -1,
355+
device: Optional[torch.device] = None,
356+
load_path: Optional[str] = None
357+
):
358+
"""Factory builder of the augmentor which manipulate the stain concentration by alpha * concentration + beta.
359+
360+
Args:
361+
method: algorithm name to extract stain - support 'vahadane' or 'macenko'
362+
reconst_method: algorithm to compute concentration. default ista
363+
rng: an optional seed (either an int or a torch.Generator) to determine the random number generation.
364+
target_stain_idx: what stains to augment: e.g., for HE cases, it can be either or both from [0, 1]
365+
sigma_alpha: alpha is uniformly randomly selected from (1-sigma_alpha, 1+sigma_alpha)
366+
sigma_beta: beta is uniformly randomly selected from (-sigma_beta, sigma_beta)
367+
luminosity_threshold: luminosity threshold to find tissue regions (smaller than but positive)
368+
a pixel is considered as being tissue if the intensity falls in the open interval of (0, threshold).
369+
regularizer: regularization term in ISTA algorithm
370+
use_cache: whether use cache to save the stain matrix to avoid recomputation
371+
cache_size_limit: size limit of the cache. negative means no limits.
372+
device: what device to hold the cache.
373+
load_path: If specified, then stain matrix cache will be loaded from the file path. See the `cache`
374+
module for more details.
375+
376+
Returns:
377+
378+
"""
240379
method = method.lower()
241380
extractor = build_from_name(method)
381+
cache = cls._init_cache(use_cache, cache_size_limit=cache_size_limit, device=device,
382+
load_path=load_path)
242383
return cls(extractor, reconst_method=reconst_method, rng=rng, target_stain_idx=target_stain_idx,
243-
sigma_alpha=sigma_alpha, sigma_beta=sigma_beta)
244-
384+
sigma_alpha=sigma_alpha, sigma_beta=sigma_beta,
385+
luminosity_threshold=luminosity_threshold, regularizer=regularizer,
386+
cache=cache, device=device)

0 commit comments

Comments
 (0)