Skip to content

Commit f23aa7f

Browse files
committed
vectorize Macenko
1 parent b98ff71 commit f23aa7f

File tree

6 files changed

+122
-17
lines changed

6 files changed

+122
-17
lines changed

torch_staintools/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .version import __version__

torch_staintools/constants/config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ class _Config:
1414
# Whether to enable torch.compile (currently only the dictionary learning is affected)
1515
ENABLE_COMPILE: bool = True
1616

17+
STAIN_MAT_BATCHIFY: bool = True
18+
1719
CONFIG: _Config = _Config()
1820

1921

torch_staintools/functional/stain_extraction/macenko.py

Lines changed: 88 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
from typing import Callable, Optional
22

33
import torch
4-
from .utils import percentile, cov
4+
from .utils import percentile, batch_masked_cov, batch_masked_perc, cov
55
from dataclasses import dataclass
66

7+
from ..compile import lazy_compile
8+
from ...constants import CONFIG
9+
10+
711
@dataclass(frozen=False)
812
class MckCfg:
913
"""Configration of Macenko Stain Estimation.
@@ -23,38 +27,77 @@ def __init__(self, cfg: MckCfg):
2327
super().__init__()
2428
self.cfg = cfg
2529

26-
@staticmethod
27-
def cov(x):
28-
"""Covariance matrix for eigen decomposition.
29-
https://en.wikipedia.org/wiki/Covariance_matrix
30-
"""
31-
E_x = x.mean(dim=1)
32-
x = x - E_x[:, None]
33-
return torch.mm(x, x.T) / (x.size(1) - 1)
3430

3531
@staticmethod
3632
def angular_helper(t_hat, ):
3733
# todo deal with multi-dimensional scenario
3834
raise NotImplementedError
3935

4036
@staticmethod
41-
def stain_matrix_helper(t_hat: torch.Tensor, perc: int, eig_vecs: torch.Tensor):
37+
def stain_matrix_helper(t_hat: torch.Tensor, mask_flatten: torch.Tensor,
38+
perc: int, eig_vecs: torch.Tensor):
4239
"""Helper function to compute the stain matrix.
4340
4441
Separate the projected OD vectors on singular vectors (SVD of OD in Macenko paper, which is also the
4542
eigen vector of the covariance matrix of the OD)
4643
4744
Args:
4845
t_hat: projection of OD on the plane of most significant singular vectors of OD.
49-
perc: perc --> min angular term, 100 - perc --> max angular term
46+
B x num_pixel. Not masked.
47+
mask_flatten: the flattened mask. B x num_pixel x 1.
48+
perc: perc --> min angular term, 100 - perc --> max angular term. integer [0, 100].
5049
eig_vecs: eigen vectors of the cov(OD), which may also be the singular vectors of OD.
50+
B x C x num_stains
5151
5252
Returns:
5353
sorted stain matrix in shape of B x num_stains x num_input_color_channel. For H&E cases, the first row
5454
in dimension of num_stains is H and the second is E (only num_stains=2 supported for now).
5555
"""
56-
phi = torch.atan2(t_hat[:, 1], t_hat[:, 0])
56+
# batchified. t_hat as B x num_pixel x num_stains
57+
# phi as B x num_pixels. Unmasked at this point.
58+
phi = torch.atan2(t_hat[..., 1], t_hat[..., 0])
59+
# phi -> num_pix
60+
# requires mask and phi has the same number of dimension.
61+
# therefore collapse the final dim
62+
min_phi = batch_masked_perc(phi, mask_flatten.squeeze(-1), perc, dim=1)
63+
max_phi = batch_masked_perc(phi, mask_flatten.squeeze(-1), 100 - perc, dim=1)
64+
65+
# B x 2 x 1
66+
rot_min = torch.stack([torch.cos(min_phi), torch.sin(min_phi)], dim=-1).unsqueeze(-1)
67+
rot_max = torch.stack([torch.cos(max_phi), torch.sin(max_phi)], dim=-1).unsqueeze(-1)
68+
# B x C x num_stain @ B x num_stain x 1
69+
# = B x C x 1
70+
v_min = torch.bmm(eig_vecs, rot_min)
71+
v_max = torch.bmm(eig_vecs, rot_max)
72+
73+
# a heuristic to make the vector corresponding to hematoxylin first and the
74+
# one corresponding to eosin second. (OD_red)
5775

76+
flag: torch.Tensor = v_min[:, 0: 1, :] > v_max[:, 0: 1, :]
77+
stain_mat = torch.where(flag,
78+
torch.cat((v_min, v_max), dim=-1),
79+
torch.cat((v_max, v_min), dim=-1))
80+
return stain_mat
81+
82+
83+
@staticmethod
84+
def stain_matrix_helper_original(t_hat: torch.Tensor, perc: int, eig_vecs: torch.Tensor):
85+
"""Helper function to compute the stain matrix.
86+
87+
Separate the projected OD vectors on singular vectors (SVD of OD in Macenko paper, which is also the
88+
eigen vector of the covariance matrix of the OD)
89+
90+
Args:
91+
t_hat: projection of OD on the plane of most significant singular vectors of OD.
92+
perc: perc --> min angular term, 100 - perc --> max angular term
93+
eig_vecs: eigen vectors of the cov(OD), which may also be the singular vectors of OD.
94+
95+
Returns:
96+
sorted stain matrix in shape of B x num_stains x num_input_color_channel. For H&E cases, the first row
97+
in dimension of num_stains is H and the second is E (only num_stains=2 supported for now).
98+
"""
99+
phi = torch.atan2(t_hat[..., 1], t_hat[..., 0])
100+
# phi -> num_pix
58101
min_phi = percentile(phi, perc, dim=0)
59102
max_phi = percentile(phi, 100 - perc, dim=0)
60103
v_min = torch.matmul(eig_vecs, torch.stack((torch.cos(min_phi), torch.sin(min_phi)))).unsqueeze(1)
@@ -97,13 +140,42 @@ def __call__(self, od: torch.Tensor,
97140
assert num_stains == 2, f"Num stains: {num_stains} not currently supported in Macenko. Only support: 2"
98141
# B x (HxWx1)
99142
tissue_mask_flatten = tissue_mask.flatten(start_dim=1, end_dim=-1).to(device)
143+
# add dim
144+
100145
# B x (H*W) x C
146+
#
101147
od_flatten = od.flatten(start_dim=2, end_dim=-1).permute(0, 2, 1)
102148
max_stains = od_flatten.shape[-1]
103149
assert num_stains <= max_stains, f"number of stains exceeds maximum stains allowed." \
104150
f" {num_stains} vs {max_stains}"
151+
if CONFIG.STAIN_MAT_BATCHIFY:
152+
return self.stain_mat_vectorize(od_flatten,
153+
tissue_mask_flatten, num_stains, perc)
154+
else:
155+
return self.stain_mat_loop(od_flatten, tissue_mask_flatten, num_stains, perc)
156+
157+
# the actual overhead seems to be the eigh. compilation's impact is minimal.
158+
# maybe don't need it at all?
159+
# @lazy_compile
160+
def stain_mat_vectorize(self, od_flatten: torch.Tensor,
161+
tissue_mask_flatten: torch.Tensor,
162+
num_stains: int, perc: int,):
163+
# add a singleton dim for batchification
164+
tissue_mask_flatten = tissue_mask_flatten[..., None]
165+
cov_mat = batch_masked_cov(od_flatten, tissue_mask_flatten)
166+
_, eig_vecs = torch.linalg.eigh(cov_mat)
167+
eig_vecs = eig_vecs[:, :, -num_stains:]
168+
# unmasked. handle masking later
169+
t_hat = torch.bmm(od_flatten, eig_vecs)
170+
stain_mat = MacenkoAlg.stain_matrix_helper(t_hat, tissue_mask_flatten,
171+
perc, eig_vecs)
172+
stain_mat = stain_mat.transpose(1, 2)
173+
return stain_mat
174+
175+
def stain_mat_loop(self, od_flatten: torch.Tensor, tissue_mask_flatten: torch.Tensor,
176+
num_stains: int, perc: int,
177+
):
105178
stain_mat_list = []
106-
# todo, batchify
107179
for od_single, mask_single in zip(od_flatten, tissue_mask_flatten):
108180
x = od_single[mask_single]
109181

@@ -114,7 +186,9 @@ def __call__(self, od: torch.Tensor,
114186
# HW * C x C x num_stains --> HW x num_stains
115187
t_hat = torch.matmul(x, eig_vecs)
116188
# HW
117-
stain_mat = MacenkoAlg.stain_matrix_helper(t_hat, perc, eig_vecs)
189+
# t_hat -> num_pixels x num_stain
190+
# eig_vecs -> C x num_stain
191+
stain_mat = MacenkoAlg.stain_matrix_helper_original(t_hat, perc, eig_vecs)
118192
stain_mat = stain_mat.T
119193
stain_mat_list.append(stain_mat)
120194
return torch.stack(stain_mat_list)

torch_staintools/functional/stain_extraction/utils.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,28 @@ def normalize_matrix_rows(a: torch.Tensor) -> torch.Tensor:
1212
return a / torch.linalg.norm(a, dim=1)[:, None]
1313

1414

15-
def cov(x):
15+
def cov(x: torch.Tensor) -> torch.Tensor:
1616
"""Covariance matrix for eigen decomposition.
1717
https://en.wikipedia.org/wiki/Covariance_matrix
1818
"""
19+
# x: C x num_pixel
1920
E_x = x.mean(dim=1)
2021
x = x - E_x[:, None]
2122
return torch.mm(x, x.T) / (x.size(1) - 1)
2223

24+
25+
@torch.no_grad()
26+
def batch_masked_cov(od_flatten: torch.Tensor, mask_flatten: torch.Tensor) -> torch.Tensor:
27+
# mask B x num_pixel x 1
28+
# clamp to avoid division by zero in the mean and covariance computations
29+
size_masked = mask_flatten.sum(dim=1).clamp_min(2).unsqueeze(-1)
30+
# B x 1 x C (keepdim=True retains the pixel dim for broadcasting)
31+
mean = (od_flatten * mask_flatten).sum(dim=1, keepdim=True) / size_masked
32+
# B x num_pix x C
33+
x = (od_flatten - mean) * mask_flatten
34+
return torch.bmm(x.transpose(1, 2), x) / (size_masked - 1)
35+
36+
2337
def percentile(t: torch.Tensor, q: float, dim: int) -> torch.Tensor:
2438
"""Author: adapted from https://gist.github.com/spezold/42a451682422beb42bc43ad0c0967a30
2539
@@ -45,3 +59,17 @@ def percentile(t: torch.Tensor, q: float, dim: int) -> torch.Tensor:
4559
return t.kthvalue(k, dim=dim).values
4660

4761

62+
@torch.no_grad()
63+
def batch_masked_perc(phi: torch.Tensor, mask: torch.Tensor, q: int, dim: int) -> torch.Tensor:
64+
# fill background (masked-out) entries with inf so they sort to the end
65+
# mask = mask.squeeze(-1)
66+
phi_filled = torch.where(mask.bool(), phi, torch.tensor(torch.inf, device=phi.device))
67+
# inf-filled entries sort to the end and are excluded by the percentile index cutoff.
68+
phi_sorted, _ = torch.sort(phi_filled, dim=dim)
69+
size_masked = mask.sum(dim=dim)
70+
q_float = q / 100.0
71+
target_indices = (q_float * (size_masked - 1)).long().clamp(min=0)
72+
73+
# not friendly to torch.compile
74+
# torch.nanquantile(phi_masked, q_float, dim=dim, interpolation='nearest') # B
75+
return phi_sorted.gather(dim, target_indices.unsqueeze(dim)).squeeze(dim)

torch_staintools/normalizer/factory.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def build(method: TYPE_SUPPORTED,
2929
dict_init: MODE_INIT = 'transpose',
3030
concentration_solver: METHOD_FACTORIZE = 'fista',
3131
num_stains: int = 2,
32-
luminosity_threshold: float = 0.8,
32+
luminosity_threshold: Optional[float] = 0.8,
3333
perc: int = 1,
3434
regularizer: float = PARAM.OPTIM_DEFAULT_SPARSE_LAMBDA, # 1e-2
3535
maxiter: int = PARAM.OPTIM_SPARSE_DEFAULT_MAX_ITER, # 50

torch_staintools/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '1.0.5'
1+
__version__ = '1.0.6a'

0 commit comments

Comments
 (0)