Skip to content

Commit 17fb25f

Browse files
authored
Merge pull request #1374 from MouseLand/batch_img_refactor
Batch img refactor
2 parents 9604421 + f08576d commit 17fb25f

File tree

3 files changed

+58
-116
lines changed

3 files changed

+58
-116
lines changed

cellpose/models.py

Lines changed: 25 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -326,16 +326,37 @@ def eval(self, x, batch_size=8, resample=True, channels=None, channel_axis=None,
326326
torch.cuda.empty_cache()
327327
gc.collect()
328328

329-
if resample:
329+
if resample:
330+
# upsample flows before computing them:
331+
# dP = self._resize_gradients(dP, to_y_size=Ly_0, to_x_size=Lx_0, to_z_size=Lz_0)
332+
# cellprob = self._resize_cellprob(cellprob, to_x_size=Lx_0, to_y_size=Ly_0, to_z_size=Lz_0)
333+
334+
# resize XY then YZ and then put channels first
335+
dP = transforms.resize_image(dP.transpose(1, 2, 3, 0), Ly=Ly_0, Lx=Lx_0, no_channels=False)
336+
dP = transforms.resize_image(dP.transpose(1, 0, 2, 3), Lx=Lx_0, Ly=Lz_0, no_channels=False)
337+
dP = dP.transpose(3, 1, 0, 2)
338+
339+
# resize cellprob:
340+
cellprob = transforms.resize_image(cellprob, Ly=Ly_0, Lx=Lx_0, no_channels=True)
341+
cellprob = transforms.resize_image(cellprob.transpose(1, 0, 2), Lx=Lx_0, Ly=Lz_0, no_channels=True)
342+
cellprob = cellprob.transpose(1, 0, 2)
343+
344+
345+
# 2d case:
346+
if resample and not do_3D:
330347
# upsample flows before computing them:
331-
dP = self._resize_gradients(dP, to_y_size=Ly_0, to_x_size=Lx_0, to_z_size=Lz_0)
332-
cellprob = self._resize_cellprob(cellprob, to_x_size=Lx_0, to_y_size=Ly_0, to_z_size=Lz_0)
348+
# dP = self._resize_gradients(dP, to_y_size=Ly_0, to_x_size=Lx_0, to_z_size=Lz_0)
349+
# cellprob = self._resize_cellprob(cellprob, to_x_size=Lx_0, to_y_size=Ly_0, to_z_size=Lz_0)
350+
351+
# 2D images have N = 1 in batch dimension:
352+
dP = transforms.resize_image(dP.transpose(1, 2, 3, 0), Ly=Ly_0, Lx=Lx_0, no_channels=False).transpose(3, 0, 1, 2)
353+
cellprob = transforms.resize_image(cellprob, Ly=Ly_0, Lx=Lx_0, no_channels=True)
333354

334355
if compute_masks:
335356
# use user niter if specified, otherwise scale niter (200) with diameter
336357
niter_scale = 1 if image_scaling is None else image_scaling
337358
niter = int(200/niter_scale) if niter is None or niter == 0 else niter
338-
masks = self._compute_masks(x.shape, dP, cellprob, flow_threshold=flow_threshold,
359+
masks = self._compute_masks((Lz_0 or nimg, Ly_0, Lx_0), dP, cellprob, flow_threshold=flow_threshold,
339360
cellprob_threshold=cellprob_threshold, min_size=min_size,
340361
max_size_fraction=max_size_fraction, niter=niter,
341362
stitch_threshold=stitch_threshold, do_3D=do_3D)
@@ -344,112 +365,9 @@ def eval(self, x, batch_size=8, resample=True, channels=None, channel_axis=None,
344365

345366
masks, dP, cellprob = masks.squeeze(), dP.squeeze(), cellprob.squeeze()
346367

347-
# undo resizing:
348-
if image_scaling is not None or anisotropy is not None:
349-
350-
dP = self._resize_gradients(dP, to_y_size=Ly_0, to_x_size=Lx_0, to_z_size=Lz_0) # works for 2 or 3D:
351-
cellprob = self._resize_cellprob(cellprob, to_x_size=Lx_0, to_y_size=Ly_0, to_z_size=Lz_0)
352-
353-
if do_3D:
354-
if compute_masks:
355-
# Rescale xy then xz:
356-
masks = transforms.resize_image(masks, Ly=Ly_0, Lx=Lx_0, no_channels=True, interpolation=cv2.INTER_NEAREST)
357-
masks = masks.transpose(1, 0, 2)
358-
masks = transforms.resize_image(masks, Ly=Lz_0, Lx=Lx_0, no_channels=True, interpolation=cv2.INTER_NEAREST)
359-
masks = masks.transpose(1, 0, 2)
360-
361-
else:
362-
# 2D or 3D stitching case:
363-
if compute_masks:
364-
masks = transforms.resize_image(masks, Ly=Ly_0, Lx=Lx_0, no_channels=True, interpolation=cv2.INTER_NEAREST)
365-
366368
return masks, [plot.dx_to_circ(dP), dP, cellprob], styles
367369

368370

369-
def _resize_cellprob(self, prob: np.ndarray, to_y_size: int, to_x_size: int, to_z_size: int = None) -> np.ndarray:
370-
"""
371-
Resize cellprob array to specified dimensions for either 2D or 3D.
372-
373-
Parameters:
374-
prob (numpy.ndarray): The cellprobs to resize, either in 2D or 3D. Returns the same ndim as provided.
375-
to_y_size (int): The target size along the Y-axis.
376-
to_x_size (int): The target size along the X-axis.
377-
to_z_size (int, optional): The target size along the Z-axis. Required
378-
for 3D cellprobs.
379-
380-
Returns:
381-
numpy.ndarray: The resized cellprobs array with the same number of dimensions
382-
as the input.
383-
384-
Raises:
385-
ValueError: If the input cellprobs array does not have 3 or 4 dimensions.
386-
"""
387-
prob_shape = prob.shape
388-
prob = prob.squeeze()
389-
squeeze_happened = prob.shape != prob_shape
390-
prob_shape = np.array(prob_shape)
391-
392-
if prob.ndim == 2:
393-
# 2D case:
394-
prob = transforms.resize_image(prob, Ly=to_y_size, Lx=to_x_size, no_channels=True)
395-
if squeeze_happened:
396-
prob = np.expand_dims(prob, int(np.argwhere(prob_shape == 1))) # add back empty axis for compatibility
397-
elif prob.ndim == 3:
398-
# 3D case:
399-
prob = transforms.resize_image(prob, Ly=to_y_size, Lx=to_x_size, no_channels=True)
400-
prob = prob.transpose(1, 0, 2)
401-
prob = transforms.resize_image(prob, Ly=to_z_size, Lx=to_x_size, no_channels=True)
402-
prob = prob.transpose(1, 0, 2)
403-
else:
404-
raise ValueError(f'gradients have incorrect dimension after squeezing. Should be 2 or 3, prob shape: {prob.shape}')
405-
406-
return prob
407-
408-
409-
def _resize_gradients(self, grads: np.ndarray, to_y_size: int, to_x_size: int, to_z_size: int = None) -> np.ndarray:
410-
"""
411-
Resize gradient arrays to specified dimensions for either 2D or 3D gradients.
412-
413-
Parameters:
414-
grads (np.ndarray): The gradients to resize, either in 2D or 3D. Returns the same ndim as provided.
415-
to_y_size (int): The target size along the Y-axis.
416-
to_x_size (int): The target size along the X-axis.
417-
to_z_size (int, optional): The target size along the Z-axis. Required
418-
for 3D gradients.
419-
420-
Returns:
421-
numpy.ndarray: The resized gradient array with the same number of dimensions
422-
as the input.
423-
424-
Raises:
425-
ValueError: If the input gradient array does not have 3 or 4 dimensions.
426-
"""
427-
grads_shape = grads.shape
428-
grads = grads.squeeze()
429-
squeeze_happened = grads.shape != grads_shape
430-
grads_shape = np.array(grads_shape)
431-
432-
if grads.ndim == 3:
433-
# 2D case, with XY flows in 2 channels:
434-
grads = np.moveaxis(grads, 0, -1) # Put gradients last
435-
grads = transforms.resize_image(grads, Ly=to_y_size, Lx=to_x_size, no_channels=False)
436-
grads = np.moveaxis(grads, -1, 0) # Put gradients first
437-
438-
if squeeze_happened:
439-
grads = np.expand_dims(grads, int(np.argwhere(grads_shape == 1))) # add back empty axis for compatibility
440-
elif grads.ndim == 4:
441-
# dP has gradients that can be treated as channels:
442-
grads = grads.transpose(1, 2, 3, 0) # move gradients last:
443-
grads = transforms.resize_image(grads, Ly=to_y_size, Lx=to_x_size, no_channels=False)
444-
grads = grads.transpose(1, 0, 2, 3) # switch axes to resize again
445-
grads = transforms.resize_image(grads, Ly=to_z_size, Lx=to_x_size, no_channels=False)
446-
grads = grads.transpose(3, 1, 0, 2) # undo transposition
447-
else:
448-
raise ValueError(f'gradients have incorrect dimension after squeezing. Should be 3 or 4, grads shape: {grads.shape}')
449-
450-
return grads
451-
452-
453371
def _run_net(self, x,
454372
augment=False,
455373
batch_size=8, tile_overlap=0.1,

cellpose/transforms.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,7 @@ def convert_image(x, channel_axis=None, z_axis=None, do_3D=False):
545545
Accepts:
546546
- 2D images with no channel dimension: `z_axis` and `channel_axis` must be `None`
547547
- 2D images with channel dimension: `channel_axis` will be guessed between first or last axis, can also specify `channel_axis`. `z_axis` must be `None`
548+
- Batch of 2D images having shape: [N, H, W, C] with N images in the batch
548549
- 3D images with or without channels:
549550
550551
Args:
@@ -554,11 +555,10 @@ def convert_image(x, channel_axis=None, z_axis=None, do_3D=False):
554555
do_3D (bool): Whether to process the image in 3D mode. Defaults to False.
555556
556557
Returns:
557-
numpy.ndarray: The converted image.
558+
numpy.ndarray: The converted image with channels last.
558559
559560
Raises:
560561
ValueError: If the input image is 2D and do_3D is True.
561-
ValueError: If the input image is 4D and do_3D is False.
562562
"""
563563

564564
# check if image is a torch array instead of numpy array, convert to numpy
@@ -571,10 +571,6 @@ def convert_image(x, channel_axis=None, z_axis=None, do_3D=False):
571571
if z_axis is not None and not do_3D:
572572
raise ValueError("2D image provided, but z_axis is not None. Set z_axis=None to process 2D images of ndim=2 or 3.")
573573

574-
# make sure that channel_axis and z_axis are specified if 3D
575-
if ndim == 4 and not do_3D:
576-
raise ValueError("3D input image provided, but do_3D is False. Set do_3D=True to process 3D images. ndims=4")
577-
578574
# make sure that channel_axis and z_axis are specified if 3D
579575
if do_3D:
580576
return _convert_image_3d(x, channel_axis=channel_axis, z_axis=z_axis)
@@ -616,6 +612,7 @@ def convert_image(x, channel_axis=None, z_axis=None, do_3D=False):
616612
x_out[..., 0] = x
617613
x = x_out
618614
del x_out
615+
transforms_logger.info(f'processing grayscale image with {x.shape[0], x.shape[1]} HW')
619616
elif ndim == 3:
620617
# assume 2d with channels
621618
# find dim with smaller size between first and last dims
@@ -632,6 +629,20 @@ def convert_image(x, channel_axis=None, z_axis=None, do_3D=False):
632629
x_out[..., :num_channels] = x[..., :num_channels]
633630
x = x_out
634631
del x_out
632+
transforms_logger.info(f'processing image with {x.shape[0], x.shape[1]} HW, and {x.shape[2]} channels')
633+
elif ndim == 4:
634+
# assume batch of 2d with channels
635+
636+
# zero padding up to 3 channels:
637+
num_channels = x.shape[-1]
638+
if num_channels > 3:
639+
transforms_logger.warning("Found more than 3 channels, only using first 3")
640+
num_channels = 3
641+
x_out = np.zeros((x.shape[0], x.shape[1], x.shape[2], 3), dtype=x.dtype)
642+
x_out[..., :num_channels] = x[..., :num_channels]
643+
x = x_out
644+
del x_out
645+
transforms_logger.info(f'processing image batch with {x.shape[0]} images, {x.shape[1], x.shape[2]} HW, and {x.shape[3]} channels')
635646
else:
636647
# something is wrong: yell
637648
expected_shapes = "2D (H, W), 3D (H, W, C), or 4D (Z, H, W, C)"

tests/test_output.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from cellpose import io, metrics, utils, models
22
import pytest
33
from subprocess import check_output, STDOUT
4+
from pathlib import Path
45
import os
56
import numpy as np
67

@@ -43,7 +44,9 @@ def clear_output(data_dir, image_names):
4344
(True, True, 40),
4445
(True, True, None),
4546
(False, True, None),
46-
(False, False, None)
47+
(False, False, None),
48+
(True, False, None),
49+
(True, False, 40)
4750
]
4851
)
4952
def test_class_2D_one_img(data_dir, image_names, cellposemodel_fixture_24layer, compute_masks, resample, diameter):
@@ -56,11 +59,21 @@ def test_class_2D_one_img(data_dir, image_names, cellposemodel_fixture_24layer,
5659

5760
masks_pred, _, _ = cellposemodel_fixture_24layer.eval(img, normalize=True, compute_masks=compute_masks, resample=resample, diameter=diameter)
5861

59-
if not compute_masks or diameter:
62+
if not compute_masks:
6063
# not compute_masks won't return masks so can't check
61-
# different diameter will give different masks, so can't check
6264
return
6365

66+
if diameter and compute_masks:
67+
# size of masks will be different, so need to adjust calculation
68+
masks_gt_file = Path(str(img_file).replace('_tif.tif', '_tif_cp4_gt_masks.png'))
69+
masks_gt = io.imread_2D(masks_gt_file)
70+
71+
masks_pred_shape = [int(s * diameter/30) for s in masks_pred.shape]
72+
assert [a == b for a, b in zip(masks_gt.shape[:2], masks_pred_shape[:2])]
73+
74+
# don't compare the images, because they are different sizes and won't match
75+
return
76+
6477
io.imsave(data_dir / '2D' / (img_file.stem + "_cp_masks.png"), masks_pred)
6578
# flowsp_pred = np.concatenate([flows_pred[1], flows_pred[2][None, ...]], axis=0)
6679
# mse = np.sqrt((flowsp_pred - flowps) ** 2).sum()

0 commit comments

Comments
 (0)