Skip to content
This repository was archived by the owner on Jul 10, 2025. It is now read-only.

Commit 195edf7

Browse files
apply_to_images for ToGray (#2558)
* apply_to_images for ToGray * revert tests * Update albumentations/augmentations/pixel/functional.py Co-authored-by: sourcery-ai[bot] <58596630+sourcery-ai[bot]@users.noreply.github.com> * correct docstrings * fix docstring desc * fix return statement * fix docstring --------- Co-authored-by: sourcery-ai[bot] <58596630+sourcery-ai[bot]@users.noreply.github.com>
1 parent 1bea491 commit 195edf7

File tree

3 files changed

+151
-29
lines changed

3 files changed

+151
-29
lines changed

albumentations/augmentations/pixel/functional.py

Lines changed: 99 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@
3838
normalize_per_image,
3939
power,
4040
preserve_channel_dim,
41+
reshape_for_channel,
42+
restore_from_channel,
4143
sz_lut,
4244
uint8_io,
4345
)
@uint8_io
@clipped
def to_gray_from_lab(img: np.ndarray) -> np.ndarray:
    """Convert an RGB image or batch of images to grayscale using LAB color space.

    Converts RGB input to the LAB color space and extracts the L (lightness)
    channel, which tracks human perception of brightness better than simple
    RGB averaging. Batches and volumes are flattened to a single tall 2D
    image via albucore's reshape utilities so OpenCV is invoked only once.

    Args:
        img: Input RGB image(s) as a numpy array. Must have 3 channels in the
            last dimension. Supported shapes:
            - Single image: (H, W, 3)
            - Batch of images: (N, H, W, 3)
            - Volume: (D, H, W, 3)
            - Batch of volumes: (N, D, H, W, 3)

            Supported dtypes:
            - np.uint8: Values in range [0, 255]
            - np.float32: Values in range [0, 1]

    Returns:
        Grayscale image(s) with the same spatial dimensions as input but
        without the channel dimension: (H, W), (N, H, W), (D, H, W) or
        (N, D, H, W). The output dtype matches the input dtype. For float
        inputs, the L channel is normalized to [0, 1] by dividing by 100.

    Raises:
        ValueError: If the last dimension is not 3 (RGB channels), or if the
            number of dimensions is not 3, 4 or 5.

    Examples:
        >>> # Single image
        >>> img = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
        >>> gray = to_gray_from_lab(img)
        >>> assert gray.shape == (100, 100)

        >>> # Batch of images - efficiently processed without loops
        >>> batch = np.random.randint(0, 256, (10, 100, 100, 3), dtype=np.uint8)
        >>> gray_batch = to_gray_from_lab(batch)
        >>> assert gray_batch.shape == (10, 100, 100)

    """
    original_dtype = img.dtype

    # Enforce the documented contract; previously a wrong channel count only
    # surfaced as an opaque OpenCV error.
    if img.shape[-1] != 3:
        raise ValueError(f"Expected 3 channels in the last dimension, got {img.shape[-1]}")

    # Single image (H, W, 3): OpenCV handles it directly, no reshaping needed.
    if img.ndim == 3:
        return cv2.cvtColor(img, cv2.COLOR_RGB2LAB)[..., 0]

    if img.ndim == 4:
        # Batch of images (N, H, W, 3) -- the same path also covers a single
        # volume (D, H, W, 3), since both flatten identically.
        has_batch_dim = True
        has_depth_dim = False
    elif img.ndim == 5:
        # Batch of volumes (N, D, H, W, 3).
        has_batch_dim = True
        has_depth_dim = True
    else:
        # Previously fell through with has_batch_dim/has_depth_dim unbound,
        # producing UnboundLocalError; raise the documented ValueError instead.
        raise ValueError(f"Unsupported number of dimensions: {img.ndim}")

    # Flatten batch/volume to one tall 2D image so cv2.cvtColor runs once.
    flattened, original_shape = reshape_for_channel(img, has_batch_dim=has_batch_dim, has_depth_dim=has_depth_dim)

    grayscale_flat = cv2.cvtColor(flattened, cv2.COLOR_RGB2LAB)[..., 0]

    grayscale = restore_from_channel(
        grayscale_flat,
        original_shape,
        has_batch_dim=has_batch_dim,
        has_depth_dim=has_depth_dim,
    )

    # Float32 LAB puts L in [0, 100]; scale to the documented [0, 1] range.
    # NOTE(review): @uint8_io may convert float input to uint8 before this
    # body runs, which would make this branch dead -- confirm against albucore.
    return grayscale / 100.0 if original_dtype == np.float32 else grayscale
13541422

13551423

13561424
@clipped
@@ -1453,10 +1521,14 @@ def to_gray_pca(img: np.ndarray) -> np.ndarray:
14531521
in the color data.
14541522
14551523
Args:
1456-
img (np.ndarray): Input image as a numpy array with shape (height, width, channels).
1524+
img (np.ndarray): Input image as a numpy array. Can be:
1525+
- Single multi-channel image: (H, W, C)
1526+
- Batch of multi-channel images: (N, H, W, C)
1527+
- Single multi-channel volume: (D, H, W, C)
1528+
- Batch of multi-channel volumes: (N, D, H, W, C)
14571529
14581530
Returns:
1459-
np.ndarray: Grayscale image as a 2D numpy array with shape (height, width).
1531+
np.ndarray: Grayscale image with the same spatial dimensions as input.
14601532
If input is uint8, output is uint8 in range [0, 255].
14611533
If input is float32, output is float32 in range [0, 1].
14621534
@@ -1474,14 +1546,14 @@ def to_gray_pca(img: np.ndarray) -> np.ndarray:
14741546
"""
14751547
dtype = img.dtype
14761548
# Reshape the image to a 2D array of pixels
1477-
pixels = img.reshape(-1, img.shape[2])
1549+
pixels = img.reshape(-1, img.shape[-1])
14781550

14791551
# Perform PCA
14801552
pca = PCA(n_components=1)
14811553
pca_result = pca.fit_transform(pixels)
14821554

14831555
# Reshape back to image dimensions and scale to 0-255
1484-
grayscale = pca_result.reshape(img.shape[:2])
1556+
grayscale = pca_result.reshape(img.shape[:-1])
14851557
grayscale = normalize_per_image(grayscale, "min_max")
14861558

14871559
return from_float(grayscale, target_dtype=dtype) if dtype == np.uint8 else grayscale
@@ -1557,14 +1629,13 @@ def grayscale_to_multichannel(
15571629
np.ndarray: Multi-channel image with shape (height, width, num_channels)
15581630
15591631
"""
1560-
# If output should be single channel, just squeeze and return
1632+
# If output should be single channel, add channel dimension if needed
15611633
if num_output_channels == 1:
15621634
return grayscale_image
15631635

1564-
# For multi-channel output, squeeze and stack
15651636
squeezed = np.squeeze(grayscale_image)
1566-
1567-
return cv2.merge([squeezed] * num_output_channels)
1637+
# For multi-channel output, stack channels
1638+
return np.stack([squeezed] * num_output_channels, axis=-1)
15681639

15691640

15701641
@preserve_channel_dim

albumentations/augmentations/pixel/transforms.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3458,6 +3458,57 @@ def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
34583458

34593459
return fpixel.to_gray(img, self.num_output_channels, self.method)
34603460

3461+
def apply_to_images(self, images: np.ndarray, **params: Any) -> np.ndarray:
3462+
"""Apply ToGray to a batch of images.
3463+
3464+
Args:
3465+
images (np.ndarray): Batch of images with shape (N, H, W, C) or (N, H, W).
3466+
**params (Any): Additional parameters.
3467+
3468+
Returns:
3469+
np.ndarray: Batch of grayscale images.
3470+
3471+
"""
3472+
if is_grayscale_image(images, has_batch_dim=True):
3473+
warnings.warn("The image is already gray.", stacklevel=2)
3474+
return images
3475+
3476+
return fpixel.to_gray(images, self.num_output_channels, self.method)
3477+
3478+
def apply_to_volume(self, volume: np.ndarray, **params: Any) -> np.ndarray:
3479+
"""Apply ToGray to a single volume.
3480+
3481+
Args:
3482+
volume (np.ndarray): Volume with shape (D, H, W, C) or (D, H, W).
3483+
**params (Any): Additional parameters.
3484+
3485+
Returns:
3486+
np.ndarray: Grayscale volume.
3487+
3488+
"""
3489+
if is_grayscale_image(volume, has_depth_dim=True):
3490+
warnings.warn("The volume is already gray.", stacklevel=2)
3491+
return volume
3492+
3493+
return fpixel.to_gray(volume, self.num_output_channels, self.method)
3494+
3495+
def apply_to_volumes(self, volumes: np.ndarray, **params: Any) -> np.ndarray:
3496+
"""Apply ToGray to a batch of volumes.
3497+
3498+
Args:
3499+
volumes (np.ndarray): Batch of volumes with shape (N, D, H, W, C) or (N, D, H, W).
3500+
**params (Any): Additional parameters.
3501+
3502+
Returns:
3503+
np.ndarray: Batch of grayscale volumes.
3504+
3505+
"""
3506+
if is_grayscale_image(volumes, has_batch_dim=True, has_depth_dim=True):
3507+
warnings.warn("The volumes are already gray.", stacklevel=2)
3508+
return volumes
3509+
3510+
return fpixel.to_gray(volumes, self.num_output_channels, self.method)
3511+
34613512

34623513
class ToRGB(ImageOnlyTransform):
34633514
"""Convert an input image from grayscale to RGB format.

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
"PyYAML",
99
"typing-extensions>=4.9.0; python_version<'3.10'",
1010
"pydantic>=2.9.2",
11-
"albucore==0.0.25",
11+
"albucore==0.0.26",
1212
"eval-type-backport; python_version<'3.10'",
1313
]
1414

0 commit comments

Comments
 (0)